svga: update driver for version 10 GPU interface

This is a squash commit of roughly two years of development work. Authors include: Brian Paul, Charmaine Lee, Thomas Hellstrom, Jakob Bornecrantz, Sinclair Yeh, Mingcheng Chen, Kai Ninomiya, and MengLin Wu. The driver supports OpenGL 3.3. Signed-off-by: Brian Paul <brianp@vmware.com>
2026-04-25 13:30:38 +02:00 · 2015-08-13 11:00:58 -07:00 · 2015-08-13 11:00:58 -07:00 · e054251ed1
commit e054251ed1
parent 656dac120d
64 changed files with 8722 additions and 2436 deletions
--- a/src/gallium/drivers/svga/Makefile.sources
+++ b/src/gallium/drivers/svga/Makefile.sources
@ -1,6 +1,7 @@
 C_SOURCES := \
 	svga_cmd.c \
 	svga_cmd.h \
+	svga_cmd_vgpu10.c \
 	svga_context.c \
 	svga_context.h \
 	svga_debug.h \
@ -12,6 +13,7 @@ C_SOURCES := \
 	svga_format.c \
 	svga_format.h \
 	svga_hw_reg.h \
+	svga_link.c \
 	svga_pipe_blend.c \
 	svga_pipe_blit.c \
 	svga_pipe_clear.c \
@ -20,10 +22,12 @@ C_SOURCES := \
 	svga_pipe_draw.c \
 	svga_pipe_flush.c \
 	svga_pipe_fs.c \
+	svga_pipe_gs.c \
 	svga_pipe_misc.c \
 	svga_pipe_query.c \
 	svga_pipe_rasterizer.c \
 	svga_pipe_sampler.c \
+        svga_pipe_streamout.c \
 	svga_pipe_vertex.c \
 	svga_pipe_vs.c \
 	svga_public.h \
@ -44,14 +48,17 @@ C_SOURCES := \
 	svga_shader.c \
 	svga_shader.h \
 	svga_state.c \
+	svga_state.h \
 	svga_state_constants.c \
 	svga_state_framebuffer.c \
-	svga_state_fs.c \
-	svga_state.h \
 	svga_state_need_swtnl.c \
 	svga_state_rss.c \
 	svga_state_tss.c \
 	svga_state_vdecl.c \
+	svga_state_sampler.c \
+	svga_state_fs.c \
+	svga_state_gs.c \
+	svga_state_tgsi_transform.c \
 	svga_state_vs.c \
 	svga_surface.c \
 	svga_surface.h \
@ -65,6 +72,7 @@ C_SOURCES := \
 	svga_tgsi_emit.h \
 	svga_tgsi.h \
 	svga_tgsi_insn.c \
+	svga_tgsi_vgpu10.c \
 	svga_winsys.h \
 	\
 	svgadump/svga_dump.c \
@ -74,20 +82,3 @@ C_SOURCES := \
 	svgadump/svga_shader.h \
 	svgadump/svga_shader_op.c \
 	svgadump/svga_shader_op.h
-
-SVGA_H_FILES := \
-	include/includeCheck.h \
-	include/svga3d_caps.h \
-	include/svga3d_cmd.h \
-	include/svga3d_devcaps.h \
-	include/svga3d_limits.h \
-	include/svga3d_reg.h \
-	include/svga3d_shaderdefs.h \
-	include/svga3d_surfacedefs.h \
-	include/svga3d_types.h \
-	include/svga_escape.h \
-	include/svga_overlay.h \
-	include/svga_reg.h \
-	include/svga_types.h \
-	include/vmware_pack_begin.h \
-	include/vmware_pack_end.h
--- a/src/gallium/drivers/svga/svga_cmd.c
+++ b/src/gallium/drivers/svga/svga_cmd.c
@ -66,7 +66,7 @@ surface_to_surfaceid(struct svga_winsys_context *swc, // IN
   if (surface) {
      struct svga_surface *s = svga_surface(surface);
      swc->surface_relocation(swc, &id->sid, NULL, s->handle, flags);
-      id->face = s->real_face; /* faces have the same order */
+      id->face = s->real_layer; /* faces have the same order */
      id->mipmap = s->real_level;
   }
   else {
@ -460,7 +460,7 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc,

   swc->surface_relocation(swc, &cmd->host.sid, NULL,
                           texture->handle, surface_flags);
-   cmd->host.face = st->face; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */
+   cmd->host.face = st->slice; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */
   cmd->host.mipmap = st->base.level;

   cmd->transfer = transfer;
@ -842,6 +842,8 @@ SVGA3D_SetShader(struct svga_winsys_context *swc,
 {
   SVGA3dCmdSetShader *cmd;

+   assert(type == SVGA3D_SHADERTYPE_VS || type == SVGA3D_SHADERTYPE_PS);
+
   cmd = SVGA3D_FIFOReserve(swc,
                            SVGA_3D_CMD_SET_SHADER, sizeof *cmd,
                            0);
@ -1385,7 +1387,7 @@ SVGA3D_BeginGBQuery(struct svga_winsys_context *swc,
   if(!cmd)
      return PIPE_ERROR_OUT_OF_MEMORY;

-   swc->context_relocation(swc, &cmd->cid);
+   cmd->cid = swc->cid;
   cmd->type = type;

   swc->commit(swc);
@ -1465,7 +1467,7 @@ SVGA3D_EndGBQuery(struct svga_winsys_context *swc,
   if(!cmd)
      return PIPE_ERROR_OUT_OF_MEMORY;

-   swc->context_relocation(swc, &cmd->cid);
+   cmd->cid = swc->cid;
   cmd->type = type;

   swc->mob_relocation(swc, &cmd->mobid, &cmd->offset, buffer,
@ -1552,7 +1554,7 @@ SVGA3D_WaitForGBQuery(struct svga_winsys_context *swc,
   if(!cmd)
      return PIPE_ERROR_OUT_OF_MEMORY;

-   swc->context_relocation(swc, &cmd->cid);
+   cmd->cid = swc->cid;
   cmd->type = type;

   swc->mob_relocation(swc, &cmd->mobid, &cmd->offset, buffer,
@ -1642,6 +1644,8 @@ SVGA3D_SetGBShader(struct svga_winsys_context *swc,
                   struct svga_winsys_gb_shader *gbshader)
 {
   SVGA3dCmdSetShader *cmd;
+
+   assert(type == SVGA3D_SHADERTYPE_VS || type == SVGA3D_SHADERTYPE_PS);
   
   cmd = SVGA3D_FIFOReserve(swc,
                            SVGA_3D_CMD_SET_SHADER,
@ -1650,7 +1654,7 @@ SVGA3D_SetGBShader(struct svga_winsys_context *swc,
   if (!cmd)
      return PIPE_ERROR_OUT_OF_MEMORY;
   
-   swc->context_relocation(swc, &cmd->cid);
+   cmd->cid = swc->cid;
   cmd->type = type;
   if (gbshader)
      swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader, 0);
--- a/src/gallium/drivers/svga/svga_context.c
+++ b/src/gallium/drivers/svga/svga_context.c
@ -30,6 +30,7 @@
 #include "pipe/p_screen.h"
 #include "util/u_memory.h"
 #include "util/u_bitmask.h"
+#include "util/u_upload_mgr.h"

 #include "svga_context.h"
 #include "svga_screen.h"
@ -42,6 +43,10 @@
 #include "svga_draw.h"
 #include "svga_debug.h"
 #include "svga_state.h"
+#include "svga_winsys.h"
+
+#define CONST0_UPLOAD_DEFAULT_SIZE 65536
+#define CONST0_UPLOAD_ALIGNMENT 256

 DEBUG_GET_ONCE_BOOL_OPTION(no_swtnl, "SVGA_NO_SWTNL", FALSE)
 DEBUG_GET_ONCE_BOOL_OPTION(force_swtnl, "SVGA_FORCE_SWTNL", FALSE);
@ -53,27 +58,67 @@ DEBUG_GET_ONCE_BOOL_OPTION(force_hw_line_stipple, "SVGA_FORCE_HW_LINE_STIPPLE",
 static void svga_destroy( struct pipe_context *pipe )
 {
   struct svga_context *svga = svga_context( pipe );
-   struct svga_winsys_screen *sws = svga_screen(pipe->screen)->sws;
-   unsigned shader;
+   unsigned shader, i;
+
+   /* free any alternate rasterizer states used for point sprite */
+   for (i = 0; i < Elements(svga->rasterizer_no_cull); i++) {
+      if (svga->rasterizer_no_cull[i]) {
+         pipe->delete_rasterizer_state(pipe, svga->rasterizer_no_cull[i]);
+      }
+   }
+
+   /* free polygon stipple state */
+   if (svga->polygon_stipple.sampler) {
+      pipe->delete_sampler_state(pipe, svga->polygon_stipple.sampler);
+   }
+   if (svga->polygon_stipple.sampler_view) {
+      pipe->sampler_view_destroy(pipe,
+                                 &svga->polygon_stipple.sampler_view->base);
+   }
+   pipe_resource_reference(&svga->polygon_stipple.texture, NULL);
+
+   /* free HW constant buffers */
+   for (shader = 0; shader < Elements(svga->state.hw_draw.constbuf); shader++) {
+      pipe_resource_reference(&svga->state.hw_draw.constbuf[shader], NULL);
+   }
+
+   pipe->delete_blend_state(pipe, svga->noop_blend);
+
+   /* free query gb object */
+   if (svga->gb_query) {
+      pipe->destroy_query(pipe, NULL);
+      svga->gb_query = NULL;
+   }

   util_blitter_destroy(svga->blitter);

   svga_cleanup_framebuffer( svga );
   svga_cleanup_tss_binding( svga );

-   svga_hwtnl_destroy( svga->hwtnl );
-
   svga_cleanup_vertex_state(svga);
   
-   svga->swc->destroy(svga->swc);
-   
   svga_destroy_swtnl( svga );
+   svga_hwtnl_destroy( svga->hwtnl );

-   util_bitmask_destroy( svga->shader_id_bm );
+   svga->swc->destroy(svga->swc);

+   util_bitmask_destroy(svga->blend_object_id_bm);
+   util_bitmask_destroy(svga->ds_object_id_bm);
+   util_bitmask_destroy(svga->input_element_object_id_bm);
+   util_bitmask_destroy(svga->rast_object_id_bm);
+   util_bitmask_destroy(svga->sampler_object_id_bm);
+   util_bitmask_destroy(svga->sampler_view_id_bm);
+   util_bitmask_destroy(svga->shader_id_bm);
+   util_bitmask_destroy(svga->surface_view_id_bm);
+   util_bitmask_destroy(svga->stream_output_id_bm);
+   util_bitmask_destroy(svga->query_id_bm);
+   u_upload_destroy(svga->const0_upload);
+
+   /* free user's constant buffers */
   for (shader = 0; shader < PIPE_SHADER_TYPES; ++shader) {
-      pipe_resource_reference( &svga->curr.cbufs[shader].buffer, NULL );
-      sws->surface_reference(sws, &svga->state.hw_draw.hw_cb[shader], NULL);
+      for (i = 0; i < Elements(svga->curr.constbufs[shader]); ++i) {
+         pipe_resource_reference(&svga->curr.constbufs[shader][i].buffer, NULL);
+      }
   }

   FREE( svga );
@ -90,7 +135,7 @@ struct pipe_context *svga_context_create(struct pipe_screen *screen,

   svga = CALLOC_STRUCT(svga_context);
   if (svga == NULL)
-      goto no_svga;
+      goto cleanup;

   LIST_INITHEAD(&svga->dirty_buffers);

@ -100,8 +145,8 @@ struct pipe_context *svga_context_create(struct pipe_screen *screen,
   svga->pipe.clear = svga_clear;

   svga->swc = svgascreen->sws->context_create(svgascreen->sws);
-   if(!svga->swc)
-      goto no_swc;
+   if (!svga->swc)
+      goto cleanup;

   svga_init_resource_functions(svga);
   svga_init_blend_functions(svga);
@ -114,11 +159,15 @@ struct pipe_context *svga_context_create(struct pipe_screen *screen,
   svga_init_sampler_functions(svga);
   svga_init_fs_functions(svga);
   svga_init_vs_functions(svga);
+   svga_init_gs_functions(svga);
   svga_init_vertex_functions(svga);
   svga_init_constbuffer_functions(svga);
   svga_init_query_functions(svga);
   svga_init_surface_functions(svga);
+   svga_init_stream_output_functions(svga);

+   /* init misc state */
+   svga->curr.sample_mask = ~0;

   /* debug */
   svga->debug.no_swtnl = debug_get_option_no_swtnl();
@ -128,21 +177,54 @@ struct pipe_context *svga_context_create(struct pipe_screen *screen,
   svga->debug.no_line_width = debug_get_option_no_line_width();
   svga->debug.force_hw_line_stipple = debug_get_option_force_hw_line_stipple();

-   svga->shader_id_bm = util_bitmask_create();
-   if (svga->shader_id_bm == NULL)
-      goto no_shader_bm;
+   if (!(svga->blend_object_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->ds_object_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->input_element_object_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->rast_object_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->sampler_object_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->sampler_view_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->shader_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->surface_view_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->stream_output_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->query_id_bm = util_bitmask_create()))
+      goto cleanup;

   svga->hwtnl = svga_hwtnl_create(svga);
   if (svga->hwtnl == NULL)
-      goto no_hwtnl;
+      goto cleanup;

   if (!svga_init_swtnl(svga))
-      goto no_swtnl;
+      goto cleanup;

   ret = svga_emit_initial_state( svga );
   if (ret != PIPE_OK)
-      goto no_state;
-   
+      goto cleanup;
+
+   svga->const0_upload = u_upload_create(&svga->pipe,
+                                         CONST0_UPLOAD_DEFAULT_SIZE,
+                                         CONST0_UPLOAD_ALIGNMENT,
+                                         PIPE_BIND_CONSTANT_BUFFER);
+   if (!svga->const0_upload)
+      goto cleanup;
+
   /* Avoid shortcircuiting state with initial value of zero.
    */
   memset(&svga->state.hw_clear, 0xcd, sizeof(svga->state.hw_clear));
@ -151,24 +233,64 @@ struct pipe_context *svga_context_create(struct pipe_screen *screen,

   memset(&svga->state.hw_draw, 0xcd, sizeof(svga->state.hw_draw));
   memset(&svga->state.hw_draw.views, 0x0, sizeof(svga->state.hw_draw.views));
+   memset(&svga->state.hw_draw.num_sampler_views, 0,
+      sizeof(svga->state.hw_draw.num_sampler_views));
   svga->state.hw_draw.num_views = 0;
-   memset(&svga->state.hw_draw.hw_cb, 0x0, sizeof(svga->state.hw_draw.hw_cb));
+
+   /* Initialize the shader pointers */
+   svga->state.hw_draw.vs = NULL;
+   svga->state.hw_draw.gs = NULL;
+   svga->state.hw_draw.fs = NULL;
+   memset(svga->state.hw_draw.constbuf, 0,
+          sizeof(svga->state.hw_draw.constbuf));
+   memset(svga->state.hw_draw.default_constbuf_size, 0,
+          sizeof(svga->state.hw_draw.default_constbuf_size));
+   memset(svga->state.hw_draw.enabled_constbufs, 0,
+          sizeof(svga->state.hw_draw.enabled_constbufs));
+
+   /* Create a no-operation blend state which we will bind whenever the
+    * requested blend state is impossible (e.g. due to having an integer
+    * render target attached).
+    *
+    * XXX: We will probably actually need 16 of these, one for each possible
+    * RGBA color mask (4 bits).  Then, we would bind the one with a color mask
+    * matching the blend state it is replacing.
+    */
+   {
+      struct pipe_blend_state noop_tmpl = {0};
+      unsigned i;
+
+      for (i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) {
+         // Set the color mask to all-ones.  Later this may change.
+         noop_tmpl.rt[i].colormask = PIPE_MASK_RGBA;
+      }
+      svga->noop_blend = svga->pipe.create_blend_state(&svga->pipe, &noop_tmpl);
+   }

   svga->dirty = ~0;

   return &svga->pipe;

-no_state:
+cleanup:
   svga_destroy_swtnl(svga);
-no_swtnl:
-   svga_hwtnl_destroy( svga->hwtnl );
-no_hwtnl:
-   util_bitmask_destroy( svga->shader_id_bm );
-no_shader_bm:
-   svga->swc->destroy(svga->swc);
-no_swc:
+
+   if (svga->const0_upload)
+      u_upload_destroy(svga->const0_upload);
+   if (svga->hwtnl)
+      svga_hwtnl_destroy(svga->hwtnl);
+   if (svga->swc)
+      svga->swc->destroy(svga->swc);
+   util_bitmask_destroy(svga->blend_object_id_bm);
+   util_bitmask_destroy(svga->ds_object_id_bm);
+   util_bitmask_destroy(svga->input_element_object_id_bm);
+   util_bitmask_destroy(svga->rast_object_id_bm);
+   util_bitmask_destroy(svga->sampler_object_id_bm);
+   util_bitmask_destroy(svga->sampler_view_id_bm);
+   util_bitmask_destroy(svga->shader_id_bm);
+   util_bitmask_destroy(svga->surface_view_id_bm);
+   util_bitmask_destroy(svga->stream_output_id_bm);
+   util_bitmask_destroy(svga->query_id_bm);
   FREE(svga);
-no_svga:
   return NULL;
 }

@ -195,11 +317,19 @@ void svga_context_flush( struct svga_context *svga,
   /* To force the re-emission of rendertargets and texture sampler bindings on
    * the next command buffer.
    */
-   svga->rebind.rendertargets = TRUE;
-   svga->rebind.texture_samplers = TRUE;
+   svga->rebind.flags.rendertargets = TRUE;
+   svga->rebind.flags.texture_samplers = TRUE;
+
   if (svga_have_gb_objects(svga)) {
-      svga->rebind.vs = TRUE;
-      svga->rebind.fs = TRUE;
+
+      svga->rebind.flags.constbufs = TRUE;
+      svga->rebind.flags.vs = TRUE;
+      svga->rebind.flags.fs = TRUE;
+      svga->rebind.flags.gs = TRUE;
+
+      if (svga_need_to_rebind_resources(svga)) {
+         svga->rebind.flags.query = TRUE;
+      }
   }

   if (SVGA_DEBUG & DEBUG_SYNC) {
@ -215,6 +345,26 @@ void svga_context_flush( struct svga_context *svga,
 }


+/**
+ * Flush pending commands and wait for completion with a fence.
+ *
+ * Calls svga_context_flush() to obtain a fence for the flushed commands,
+ * waits on that fence through the screen's fence_finish() hook with
+ * PIPE_TIMEOUT_INFINITE, and finally passes NULL to fence_reference() so
+ * the local fence handle is not kept.
+ *
+ * NOTE(review): the fence is handed to fence_finish() without a NULL
+ * check; confirm that svga_context_flush() always returns a fence here.
+ *
+ * \param svga  the context whose command buffer is flushed and waited on
+ */
+void
+svga_context_finish(struct svga_context *svga)
+{
+   struct pipe_screen *screen = svga->pipe.screen;
+   struct pipe_fence_handle *fence = NULL;
+
+   svga_context_flush(svga, &fence);
+   svga->pipe.screen->fence_finish(screen, fence, PIPE_TIMEOUT_INFINITE);
+   screen->fence_reference(screen, &fence, NULL);
+}
+
+
+/**
+ * Emit pending drawing commands to the command buffer.
+ * If the command buffer overflows, we flush it and retry.
+ * \sa svga_hwtnl_flush()
+ */
 void svga_hwtnl_flush_retry( struct svga_context *svga )
 {
   enum pipe_error ret = PIPE_OK;
@ -225,7 +375,7 @@ void svga_hwtnl_flush_retry( struct svga_context *svga )
      ret = svga_hwtnl_flush( svga->hwtnl );
   }

-   assert(ret == 0);
+   assert(ret == PIPE_OK);
 }


--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@ -38,7 +38,6 @@

 #include "svga_screen.h"
 #include "svga_state.h"
-#include "svga_tgsi.h"
 #include "svga_winsys.h"
 #include "svga_hw_reg.h"
 #include "svga3d_shaderdefs.h"
@ -48,7 +47,19 @@
 #define SVGA_QUERY_DRAW_CALLS   (PIPE_QUERY_DRIVER_SPECIFIC + 0)
 #define SVGA_QUERY_FALLBACKS    (PIPE_QUERY_DRIVER_SPECIFIC + 1)
 #define SVGA_QUERY_MEMORY_USED  (PIPE_QUERY_DRIVER_SPECIFIC + 2)
+#define SVGA_QUERY_MAX          (PIPE_QUERY_DRIVER_SPECIFIC + 3)

+/**
+ * Maximum supported number of constant buffers per shader
+ */
+#define SVGA_MAX_CONST_BUFS 14
+
+/**
+ * Maximum constant buffer size that can be set with the
+ * DXSetSingleConstantBuffer command:
+ * (DX10 constant buffer element count) * (4 components) * (4 bytes each)
+ */
+#define SVGA_MAX_CONST_BUF_SIZE (4096 * 4 * sizeof(int))

 struct draw_vertex_shader;
 struct draw_fragment_shader;
@ -57,49 +68,16 @@ struct SVGACmdMemory;
 struct util_bitmask;


-struct svga_shader
-{
-   const struct tgsi_token *tokens;
-
-   struct tgsi_shader_info info;
-
-   /** Head of linked list of variants */
-   struct svga_shader_variant *variants;
-
-   unsigned id;  /**< for debugging only */
-};
-
-
-struct svga_fragment_shader
-{
-   struct svga_shader base;
-
-   struct draw_fragment_shader *draw_shader;
-
-   /** Mask of which generic varying variables are read by this shader */
-   unsigned generic_inputs;
-   /** Table mapping original TGSI generic indexes to low integers */
-   int8_t generic_remap_table[MAX_GENERIC_VARYING];
-};
-
-
-struct svga_vertex_shader
-{
-   struct svga_shader base;
-
-   struct draw_vertex_shader *draw_shader;
-};
-
-
 struct svga_cache_context;
 struct svga_tracked_state;

 struct svga_blend_state {
+   unsigned need_white_fragments:1;
+   unsigned independent_blend_enable:1;
+   unsigned alpha_to_coverage:1;
+   unsigned blend_color_alpha:1;  /**< set blend color to alpha value */

-   boolean need_white_fragments;
-
-   /* Should be per-render-target:
-    */
+   /** Per-render target state */
   struct {
      uint8_t writemask;

@ -112,8 +90,9 @@ struct svga_blend_state {
      uint8_t srcblend_alpha;
      uint8_t dstblend_alpha;
      uint8_t blendeq_alpha;
+   } rt[PIPE_MAX_COLOR_BUFS];

-   } rt[1];
+   SVGA3dBlendStateId id;  /**< vgpu10 */
 };

 struct svga_depth_stencil_state {
@ -139,6 +118,8 @@ struct svga_depth_stencil_state {
   unsigned stencil_writemask:8;

   float    alpharef;
+
+   SVGA3dDepthStencilStateId id;  /**< vgpu10 */
 };

 #define SVGA_UNFILLED_DISABLE 0
@ -167,11 +148,13 @@ struct svga_rasterizer_state {
   float pointsize;
   float linewidth;
   
-   unsigned hw_unfilled:16;         /* PIPE_POLYGON_MODE_x */
+   unsigned hw_fillmode:2;         /* PIPE_POLYGON_MODE_x */

   /** Which prims do we need help for?  Bitmask of (1 << PIPE_PRIM_x) flags */
   unsigned need_pipeline:16;

+   SVGA3dRasterizerStateId id;    /**< vgpu10 */
+
   /** For debugging: */
   const char* need_pipeline_tris_str;
   const char* need_pipeline_lines_str;
@ -195,15 +178,45 @@ struct svga_sampler_state {
   unsigned min_lod;
   unsigned view_min_lod;
   unsigned view_max_lod;
+
+   SVGA3dSamplerId id;
 };

+
+struct svga_pipe_sampler_view
+{
+   struct pipe_sampler_view base;
+
+   SVGA3dShaderResourceViewId id;
+};
+
+
+static inline struct svga_pipe_sampler_view *
+svga_pipe_sampler_view(struct pipe_sampler_view *v)
+{
+   return (struct svga_pipe_sampler_view *) v;
+}
+
+
 struct svga_velems_state {
   unsigned count;
   struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
   SVGA3dDeclType decl_type[PIPE_MAX_ATTRIBS]; /**< vertex attrib formats */
-   unsigned adjust_attrib_range; /* bitmask of attrs needing range adjustment */
-   unsigned adjust_attrib_w_1;   /* bitmask of attrs needing w = 1 */
+
+   /** Bitmasks indicating which attributes need format conversion */
+   unsigned adjust_attrib_range;     /**< range adjustment */
+   unsigned attrib_is_pure_int;      /**< pure int */
+   unsigned adjust_attrib_w_1;       /**< set w = 1 */
+   unsigned adjust_attrib_itof;      /**< int->float */
+   unsigned adjust_attrib_utof;      /**< uint->float */
+   unsigned attrib_is_bgra;          /**< R / B swizzling */
+   unsigned attrib_puint_to_snorm;   /**< 10_10_10_2 packed uint -> snorm */
+   unsigned attrib_puint_to_uscaled; /**< 10_10_10_2 packed uint -> uscaled */
+   unsigned attrib_puint_to_sscaled; /**< 10_10_10_2 packed uint -> sscaled */
+
   boolean need_swvfetch;
+
+   SVGA3dElementLayoutId id; /**< VGPU10 */
 };

 /* Use to calculate differences between state emitted to hardware and
@ -214,16 +227,22 @@ struct svga_state
   const struct svga_blend_state *blend;
   const struct svga_depth_stencil_state *depth;
   const struct svga_rasterizer_state *rast;
-   const struct svga_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+   const struct svga_sampler_state *sampler[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
   const struct svga_velems_state *velems;

-   struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; /* or texture ID's? */
+   struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; /* or texture ID's? */
   struct svga_fragment_shader *fs;
   struct svga_vertex_shader *vs;
+   struct svga_geometry_shader *user_gs; /* user-specified GS */
+   struct svga_geometry_shader *gs;      /* derived GS */

   struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
   struct pipe_index_buffer ib;
-   struct pipe_constant_buffer cbufs[PIPE_SHADER_TYPES];
+   /** Constant buffers for each shader.
+    * The size should probably always match with that of
+    * svga_shader_emitter_v10.num_shader_consts.
+    */
+   struct pipe_constant_buffer constbufs[PIPE_SHADER_TYPES][SVGA_MAX_CONST_BUFS];

   struct pipe_framebuffer_state framebuffer;
   float depthscale;
@ -240,8 +259,8 @@ struct svga_state
   struct pipe_clip_state clip;
   struct pipe_viewport_state viewport;

-   unsigned num_samplers;
-   unsigned num_sampler_views;
+   unsigned num_samplers[PIPE_SHADER_TYPES];
+   unsigned num_sampler_views[PIPE_SHADER_TYPES];
   unsigned num_vertex_buffers;
   unsigned reduced_prim;

@ -249,6 +268,8 @@ struct svga_state
      unsigned flag_1d;
      unsigned flag_srgb;
   } tex_flags;
+
+   unsigned sample_mask;
 };

 struct svga_prescale {
@ -262,9 +283,7 @@ struct svga_prescale {
 */
 struct svga_hw_clear_state
 {
-   struct {
-      unsigned x,y,w,h;
-   } viewport;
+   SVGA3dRect viewport;

   struct {
      float zmin, zmax;
@ -291,16 +310,29 @@ struct svga_hw_draw_state
   unsigned ts[SVGA3D_PIXEL_SAMPLERREG_MAX][SVGA3D_TS_MAX];
   float cb[PIPE_SHADER_TYPES][SVGA3D_CONSTREG_MAX][4];

-   /**
-    * For guest backed shader constants only.
-    */
-   struct svga_winsys_surface *hw_cb[PIPE_SHADER_TYPES];
-
   struct svga_shader_variant *fs;
   struct svga_shader_variant *vs;
+   struct svga_shader_variant *gs;
   struct svga_hw_view_state views[PIPE_MAX_SAMPLERS];
-
   unsigned num_views;
+   struct pipe_resource *constbuf[PIPE_SHADER_TYPES];
+
+   /* Bitmask of enabled constant buffers */
+   unsigned enabled_constbufs[PIPE_SHADER_TYPES];
+
+   /* VGPU10 HW state (used to prevent emitting redundant state) */
+   SVGA3dDepthStencilStateId depth_stencil_id;
+   unsigned stencil_ref;
+   SVGA3dBlendStateId blend_id;
+   float blend_factor[4];
+   unsigned blend_sample_mask;
+   SVGA3dRasterizerStateId rasterizer_id;
+   SVGA3dElementLayoutId layout_id;
+   SVGA3dPrimitiveType topology;
+
+   /* used for rebinding */
+   unsigned num_sampler_views[PIPE_SHADER_TYPES];
+   unsigned default_constbuf_size[PIPE_SHADER_TYPES];
 };


@ -326,12 +358,14 @@ struct svga_sw_state
 struct svga_hw_queue;

 struct svga_query;
+struct svga_qmem_alloc_entry;

 struct svga_context
 {
   struct pipe_context pipe;
   struct svga_winsys_context *swc;
   struct blitter_context *blitter;
+   struct u_upload_mgr *const0_upload;

   struct {
      boolean no_swtnl;
@ -355,12 +389,42 @@ struct svga_context
      boolean new_vdecl;
   } swtnl;

+   /* Bitmask of blend state objects IDs */
+   struct util_bitmask *blend_object_id_bm;
+
+   /* Bitmask of depth/stencil state objects IDs */
+   struct util_bitmask *ds_object_id_bm;
+
+   /* Bitmask of input element object IDs */
+   struct util_bitmask *input_element_object_id_bm;
+
+   /* Bitmask of rasterizer object IDs */
+   struct util_bitmask *rast_object_id_bm;
+
+   /* Bitmask of sampler state objects IDs */
+   struct util_bitmask *sampler_object_id_bm;
+
+   /* Bitmask of sampler view IDs */
+   struct util_bitmask *sampler_view_id_bm;
+
   /* Bitmask of used shader IDs */
   struct util_bitmask *shader_id_bm;

+   /* Bitmask of used surface view IDs */
+   struct util_bitmask *surface_view_id_bm;
+
+   /* Bitmask of used stream output IDs */
+   struct util_bitmask *stream_output_id_bm;
+
+   /* Bitmask of used query IDs */
+   struct util_bitmask *query_id_bm;
+
   struct {
      unsigned dirty[SVGA_STATE_MAX];

+      /** bitmasks of which const buffers are changed */
+      unsigned dirty_constbufs[PIPE_SHADER_TYPES];
+
      unsigned texture_timestamp;

      /* 
@ -373,17 +437,28 @@ struct svga_context
   struct svga_state curr;      /* state from the state tracker */
   unsigned dirty;              /* statechanges since last update_state() */

-   struct {
-      unsigned rendertargets:1;
-      unsigned texture_samplers:1;
-      unsigned vs:1;
-      unsigned fs:1;
+   union {
+      struct {
+         unsigned rendertargets:1;
+         unsigned texture_samplers:1;
+         unsigned constbufs:1;
+         unsigned vs:1;
+         unsigned fs:1;
+         unsigned gs:1;
+         unsigned query:1;
+      } flags;
+      unsigned val;
   } rebind;

   struct svga_hwtnl *hwtnl;

-   /** The occlusion query currently in progress */
-   struct svga_query *sq;
+   /** Query state */
+   struct svga_winsys_gb_query *gb_query;     /**< gb query object, one per context */
+   unsigned gb_query_len;                     /**< gb query object size */
+   struct util_bitmask *gb_query_alloc_mask;  /**< gb query object allocation mask */
+   struct svga_qmem_alloc_entry *gb_query_map[SVGA_QUERY_MAX];
+                                              /**< query mem block mapping */
+   struct svga_query *sq[SVGA_QUERY_MAX];     /**< queries currently in progress */

   /** List of buffers with queued transfers */
   struct list_head dirty_buffers;
@ -391,12 +466,32 @@ struct svga_context
   /** performance / info queries */
   uint64_t num_draw_calls;  /**< SVGA_QUERY_DRAW_CALLS */
   uint64_t num_fallbacks;   /**< SVGA_QUERY_FALLBACKS */
+
+   /** The currently bound stream output targets */
+   unsigned num_so_targets;
+   struct svga_winsys_surface *so_surfaces[SVGA3D_DX_MAX_SOTARGETS];
+   struct pipe_stream_output_target *so_targets[SVGA3D_DX_MAX_SOTARGETS];
+   struct svga_stream_output *current_so;
+
+   /** A blend state with blending disabled, for falling back to when blending
+    * is illegal (e.g. an integer texture is bound)
+    */
+   struct svga_blend_state *noop_blend;
+
+   struct {
+      struct pipe_resource *texture;
+      struct svga_pipe_sampler_view *sampler_view;
+      void *sampler;
+   } polygon_stipple;
+
+   /** Alternate rasterizer states created for point sprite */
+   struct svga_rasterizer_state *rasterizer_no_cull[2];
 };

 /* A flag for each state_tracker state object:
 */
 #define SVGA_NEW_BLEND               0x1
-#define SVGA_NEW_DEPTH_STENCIL       0x2
+#define SVGA_NEW_DEPTH_STENCIL_ALPHA 0x2
 #define SVGA_NEW_RAST                0x4
 #define SVGA_NEW_SAMPLER             0x8
 #define SVGA_NEW_TEXTURE             0x10
@ -422,7 +517,9 @@ struct svga_context
 #define SVGA_NEW_VS_VARIANT          0x1000000
 #define SVGA_NEW_TEXTURE_FLAGS       0x4000000
 #define SVGA_NEW_STENCIL_REF         0x8000000
-
+#define SVGA_NEW_GS                  0x10000000
+#define SVGA_NEW_GS_CONST_BUFFER     0x20000000
+#define SVGA_NEW_GS_VARIANT          0x40000000



@ -457,11 +554,13 @@ void svga_init_rasterizer_functions( struct svga_context *svga );
 void svga_init_sampler_functions( struct svga_context *svga );
 void svga_init_fs_functions( struct svga_context *svga );
 void svga_init_vs_functions( struct svga_context *svga );
+void svga_init_gs_functions( struct svga_context *svga );
 void svga_init_vertex_functions( struct svga_context *svga );
 void svga_init_constbuffer_functions( struct svga_context *svga );
 void svga_init_draw_functions( struct svga_context *svga );
 void svga_init_query_functions( struct svga_context *svga );
 void svga_init_surface_functions(struct svga_context *svga);
+void svga_init_stream_output_functions( struct svga_context *svga );

 void svga_cleanup_vertex_state( struct svga_context *svga );
 void svga_cleanup_tss_binding( struct svga_context *svga );
@ -470,6 +569,8 @@ void svga_cleanup_framebuffer( struct svga_context *svga );
 void svga_context_flush( struct svga_context *svga,
                         struct pipe_fence_handle **pfence );

+void svga_context_finish(struct svga_context *svga);
+
 void svga_hwtnl_flush_retry( struct svga_context *svga );
 void svga_hwtnl_flush_buffer( struct svga_context *svga,
                              struct pipe_resource *buffer );
@ -504,5 +605,22 @@ svga_have_gb_dma(const struct svga_context *svga)
   return svga_screen(svga->pipe.screen)->sws->have_gb_dma;
 }

+static inline boolean
+svga_have_vgpu10(const struct svga_context *svga)
+{
+   return svga_screen(svga->pipe.screen)->sws->have_vgpu10;
+}
+
+static inline boolean
+svga_need_to_rebind_resources(const struct svga_context *svga)
+{
+   return svga_screen(svga->pipe.screen)->sws->need_to_rebind_resources;
+}
+
+static inline boolean
+svga_rects_equal(const SVGA3dRect *r1, const SVGA3dRect *r2)
+{
+   return memcmp(r1, r2, sizeof(*r1)) == 0;
+}

 #endif
--- a/src/gallium/drivers/svga/svga_debug.h
+++ b/src/gallium/drivers/svga/svga_debug.h
@ -29,21 +29,22 @@
 #include "pipe/p_compiler.h"
 #include "util/u_debug.h"

-#define DEBUG_DMA      0x1
-#define DEBUG_TGSI     0x4
-#define DEBUG_PIPE     0x8
-#define DEBUG_STATE    0x10
-#define DEBUG_SCREEN   0x20
-#define DEBUG_TEX      0x40
-#define DEBUG_SWTNL    0x80
-#define DEBUG_CONSTS   0x100
-#define DEBUG_VIEWPORT 0x200
-#define DEBUG_VIEWS    0x400
-#define DEBUG_PERF     0x800    /* print something when we hit any slow path operation */
-#define DEBUG_FLUSH    0x1000   /* flush after every draw */
-#define DEBUG_SYNC     0x2000   /* sync after every flush */
-#define DEBUG_QUERY    0x4000
-#define DEBUG_CACHE    0x8000
+#define DEBUG_DMA          0x1
+#define DEBUG_TGSI         0x4
+#define DEBUG_PIPE         0x8
+#define DEBUG_STATE        0x10
+#define DEBUG_SCREEN       0x20
+#define DEBUG_TEX          0x40
+#define DEBUG_SWTNL        0x80
+#define DEBUG_CONSTS       0x100
+#define DEBUG_VIEWPORT     0x200
+#define DEBUG_VIEWS        0x400
+#define DEBUG_PERF         0x800    /* print something when we hit any slow path operation */
+#define DEBUG_FLUSH        0x1000   /* flush after every draw */
+#define DEBUG_SYNC         0x2000   /* sync after every flush */
+#define DEBUG_QUERY        0x4000
+#define DEBUG_CACHE        0x8000
+#define DEBUG_STREAMOUT    0x10000

 #ifdef DEBUG
 extern int SVGA_DEBUG;
--- a/src/gallium/drivers/svga/svga_draw.c
+++ b/src/gallium/drivers/svga/svga_draw.c
@ -26,17 +26,19 @@
 #include "pipe/p_compiler.h"
 #include "util/u_inlines.h"
 #include "pipe/p_defines.h"
+#include "util/u_helpers.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
-#include "util/u_upload_mgr.h"

 #include "svga_context.h"
 #include "svga_draw.h"
 #include "svga_draw_private.h"
 #include "svga_debug.h"
 #include "svga_screen.h"
+#include "svga_resource.h"
 #include "svga_resource_buffer.h"
 #include "svga_resource_texture.h"
+#include "svga_shader.h"
 #include "svga_surface.h"
 #include "svga_winsys.h"
 #include "svga_cmd.h"
@ -71,8 +73,8 @@ svga_hwtnl_destroy(struct svga_hwtnl *hwtnl)
      }
   }

-   for (i = 0; i < hwtnl->cmd.vdecl_count; i++)
-      pipe_resource_reference(&hwtnl->cmd.vdecl_vb[i], NULL);
+   for (i = 0; i < hwtnl->cmd.vbuf_count; i++)
+      pipe_resource_reference(&hwtnl->cmd.vbufs[i].buffer, NULL);

   for (i = 0; i < hwtnl->cmd.prim_count; i++)
      pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL);
@ -85,45 +87,55 @@ void
 svga_hwtnl_set_flatshade(struct svga_hwtnl *hwtnl,
                         boolean flatshade, boolean flatshade_first)
 {
-   hwtnl->hw_pv = PV_FIRST;
+   struct svga_screen *svgascreen = svga_screen(hwtnl->svga->pipe.screen);
+
+   /* User-specified PV */
   hwtnl->api_pv = (flatshade && !flatshade_first) ? PV_LAST : PV_FIRST;
+
+   /* Device supported PV */
+   if (svgascreen->haveProvokingVertex) {
+      /* use the mode specified by the user */
+      hwtnl->hw_pv = hwtnl->api_pv;
+   }
+   else {
+      /* the device only supports the first provoking vertex */
+      hwtnl->hw_pv = PV_FIRST;
+   }
 }


 void
-svga_hwtnl_set_unfilled(struct svga_hwtnl *hwtnl, unsigned mode)
+svga_hwtnl_set_fillmode(struct svga_hwtnl *hwtnl, unsigned mode)
 {
   hwtnl->api_fillmode = mode;
 }


 void
-svga_hwtnl_reset_vdecl(struct svga_hwtnl *hwtnl, unsigned count)
+svga_hwtnl_vertex_decls(struct svga_hwtnl *hwtnl,
+                        unsigned count,
+                        const SVGA3dVertexDecl * decls,
+                        const unsigned *buffer_indexes,
+                        SVGA3dElementLayoutId layout_id)
 {
-   unsigned i;
-
   assert(hwtnl->cmd.prim_count == 0);
-
-   for (i = count; i < hwtnl->cmd.vdecl_count; i++) {
-      pipe_resource_reference(&hwtnl->cmd.vdecl_vb[i], NULL);
-   }
-
   hwtnl->cmd.vdecl_count = count;
+   hwtnl->cmd.vdecl_layout_id = layout_id;
+   memcpy(hwtnl->cmd.vdecl, decls, count * sizeof(*decls));
+   memcpy(hwtnl->cmd.vdecl_buffer_index, buffer_indexes,
+          count * sizeof(unsigned));
 }


+/**
+ * Specify vertex buffers for hardware drawing.
+ */
 void
-svga_hwtnl_vdecl(struct svga_hwtnl *hwtnl,
-                 unsigned i,
-                 const SVGA3dVertexDecl * decl, struct pipe_resource *vb)
+svga_hwtnl_vertex_buffers(struct svga_hwtnl *hwtnl,
+                          unsigned count, struct pipe_vertex_buffer *buffers)
 {
-   assert(hwtnl->cmd.prim_count == 0);
-
-   assert(i < hwtnl->cmd.vdecl_count);
-
-   hwtnl->cmd.vdecl[i] = *decl;
-
-   pipe_resource_reference(&hwtnl->cmd.vdecl_vb[i], vb);
+   util_set_vertex_buffers_count(hwtnl->cmd.vbufs,
+                                 &hwtnl->cmd.vbuf_count, buffers, 0, count);
 }


@ -145,8 +157,8 @@ svga_hwtnl_is_buffer_referred(struct svga_hwtnl *hwtnl,
      return FALSE;
   }

-   for (i = 0; i < hwtnl->cmd.vdecl_count; ++i) {
-      if (hwtnl->cmd.vdecl_vb[i] == buffer) {
+   for (i = 0; i < hwtnl->cmd.vbuf_count; ++i) {
+      if (hwtnl->cmd.vbufs[i].buffer == buffer) {
         return TRUE;
      }
   }
@ -161,116 +173,440 @@ svga_hwtnl_is_buffer_referred(struct svga_hwtnl *hwtnl,
 }


-enum pipe_error
-svga_hwtnl_flush(struct svga_hwtnl *hwtnl)
+static enum pipe_error
+draw_vgpu9(struct svga_hwtnl *hwtnl)
 {
   struct svga_winsys_context *swc = hwtnl->cmd.swc;
   struct svga_context *svga = hwtnl->svga;
   enum pipe_error ret;
+   struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX];
+   struct svga_winsys_surface *ib_handle[QSZ];
+   struct svga_winsys_surface *handle;
+   SVGA3dVertexDecl *vdecl;
+   SVGA3dPrimitiveRange *prim;
+   unsigned i;

-   if (hwtnl->cmd.prim_count) {
-      struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX];
-      struct svga_winsys_surface *ib_handle[QSZ];
-      struct svga_winsys_surface *handle;
-      SVGA3dVertexDecl *vdecl;
-      SVGA3dPrimitiveRange *prim;
-      unsigned i;
+   for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
+      unsigned j = hwtnl->cmd.vdecl_buffer_index[i];
+      handle = svga_buffer_handle(svga, hwtnl->cmd.vbufs[j].buffer);
+      if (handle == NULL)
+         return PIPE_ERROR_OUT_OF_MEMORY;

-      for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
-         assert(!svga_buffer_is_user_buffer(hwtnl->cmd.vdecl_vb[i]));
-         handle = svga_buffer_handle(svga, hwtnl->cmd.vdecl_vb[i]);
+      vb_handle[i] = handle;
+   }
+
+   for (i = 0; i < hwtnl->cmd.prim_count; i++) {
+      if (hwtnl->cmd.prim_ib[i]) {
+         handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]);
         if (handle == NULL)
            return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+      else
+         handle = NULL;

-         vb_handle[i] = handle;
+      ib_handle[i] = handle;
+   }
+
+   if (svga->rebind.flags.rendertargets) {
+      ret = svga_reemit_framebuffer_bindings(svga);
+      if (ret != PIPE_OK) {
+         return ret;
+      }
+   }
+
+   if (svga->rebind.flags.texture_samplers) {
+      ret = svga_reemit_tss_bindings(svga);
+      if (ret != PIPE_OK) {
+         return ret;
+      }
+   }
+
+   if (svga->rebind.flags.vs) {
+      ret = svga_reemit_vs_bindings(svga);
+      if (ret != PIPE_OK) {
+         return ret;
+      }
+   }
+
+   if (svga->rebind.flags.fs) {
+      ret = svga_reemit_fs_bindings(svga);
+      if (ret != PIPE_OK) {
+         return ret;
+      }
+   }
+
+   SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n",
+            svga->curr.framebuffer.cbufs[0] ?
+            svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL,
+            hwtnl->cmd.prim_count);
+
+   ret = SVGA3D_BeginDrawPrimitives(swc,
+                                    &vdecl,
+                                    hwtnl->cmd.vdecl_count,
+                                    &prim, hwtnl->cmd.prim_count);
+   if (ret != PIPE_OK)
+      return ret;
+
+   memcpy(vdecl,
+          hwtnl->cmd.vdecl,
+          hwtnl->cmd.vdecl_count * sizeof hwtnl->cmd.vdecl[0]);
+
+   for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
+      /* check for 4-byte alignment */
+      assert(vdecl[i].array.offset % 4 == 0);
+      assert(vdecl[i].array.stride % 4 == 0);
+
+      /* Given rangeHint is considered to be relative to indexBias, and
+       * indexBias varies per primitive, we cannot accurately supply a
+       * rangeHint when emitting more than one primitive per draw command.
+       */
+      if (hwtnl->cmd.prim_count == 1) {
+         vdecl[i].rangeHint.first = hwtnl->cmd.min_index[0];
+         vdecl[i].rangeHint.last = hwtnl->cmd.max_index[0] + 1;
+      }
+      else {
+         vdecl[i].rangeHint.first = 0;
+         vdecl[i].rangeHint.last = 0;
      }

-      for (i = 0; i < hwtnl->cmd.prim_count; i++) {
-         if (hwtnl->cmd.prim_ib[i]) {
-            assert(!svga_buffer_is_user_buffer(hwtnl->cmd.prim_ib[i]));
-            handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]);
-            if (handle == NULL)
-               return PIPE_ERROR_OUT_OF_MEMORY;
+      swc->surface_relocation(swc,
+                              &vdecl[i].array.surfaceId,
+                              NULL, vb_handle[i], SVGA_RELOC_READ);
+   }
+
+   memcpy(prim,
+          hwtnl->cmd.prim, hwtnl->cmd.prim_count * sizeof hwtnl->cmd.prim[0]);
+
+   for (i = 0; i < hwtnl->cmd.prim_count; i++) {
+      swc->surface_relocation(swc,
+                              &prim[i].indexArray.surfaceId,
+                              NULL, ib_handle[i], SVGA_RELOC_READ);
+      pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL);
+   }
+
+   SVGA_FIFOCommitAll(swc);
+
+   hwtnl->cmd.prim_count = 0;
+
+   return PIPE_OK;
+}
+
+
+static SVGA3dSurfaceFormat
+xlate_index_format(unsigned indexWidth)
+{
+   if (indexWidth == 2) {
+      return SVGA3D_R16_UINT;
+   }
+   else if (indexWidth == 4) {
+      return SVGA3D_R32_UINT;
+   }
+   else {
+      assert(!"Bad indexWidth");
+      return SVGA3D_R32_UINT;
+   }
+}
+
+
+static enum pipe_error
+validate_sampler_resources(struct svga_context *svga)
+{
+   unsigned shader;
+
+   assert(svga_have_vgpu10(svga));
+
+   for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
+      unsigned count = svga->curr.num_sampler_views[shader];
+      unsigned i;
+      struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS];
+      enum pipe_error ret;
+
+      /*
+       * Reference bound sampler resources to ensure pending updates are
+       * noticed by the device.
+       */
+      for (i = 0; i < count; i++) {
+         struct svga_pipe_sampler_view *sv =
+            svga_pipe_sampler_view(svga->curr.sampler_views[shader][i]);
+
+         if (sv) {
+            if (sv->base.texture->target == PIPE_BUFFER) {
+               surfaces[i] = svga_buffer_handle(svga, sv->base.texture);
+            }
+            else {
+               surfaces[i] = svga_texture(sv->base.texture)->handle;
+            }
         }
         else {
-            handle = NULL;
-         }
-
-         ib_handle[i] = handle;
-      }
-
-      if (svga->rebind.rendertargets) {
-         ret = svga_reemit_framebuffer_bindings(svga);
-         if (ret != PIPE_OK) {
-            return ret;
+            surfaces[i] = NULL;
         }
      }

-      if (svga->rebind.texture_samplers) {
-         ret = svga_reemit_tss_bindings(svga);
-         if (ret != PIPE_OK) {
-            return ret;
+      if (shader == PIPE_SHADER_FRAGMENT &&
+          svga->curr.rast->templ.poly_stipple_enable) {
+         const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+         struct svga_pipe_sampler_view *sv =
+            svga->polygon_stipple.sampler_view;
+
+         assert(sv);
+         surfaces[unit] = svga_texture(sv->base.texture)->handle;
+         count = MAX2(count, unit+1);
+      }
+
+      /* rebind the shader resources if needed */
+      if (svga->rebind.flags.texture_samplers) {
+         for (i = 0; i < count; i++) {
+            if (surfaces[i]) {
+               ret = svga->swc->resource_rebind(svga->swc,
+                                                surfaces[i],
+                                                NULL,
+                                                SVGA_RELOC_READ);
+               if (ret != PIPE_OK)
+                  return ret;
+            }
+         }
+      }
+   }
+   svga->rebind.flags.texture_samplers = FALSE;
+
+   return PIPE_OK;
+}
+
+
+static enum pipe_error
+validate_constant_buffers(struct svga_context *svga)
+{
+   unsigned shader;
+
+   assert(svga_have_vgpu10(svga));
+
+   for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
+      enum pipe_error ret;
+      struct svga_buffer *buffer;
+      struct svga_winsys_surface *handle;
+      unsigned enabled_constbufs;
+
+      /* Rebind the default constant buffer if needed */
+      if (svga->rebind.flags.constbufs) {
+         buffer = svga_buffer(svga->state.hw_draw.constbuf[shader]);
+         if (buffer) {
+            ret = svga->swc->resource_rebind(svga->swc,
+                                             buffer->handle,
+                                             NULL,
+                                             SVGA_RELOC_READ);
+            if (ret != PIPE_OK)
+               return ret;
         }
      }

-      if (svga->rebind.vs) {
-         ret = svga_reemit_vs_bindings(svga);
-         if (ret != PIPE_OK) {
-            return ret;
+      /*
+       * Reference other bound constant buffers to ensure pending updates are
+       * noticed by the device.
+       */
+      enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] & ~1u;
+      while (enabled_constbufs) {
+         unsigned i = u_bit_scan(&enabled_constbufs);
+         buffer = svga_buffer(svga->curr.constbufs[shader][i].buffer);
+         if (buffer) {
+            handle = svga_buffer_handle(svga, &buffer->b.b);
+
+            if (svga->rebind.flags.constbufs) {
+               ret = svga->swc->resource_rebind(svga->swc,
+                                                handle,
+                                                NULL,
+                                                SVGA_RELOC_READ);
+               if (ret != PIPE_OK)
+                  return ret;
+            }
         }
      }
+   }
+   svga->rebind.flags.constbufs = FALSE;

-      if (svga->rebind.fs) {
-         ret = svga_reemit_fs_bindings(svga);
-         if (ret != PIPE_OK) {
-            return ret;
-         }
-      }
+   return PIPE_OK;
+}

-      SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n",
-               svga->curr.framebuffer.cbufs[0] ?
-               svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL,
-               hwtnl->cmd.prim_count);

-      ret = SVGA3D_BeginDrawPrimitives(swc, &vdecl, hwtnl->cmd.vdecl_count,
-                                       &prim, hwtnl->cmd.prim_count);
+static enum pipe_error
+draw_vgpu10(struct svga_hwtnl *hwtnl,
+            const SVGA3dPrimitiveRange *range,
+            unsigned vcount,
+            unsigned min_index,
+            unsigned max_index, struct pipe_resource *ib,
+            unsigned start_instance, unsigned instance_count)
+{
+   struct svga_context *svga = hwtnl->svga;
+   struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX];
+   struct svga_winsys_surface *ib_handle;
+   const unsigned vbuf_count = hwtnl->cmd.vbuf_count;
+   enum pipe_error ret;
+   unsigned i;
+
+   assert(svga_have_vgpu10(svga));
+   assert(hwtnl->cmd.prim_count == 0);
+
+   /* We need to reemit all the current resource bindings along with the Draw
+    * command to be sure that the referenced resources are available for the
+    * Draw command, just in case the surfaces associated with the resources
+    * are paged out.
+    */
+   if (svga->rebind.val) {
+      ret = svga_rebind_framebuffer_bindings(svga);
      if (ret != PIPE_OK)
         return ret;

-      memcpy(vdecl, hwtnl->cmd.vdecl,
-             hwtnl->cmd.vdecl_count * sizeof hwtnl->cmd.vdecl[0]);
-
-      for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
-         /* Given rangeHint is considered to be relative to indexBias, and 
-          * indexBias varies per primitive, we cannot accurately supply an 
-          * rangeHint when emitting more than one primitive per draw command.
-          */
-         if (hwtnl->cmd.prim_count == 1) {
-            vdecl[i].rangeHint.first = hwtnl->cmd.min_index[0];
-            vdecl[i].rangeHint.last = hwtnl->cmd.max_index[0] + 1;
-         }
-         else {
-            vdecl[i].rangeHint.first = 0;
-            vdecl[i].rangeHint.last = 0;
-         }
-
-         swc->surface_relocation(swc, &vdecl[i].array.surfaceId, NULL,
-                                 vb_handle[i], SVGA_RELOC_READ);
-      }
-
-      memcpy(prim, hwtnl->cmd.prim,
-             hwtnl->cmd.prim_count * sizeof hwtnl->cmd.prim[0]);
-
-      for (i = 0; i < hwtnl->cmd.prim_count; i++) {
-         swc->surface_relocation(swc, &prim[i].indexArray.surfaceId, NULL,
-                                 ib_handle[i], SVGA_RELOC_READ);
-         pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL);
-      }
-
-      SVGA_FIFOCommitAll(swc);
-      hwtnl->cmd.prim_count = 0;
+      ret = svga_rebind_shaders(svga);
+      if (ret != PIPE_OK)
+         return ret;
   }

+   ret = validate_sampler_resources(svga);
+   if (ret != PIPE_OK)
+      return ret;
+
+   ret = validate_constant_buffers(svga);
+   if (ret != PIPE_OK)
+      return ret;
+
+   /* Get handle for each referenced vertex buffer */
+   for (i = 0; i < vbuf_count; i++) {
+      struct svga_buffer *sbuf = svga_buffer(hwtnl->cmd.vbufs[i].buffer);
+
+      if (sbuf) {
+         assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_VERTEX_BUFFER);
+         vb_handle[i] = svga_buffer_handle(svga, &sbuf->b.b);
+         if (vb_handle[i] == NULL)
+            return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+      else {
+         vb_handle[i] = NULL;
+      }
+   }
+
+   /* Get handle for the index buffer */
+   if (ib) {
+      struct svga_buffer *sbuf = svga_buffer(ib);
+
+      assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_INDEX_BUFFER);
+      (void) sbuf; /* silence unused var warning */
+
+      ib_handle = svga_buffer_handle(svga, ib);
+      if (ib_handle == NULL)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+   else {
+      ib_handle = NULL;
+   }
+
+   /* setup vertex attribute input layout */
+   if (svga->state.hw_draw.layout_id != hwtnl->cmd.vdecl_layout_id) {
+      ret = SVGA3D_vgpu10_SetInputLayout(svga->swc,
+                                         hwtnl->cmd.vdecl_layout_id);
+      if (ret != PIPE_OK)
+         return ret;
+
+      svga->state.hw_draw.layout_id = hwtnl->cmd.vdecl_layout_id;
+   }
+
+   /* setup vertex buffers */
+   {
+      SVGA3dVertexBuffer buffers[PIPE_MAX_ATTRIBS];
+
+      for (i = 0; i < vbuf_count; i++) {
+         buffers[i].stride = hwtnl->cmd.vbufs[i].stride;
+         buffers[i].offset = hwtnl->cmd.vbufs[i].buffer_offset;
+      }
+      if (vbuf_count > 0) {
+         ret = SVGA3D_vgpu10_SetVertexBuffers(svga->swc, vbuf_count,
+                                              0,    /* startBuffer */
+                                              buffers, vb_handle);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+   }
+
+   /* Set primitive type (line, tri, etc) */
+   if (svga->state.hw_draw.topology != range->primType) {
+      ret = SVGA3D_vgpu10_SetTopology(svga->swc, range->primType);
+      if (ret != PIPE_OK)
+         return ret;
+
+      svga->state.hw_draw.topology = range->primType;
+   }
+
+   if (ib_handle) {
+      /* indexed drawing */
+      SVGA3dSurfaceFormat indexFormat = xlate_index_format(range->indexWidth);
+
+      /* setup index buffer */
+      ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, ib_handle,
+                                         indexFormat,
+                                         range->indexArray.offset);
+      if (ret != PIPE_OK)
+         return ret;
+
+      if (instance_count > 1) {
+         ret = SVGA3D_vgpu10_DrawIndexedInstanced(svga->swc,
+                                                  vcount,
+                                                  instance_count,
+                                                  0, /* startIndexLocation */
+                                                  range->indexBias,
+                                                  start_instance);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+      else {
+         /* non-instanced drawing */
+         ret = SVGA3D_vgpu10_DrawIndexed(svga->swc,
+                                         vcount,
+                                         0,      /* startIndexLocation */
+                                         range->indexBias);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+   }
+   else {
+      /* non-indexed drawing */
+      if (instance_count > 1) {
+         ret = SVGA3D_vgpu10_DrawInstanced(svga->swc,
+                                           vcount,
+                                           instance_count,
+                                           range->indexBias,
+                                           start_instance);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+      else {
+         /* non-instanced */
+         ret = SVGA3D_vgpu10_Draw(svga->swc,
+                                  vcount,
+                                  range->indexBias);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+   }
+
+   hwtnl->cmd.prim_count = 0;
+
+   return PIPE_OK;
+}
+
+
+
+/**
+ * Emit any pending drawing commands to the command buffer.
+ * When we receive VGPU9 drawing commands we accumulate them and don't
+ * immediately emit them into the command buffer.
+ * This function needs to be called before we change state that could
+ * affect those pending draws.
+ */
+enum pipe_error
+svga_hwtnl_flush(struct svga_hwtnl *hwtnl)
+{
+   if (!svga_have_vgpu10(hwtnl->svga) && hwtnl->cmd.prim_count) {
+      /* we only queue up primitives for VGPU9 */
+      return draw_vgpu9(hwtnl);
+   }
   return PIPE_OK;
 }

@ -298,18 +634,28 @@ check_draw_params(struct svga_hwtnl *hwtnl,
 {
   unsigned i;

+   assert(!svga_have_vgpu10(hwtnl->svga));
+
   for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
-      struct pipe_resource *vb = hwtnl->cmd.vdecl_vb[i];
-      unsigned size = vb ? vb->width0 : 0;
+      unsigned j = hwtnl->cmd.vdecl_buffer_index[i];
+      const struct pipe_vertex_buffer *vb = &hwtnl->cmd.vbufs[j];
+      unsigned size = vb->buffer ? vb->buffer->width0 : 0;
      unsigned offset = hwtnl->cmd.vdecl[i].array.offset;
      unsigned stride = hwtnl->cmd.vdecl[i].array.stride;
      int index_bias = (int) range->indexBias + hwtnl->index_bias;
      unsigned width;

+      if (size == 0)
+         continue;
+
      assert(vb);
      assert(size);
      assert(offset < size);
      assert(min_index <= max_index);
+      (void) width;
+      (void) stride;
+      (void) offset;
+      (void) size;

      switch (hwtnl->cmd.vdecl[i].identity.type) {
      case SVGA3D_DECLTYPE_FLOAT1:
@ -390,6 +736,9 @@ check_draw_params(struct svga_hwtnl *hwtnl,
      assert(size);
      assert(offset < size);
      assert(stride);
+      (void) size;
+      (void) offset;
+      (void) stride;

      switch (range->primType) {
      case SVGA3D_PRIMITIVE_POINTLIST:
@ -421,33 +770,57 @@ check_draw_params(struct svga_hwtnl *hwtnl,
 }


+/**
+ * All drawing filters down into this function, either directly
+ * on the hardware path or after doing software vertex processing.
+ */
 enum pipe_error
 svga_hwtnl_prim(struct svga_hwtnl *hwtnl,
                const SVGA3dPrimitiveRange * range,
+                unsigned vcount,
                unsigned min_index,
-                unsigned max_index, struct pipe_resource *ib)
+                unsigned max_index, struct pipe_resource *ib,
+                unsigned start_instance, unsigned instance_count)
 {
   enum pipe_error ret = PIPE_OK;

+   if (svga_have_vgpu10(hwtnl->svga)) {
+      /* draw immediately */
+      ret = draw_vgpu10(hwtnl, range, vcount, min_index, max_index, ib,
+                        start_instance, instance_count);
+      if (ret != PIPE_OK) {
+         svga_context_flush(hwtnl->svga, NULL);
+         ret = draw_vgpu10(hwtnl, range, vcount, min_index, max_index, ib,
+                           start_instance, instance_count);
+         assert(ret == PIPE_OK);
+      }
+   }
+   else {
+      /* batch up drawing commands */
 #ifdef DEBUG
-   check_draw_params(hwtnl, range, min_index, max_index, ib);
+      check_draw_params(hwtnl, range, min_index, max_index, ib);
+      assert(start_instance == 0);
+      assert(instance_count <= 1);
+#else
+      (void) check_draw_params;
 #endif

-   if (hwtnl->cmd.prim_count + 1 >= QSZ) {
-      ret = svga_hwtnl_flush(hwtnl);
-      if (ret != PIPE_OK)
-         return ret;
+      if (hwtnl->cmd.prim_count + 1 >= QSZ) {
+         ret = svga_hwtnl_flush(hwtnl);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+
+      /* min/max indices are relative to bias */
+      hwtnl->cmd.min_index[hwtnl->cmd.prim_count] = min_index;
+      hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index;
+
+      hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range;
+      hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias;
+
+      pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib);
+      hwtnl->cmd.prim_count++;
   }

-   /* min/max indices are relative to bias */
-   hwtnl->cmd.min_index[hwtnl->cmd.prim_count] = min_index;
-   hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index;
-
-   hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range;
-   hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias;
-
-   pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib);
-   hwtnl->cmd.prim_count++;
-
   return ret;
 }
--- a/src/gallium/drivers/svga/svga_draw.h
+++ b/src/gallium/drivers/svga/svga_draw.h
@ -35,54 +35,50 @@ struct svga_winsys_context;
 struct svga_screen;
 struct svga_context;
 struct pipe_resource;
+struct u_upload_mgr;

-struct svga_hwtnl *
-svga_hwtnl_create(struct svga_context *svga);
+struct svga_hwtnl *svga_hwtnl_create(struct svga_context *svga);

-void svga_hwtnl_destroy( struct svga_hwtnl *hwtnl );
+void svga_hwtnl_destroy(struct svga_hwtnl *hwtnl);

-void svga_hwtnl_set_flatshade( struct svga_hwtnl *hwtnl,
-                               boolean flatshade,
-                               boolean flatshade_first );
+void svga_hwtnl_set_flatshade(struct svga_hwtnl *hwtnl,
+                              boolean flatshade, boolean flatshade_first);

-void svga_hwtnl_set_unfilled( struct svga_hwtnl *hwtnl,
-                              unsigned mode );
+void svga_hwtnl_set_fillmode(struct svga_hwtnl *hwtnl, unsigned mode);

-void svga_hwtnl_vdecl( struct svga_hwtnl *hwtnl,
-                       unsigned i,
-                       const SVGA3dVertexDecl *decl,
-                       struct pipe_resource *vb);
+void
+svga_hwtnl_vertex_decls(struct svga_hwtnl *hwtnl,
+                        unsigned count,
+                        const SVGA3dVertexDecl * decls,
+                        const unsigned *buffer_indexes,
+                        SVGA3dElementLayoutId layoutId);

-void svga_hwtnl_reset_vdecl( struct svga_hwtnl *hwtnl,
-                             unsigned count );
-
-
-enum pipe_error 
-svga_hwtnl_draw_arrays( struct svga_hwtnl *hwtnl,
-                        unsigned prim, 
-                        unsigned start, 
-                        unsigned count);
+void
+svga_hwtnl_vertex_buffers(struct svga_hwtnl *hwtnl,
+                          unsigned count, struct pipe_vertex_buffer *buffers);

 enum pipe_error
-svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl,
-                                struct pipe_resource *indexBuffer,
-                                unsigned index_size,
-                                int index_bias,
-                                unsigned min_index,
-                                unsigned max_index,
-                                unsigned prim, 
-                                unsigned start, 
-                                unsigned count );
+svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
+                       unsigned prim, unsigned start, unsigned count,
+                       unsigned start_instance, unsigned instance_count);
+
+enum pipe_error
+svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl,
+                               struct pipe_resource *indexBuffer,
+                               unsigned index_size,
+                               int index_bias,
+                               unsigned min_index,
+                               unsigned max_index,
+                               unsigned prim, unsigned start, unsigned count,
+                               unsigned start_instance, unsigned instance_count);

 boolean
-svga_hwtnl_is_buffer_referred( struct svga_hwtnl *hwtnl,
-                               struct pipe_resource *buffer );
+svga_hwtnl_is_buffer_referred(struct svga_hwtnl *hwtnl,
+                              struct pipe_resource *buffer);

-enum pipe_error
-svga_hwtnl_flush( struct svga_hwtnl *hwtnl );
+enum pipe_error svga_hwtnl_flush(struct svga_hwtnl *hwtnl);

-void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl,
-                                int index_bias);
+void svga_hwtnl_set_index_bias(struct svga_hwtnl *hwtnl, int index_bias);


 #endif /* SVGA_DRAW_H_ */
--- a/src/gallium/drivers/svga/svga_draw_arrays.c
+++ b/src/gallium/drivers/svga/svga_draw_arrays.c
@ -49,8 +49,8 @@ generate_indices(struct svga_hwtnl *hwtnl,
   struct pipe_resource *dst = NULL;
   void *dst_map = NULL;

-   dst = pipe_buffer_create(pipe->screen,
-                            PIPE_BIND_INDEX_BUFFER, PIPE_USAGE_DEFAULT, size);
+   dst = pipe_buffer_create(pipe->screen, PIPE_BIND_INDEX_BUFFER,
+                            PIPE_USAGE_IMMUTABLE, size);
   if (dst == NULL)
      goto fail;

@ -168,7 +168,8 @@ retrieve_or_generate_indices(struct svga_hwtnl *hwtnl,

 static enum pipe_error
 simple_draw_arrays(struct svga_hwtnl *hwtnl,
-                   unsigned prim, unsigned start, unsigned count)
+                   unsigned prim, unsigned start, unsigned count,
+                   unsigned start_instance, unsigned instance_count)
 {
   SVGA3dPrimitiveRange range;
   unsigned hw_prim;
@ -191,13 +192,16 @@ simple_draw_arrays(struct svga_hwtnl *hwtnl,
    * looking at those numbers knows to adjust them by
    * range.indexBias.
    */
-   return svga_hwtnl_prim(hwtnl, &range, 0, count - 1, NULL);
+   return svga_hwtnl_prim(hwtnl, &range, count,
+                          0, count - 1, NULL,
+                          start_instance, instance_count);
 }


 enum pipe_error
 svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
-                       unsigned prim, unsigned start, unsigned count)
+                       unsigned prim, unsigned start, unsigned count,
+                       unsigned start_instance, unsigned instance_count)
 {
   unsigned gen_prim, gen_size, gen_nr, gen_type;
   u_generate_func gen_func;
@ -228,7 +232,8 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
   }

   if (gen_type == U_GENERATE_LINEAR) {
-      return simple_draw_arrays(hwtnl, gen_prim, start, count);
+      return simple_draw_arrays(hwtnl, gen_prim, start, count,
+                                start_instance, instance_count);
   }
   else {
      struct pipe_resource *gen_buf = NULL;
@ -250,8 +255,9 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
                                                  start,
                                                  0,
                                                  count - 1,
-                                                  gen_prim, 0, gen_nr);
-
+                                                  gen_prim, 0, gen_nr,
+                                                  start_instance,
+                                                  instance_count);
      if (ret != PIPE_OK)
         goto done;

--- a/src/gallium/drivers/svga/svga_draw_elements.c
+++ b/src/gallium/drivers/svga/svga_draw_elements.c
@ -25,6 +25,7 @@

 #include "util/u_inlines.h"
 #include "util/u_prim.h"
+#include "util/u_upload_mgr.h"
 #include "indices/u_indices.h"

 #include "svga_cmd.h"
@ -45,7 +46,7 @@ translate_indices(struct svga_hwtnl *hwtnl, struct pipe_resource *src,
   struct pipe_context *pipe = &hwtnl->svga->pipe;
   struct pipe_transfer *src_transfer = NULL;
   struct pipe_transfer *dst_transfer = NULL;
-   unsigned size;
+   unsigned size = index_size * nr;
   const void *src_map = NULL;
   struct pipe_resource *dst = NULL;
   void *dst_map = NULL;
@ -98,7 +99,9 @@ svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl,
                                      unsigned index_size, int index_bias,
                                      unsigned min_index, unsigned max_index,
                                      unsigned prim, unsigned start,
-                                      unsigned count)
+                                      unsigned count,
+                                      unsigned start_instance,
+                                      unsigned instance_count)
 {
   SVGA3dPrimitiveRange range;
   unsigned hw_prim;
@ -109,12 +112,6 @@ svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl,
   if (hw_count == 0)
      return PIPE_OK; /* nothing to draw */

-   /* We should never see user-space buffers in the driver.  The vbuf
-    * module should have converted them into real buffers.
-    */
-   if (index_buffer)
-      assert(!svga_buffer_is_user_buffer(index_buffer));
-
   range.primType = hw_prim;
   range.primitiveCount = hw_count;
   range.indexArray.offset = index_offset;
@ -122,7 +119,9 @@ svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl,
   range.indexWidth = index_size;
   range.indexBias = index_bias;

-   return svga_hwtnl_prim(hwtnl, &range, min_index, max_index, index_buffer);
+   return svga_hwtnl_prim(hwtnl, &range, count,
+                          min_index, max_index, index_buffer,
+                          start_instance, instance_count);
 }


@ -131,7 +130,8 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl,
                               struct pipe_resource *index_buffer,
                               unsigned index_size, int index_bias,
                               unsigned min_index, unsigned max_index,
-                               unsigned prim, unsigned start, unsigned count)
+                               unsigned prim, unsigned start, unsigned count,
+                               unsigned start_instance, unsigned instance_count)
 {
   unsigned gen_prim, gen_size, gen_nr, gen_type;
   u_translate_func gen_func;
@ -165,7 +165,9 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl,
                                                   index_bias,
                                                   min_index,
                                                   max_index,
-                                                   gen_prim, start, count);
+                                                   gen_prim, start, count,
+                                                   start_instance,
+                                                   instance_count);
   }
   else {
      struct pipe_resource *gen_buf = NULL;
@ -190,7 +192,9 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl,
                                                  index_bias,
                                                  min_index,
                                                  max_index,
-                                                  gen_prim, 0, gen_nr);
+                                                  gen_prim, 0, gen_nr,
+                                                  start_instance,
+                                                  instance_count);
      if (ret != PIPE_OK)
         goto done;

--- a/src/gallium/drivers/svga/svga_draw_private.h
+++ b/src/gallium/drivers/svga/svga_draw_private.h
@ -46,7 +46,11 @@ static const unsigned svga_hw_prims =
    (1 << PIPE_PRIM_LINE_STRIP) |
    (1 << PIPE_PRIM_TRIANGLES) |
    (1 << PIPE_PRIM_TRIANGLE_STRIP) |
-    (1 << PIPE_PRIM_TRIANGLE_FAN));
+    (1 << PIPE_PRIM_TRIANGLE_FAN) |
+    (1 << PIPE_PRIM_LINES_ADJACENCY) |
+    (1 << PIPE_PRIM_LINE_STRIP_ADJACENCY) |
+    (1 << PIPE_PRIM_TRIANGLES_ADJACENCY) |
+    (1 << PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY));


 /**
@ -57,8 +61,8 @@ static const unsigned svga_hw_prims =
 * PIPE_PRIM_QUADS, PIPE_PRIM_QUAD_STRIP or PIPE_PRIM_POLYGON.  We convert
 * those to other types of primitives with index/translation code.
 */
-static inline unsigned
-svga_translate_prim(unsigned mode, unsigned vcount,unsigned *prim_count)
+static inline SVGA3dPrimitiveType
+svga_translate_prim(unsigned mode, unsigned vcount, unsigned *prim_count)
 {
   switch (mode) {
   case PIPE_PRIM_POINTS:
@ -85,6 +89,22 @@ svga_translate_prim(unsigned mode, unsigned vcount,unsigned *prim_count)
      *prim_count = vcount - 2;
      return SVGA3D_PRIMITIVE_TRIANGLEFAN; 

+   case PIPE_PRIM_LINES_ADJACENCY:
+      *prim_count = vcount / 4;
+      return SVGA3D_PRIMITIVE_LINELIST_ADJ;
+
+   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+      *prim_count = vcount - 3;
+      return SVGA3D_PRIMITIVE_LINESTRIP_ADJ;
+
+   case PIPE_PRIM_TRIANGLES_ADJACENCY:
+      *prim_count = vcount / 6;
+      return SVGA3D_PRIMITIVE_TRIANGLELIST_ADJ;
+
+   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+      *prim_count = vcount / 2 - 2 ;
+      return SVGA3D_PRIMITIVE_TRIANGLESTRIP_ADJ;
+
   default:
      assert(0);
      *prim_count = 0;
@ -110,13 +130,19 @@ struct index_cache {
 struct draw_cmd {
   struct svga_winsys_context *swc;

+   /* vertex layout info */
   SVGA3dVertexDecl vdecl[SVGA3D_INPUTREG_MAX];
-   struct pipe_resource *vdecl_vb[SVGA3D_INPUTREG_MAX];
   unsigned vdecl_count;
+   SVGA3dElementLayoutId vdecl_layout_id;
+   unsigned vdecl_buffer_index[SVGA3D_INPUTREG_MAX];
+
+   /* vertex buffer info */
+   struct pipe_vertex_buffer vbufs[SVGA3D_INPUTREG_MAX];
+   unsigned vbuf_count;

   SVGA3dPrimitiveRange prim[QSZ];
   struct pipe_resource *prim_ib[QSZ];
-   unsigned prim_count;
+   unsigned prim_count;   /**< number of primitives for this draw */
   unsigned min_index[QSZ];
   unsigned max_index[QSZ];
 };
@ -158,9 +184,11 @@ struct svga_hwtnl {
 enum pipe_error 
 svga_hwtnl_prim( struct svga_hwtnl *hwtnl,
                 const SVGA3dPrimitiveRange *range,
+                 unsigned vcount,
                 unsigned min_index,
                 unsigned max_index,
-                 struct pipe_resource *ib );
+                 struct pipe_resource *ib,
+                 unsigned start_instance, unsigned instance_count);

 enum pipe_error
 svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl,
@ -171,7 +199,9 @@ svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl,
                                       unsigned max_index,
                                       unsigned prim, 
                                       unsigned start,
-                                       unsigned count );
+                                       unsigned count,
+                                       unsigned start_instance,
+                                       unsigned instance_count);


 #endif
--- a/src/gallium/drivers/svga/svga_format.c
+++ b/src/gallium/drivers/svga/svga_format.c
--- a/src/gallium/drivers/svga/svga_format.h
+++ b/src/gallium/drivers/svga/svga_format.h
@ -28,6 +28,7 @@


 #include "pipe/p_format.h"
+#include "svga_context.h"
 #include "svga_types.h"
 #include "svga_reg.h"
 #include "svga3d_reg.h"
@ -36,6 +37,27 @@
 struct svga_screen;


+/**
+ * Vertex format flags.  These are used to specify that some vertex formats
+ * need extra processing/conversion in the vertex shader.  For example,
+ * setting the W component to 1, or swapping R/B, or converting packed uint
+ * types to signed int/snorm.
+ */
+#define VF_ADJUST_RANGE     (1 << 0)
+#define VF_W_TO_1           (1 << 1)
+#define VF_U_TO_F_CAST      (1 << 2)  /* convert uint to float */
+#define VF_I_TO_F_CAST      (1 << 3)  /* convert sint to float */
+#define VF_BGRA             (1 << 4)  /* swap R/B */
+#define VF_PUINT_TO_SNORM   (1 << 5)  /* 10_10_10_2 to snorm */
+#define VF_PUINT_TO_USCALED (1 << 6)  /* 10_10_10_2 to uscaled */
+#define VF_PUINT_TO_SSCALED (1 << 7)  /* 10_10_10_2 to sscaled */
+
+
+void
+svga_translate_vertex_format_vgpu10(enum pipe_format format,
+                                    SVGA3dSurfaceFormat *svga_format,
+                                    unsigned *vf_flags);
+
 enum SVGA3dSurfaceFormat
 svga_translate_format(struct svga_screen *ss,
                      enum pipe_format format,
@ -52,5 +74,23 @@ svga_format_size(SVGA3dSurfaceFormat format,
                 unsigned *block_height,
                 unsigned *bytes_per_block);

+const char *
+svga_format_name(SVGA3dSurfaceFormat format);
+
+boolean
+svga_format_is_integer(SVGA3dSurfaceFormat format);
+
+enum tgsi_return_type
+svga_get_texture_datatype(enum pipe_format format);
+
+
+// XXX: Move this to svga_context?
+boolean
+svga_has_any_integer_cbufs(const struct svga_context *svga);
+
+
+SVGA3dSurfaceFormat
+svga_typeless_format(SVGA3dSurfaceFormat format);
+

 #endif /* SVGA_FORMAT_H_ */
--- a/src/gallium/drivers/svga/svga_pipe_blend.c
+++ b/src/gallium/drivers/svga/svga_pipe_blend.c
@ -27,14 +27,15 @@
 #include "pipe/p_defines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+#include "util/u_bitmask.h"

 #include "svga_context.h"
-
 #include "svga_hw_reg.h"
+#include "svga_cmd.h"


 static inline unsigned
-svga_translate_blend_factor(unsigned factor)
+svga_translate_blend_factor(const struct svga_context *svga, unsigned factor)
 {
   switch (factor) {
   case PIPE_BLENDFACTOR_ZERO:            return SVGA3D_BLENDOP_ZERO;
@ -50,8 +51,21 @@ svga_translate_blend_factor(unsigned factor)
   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return SVGA3D_BLENDOP_SRCALPHASAT;
   case PIPE_BLENDFACTOR_CONST_COLOR:     return SVGA3D_BLENDOP_BLENDFACTOR;
   case PIPE_BLENDFACTOR_INV_CONST_COLOR: return SVGA3D_BLENDOP_INVBLENDFACTOR;
-   case PIPE_BLENDFACTOR_CONST_ALPHA:     return SVGA3D_BLENDOP_BLENDFACTOR; /* ? */
-   case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return SVGA3D_BLENDOP_INVBLENDFACTOR; /* ? */
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      if (svga_have_vgpu10(svga))
+         return SVGA3D_BLENDOP_BLENDFACTORALPHA;
+      else
+         return SVGA3D_BLENDOP_BLENDFACTOR; /* as close as we can get */
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      if (svga_have_vgpu10(svga))
+         return SVGA3D_BLENDOP_INVBLENDFACTORALPHA;
+      else
+         return SVGA3D_BLENDOP_INVBLENDFACTOR; /* as close as we can get */
+   case PIPE_BLENDFACTOR_SRC1_COLOR:      return SVGA3D_BLENDOP_SRC1COLOR;
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:  return SVGA3D_BLENDOP_INVSRC1COLOR;
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:      return SVGA3D_BLENDOP_SRC1ALPHA;
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:  return SVGA3D_BLENDOP_INVSRC1ALPHA;
+   case 0:                                return SVGA3D_BLENDOP_ONE;
   default:
      assert(0);
      return SVGA3D_BLENDOP_ZERO;
@ -74,18 +88,64 @@ svga_translate_blend_func(unsigned mode)
 }


+/**
+ * Define a vgpu10 blend state object for the given svga blend state.
+ *
+ * Allocates an object id from svga->blend_object_id_bm, stores it in
+ * bs->id and emits a DefineBlendState command built from bs->rt[],
+ * bs->alpha_to_coverage and bs->independent_blend_enable.
+ *
+ * \param svga  the context; must be a VGPU10 context (asserted)
+ * \param bs    the blend state to define; its per-render-target terms
+ *              must already be filled in
+ */
+static void
+define_blend_state_object(struct svga_context *svga,
+                          struct svga_blend_state *bs)
+{
+   /* Sized with the same constant that bounds the fill loop below so the
+    * loop can never write past the end of the array.
+    */
+   SVGA3dDXBlendStatePerRT perRT[SVGA3D_DX_MAX_RENDER_TARGETS];
+   unsigned try;
+   unsigned i;
+
+   assert(svga_have_vgpu10(svga));
+
+   bs->id = util_bitmask_add(svga->blend_object_id_bm);
+
+   for (i = 0; i < SVGA3D_DX_MAX_RENDER_TARGETS; i++) {
+      perRT[i].blendEnable = bs->rt[i].blend_enable;
+      perRT[i].srcBlend = bs->rt[i].srcblend;
+      perRT[i].destBlend = bs->rt[i].dstblend;
+      perRT[i].blendOp = bs->rt[i].blendeq;
+      perRT[i].srcBlendAlpha = bs->rt[i].srcblend_alpha;
+      perRT[i].destBlendAlpha = bs->rt[i].dstblend_alpha;
+      perRT[i].blendOpAlpha = bs->rt[i].blendeq_alpha;
+      perRT[i].renderTargetWriteMask = bs->rt[i].writemask;
+      /* logic ops are never enabled in the device blend object */
+      perRT[i].logicOpEnable = 0;
+      perRT[i].logicOp = SVGA3D_LOGICOP_COPY;
+      /* every target is expected to carry target 0's source blend term */
+      assert(perRT[i].srcBlend == perRT[0].srcBlend);
+   }
+
+   /* Loop in case command buffer is full and we need to flush and retry.
+    * If the command still cannot be emitted on the second attempt, the
+    * function returns without reporting the failure.
+    */
+   for (try = 0; try < 2; try++) {
+      enum pipe_error ret;
+
+      ret = SVGA3D_vgpu10_DefineBlendState(svga->swc,
+                                           bs->id,
+                                           bs->alpha_to_coverage,
+                                           bs->independent_blend_enable,
+                                           perRT);
+      if (ret == PIPE_OK)
+         return;
+      svga_context_flush(svga, NULL);
+   }
+}
+
+
 static void *
 svga_create_blend_state(struct pipe_context *pipe,
                        const struct pipe_blend_state *templ)
 {
+   struct svga_context *svga = svga_context(pipe);
   struct svga_blend_state *blend = CALLOC_STRUCT( svga_blend_state );
   unsigned i;

- 
   /* Fill in the per-rendertarget blend state.  We currently only
-    * have one rendertarget.
+    * support independent blend enable and colormask per render target.
    */
-   for (i = 0; i < 1; i++) {
+   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
      /* No way to set this in SVGA3D, and no way to correctly implement it on
       * top of D3D9 API.  Instead we try to simulate with various blend modes.
       */
@ -107,6 +167,9 @@ svga_create_blend_state(struct pipe_context *pipe,
            break;
         case PIPE_LOGICOP_COPY:
            blend->rt[i].blend_enable = FALSE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_ONE;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_ADD;
            break;
         case PIPE_LOGICOP_COPY_INVERTED:
            blend->rt[i].blend_enable   = TRUE;
@ -169,38 +232,99 @@ svga_create_blend_state(struct pipe_context *pipe,
         case PIPE_LOGICOP_EQUIV:
            /* Fill these in with plausible values */
            blend->rt[i].blend_enable = FALSE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_ONE;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_ADD;
            break;
         default:
            assert(0);
            break;
         }
+         blend->rt[i].srcblend_alpha = blend->rt[i].srcblend;
+         blend->rt[i].dstblend_alpha = blend->rt[i].dstblend;
+         blend->rt[i].blendeq_alpha = blend->rt[i].blendeq;
      }
      else {
-         blend->rt[i].blend_enable   = templ->rt[0].blend_enable;
-
-         if (templ->rt[0].blend_enable) {
-            blend->rt[i].srcblend       = svga_translate_blend_factor(templ->rt[0].rgb_src_factor);
-            blend->rt[i].dstblend       = svga_translate_blend_factor(templ->rt[0].rgb_dst_factor);
-            blend->rt[i].blendeq        = svga_translate_blend_func(templ->rt[0].rgb_func);
-            blend->rt[i].srcblend_alpha = svga_translate_blend_factor(templ->rt[0].alpha_src_factor);
-            blend->rt[i].dstblend_alpha = svga_translate_blend_factor(templ->rt[0].alpha_dst_factor);
-            blend->rt[i].blendeq_alpha  = svga_translate_blend_func(templ->rt[0].alpha_func);
+         /* Note: the vgpu10 device does not yet support independent
+          * blend terms per render target.  Target[0] always specifies the
+          * blending terms.
+          */
+         if (templ->independent_blend_enable || templ->rt[0].blend_enable) {
+            /* always use the 0th target's blending terms for now */
+            blend->rt[i].srcblend =
+               svga_translate_blend_factor(svga, templ->rt[0].rgb_src_factor);
+            blend->rt[i].dstblend =
+               svga_translate_blend_factor(svga, templ->rt[0].rgb_dst_factor);
+            blend->rt[i].blendeq =
+               svga_translate_blend_func(templ->rt[0].rgb_func);
+            blend->rt[i].srcblend_alpha =
+               svga_translate_blend_factor(svga, templ->rt[0].alpha_src_factor);
+            blend->rt[i].dstblend_alpha =
+               svga_translate_blend_factor(svga, templ->rt[0].alpha_dst_factor);
+            blend->rt[i].blendeq_alpha =
+               svga_translate_blend_func(templ->rt[0].alpha_func);

            if (blend->rt[i].srcblend_alpha != blend->rt[i].srcblend ||
                blend->rt[i].dstblend_alpha != blend->rt[i].dstblend ||
-                blend->rt[i].blendeq_alpha  != blend->rt[i].blendeq)
-            {
+                blend->rt[i].blendeq_alpha  != blend->rt[i].blendeq) {
               blend->rt[i].separate_alpha_blend_enable = TRUE;
            }
         }
+         else {
+            /* disabled - default blend terms */
+            blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE;
+            blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD;
+            blend->rt[i].srcblend_alpha = SVGA3D_BLENDOP_ONE;
+            blend->rt[i].dstblend_alpha = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].blendeq_alpha = SVGA3D_BLENDEQ_ADD;
+         }
+
+         if (templ->independent_blend_enable) {
+            blend->rt[i].blend_enable = templ->rt[i].blend_enable;
+         }
+         else {
+            blend->rt[i].blend_enable = templ->rt[0].blend_enable;
+         }
      }

-      blend->rt[i].writemask = templ->rt[0].colormask;
+      /* Some GL blend modes are not supported by the VGPU9 device (there's
+       * no equivalent of PIPE_BLENDFACTOR_[INV_]CONST_ALPHA).
+       * When we set this flag, we copy the constant blend alpha value
+       * to the R, G, B components.
+       * This works as long as the src/dst RGB blend factors don't use
+       * PIPE_BLENDFACTOR_CONST_COLOR and PIPE_BLENDFACTOR_CONST_ALPHA
+       * at the same time.  There's no work-around for that.
+       */
+      if (!svga_have_vgpu10(svga)) {
+         if (templ->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_CONST_ALPHA ||
+             templ->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_CONST_ALPHA ||
+             templ->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_INV_CONST_ALPHA ||
+             templ->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_CONST_ALPHA) {
+            blend->blend_color_alpha = TRUE;
+         }
+      }
+
+      if (templ->independent_blend_enable) {
+         blend->rt[i].writemask = templ->rt[i].colormask;
+      }
+      else {
+         blend->rt[i].writemask = templ->rt[0].colormask;
+      }
+   }
+
+   blend->independent_blend_enable = templ->independent_blend_enable;
+
+   blend->alpha_to_coverage = templ->alpha_to_coverage;
+
+   if (svga_have_vgpu10(svga)) {
+      define_blend_state_object(svga, blend);
   }

   return blend;
 }

+
 static void svga_bind_blend_state(struct pipe_context *pipe,
                                  void *blend)
 {
@ -210,9 +334,30 @@ static void svga_bind_blend_state(struct pipe_context *pipe,
   svga->dirty |= SVGA_NEW_BLEND;
 }

-
-static void svga_delete_blend_state(struct pipe_context *pipe, void *blend)
+/**
+ * Delete a blend state created by svga_create_blend_state().
+ *
+ * On a VGPU10 device the corresponding device blend object is destroyed
+ * first (flushing and retrying once if the command buffer is full), the
+ * cached hardware blend id is invalidated if it refers to this object,
+ * and the object id is returned to svga->blend_object_id_bm.
+ *
+ * \param pipe   the pipe context
+ * \param blend  the struct svga_blend_state to free
+ */
+static void svga_delete_blend_state(struct pipe_context *pipe,
+                                    void *blend)
 {
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_blend_state *bs =
+      (struct svga_blend_state *) blend;
+
+   /* Blend object ids are only allocated on VGPU10 devices.  Without the
+    * device check, a state created on an older device (whose id field was
+    * never assigned) would be mistaken for a live object.
+    */
+   if (svga_have_vgpu10(svga) && bs->id != SVGA3D_INVALID_ID) {
+      enum pipe_error ret;
+
+      ret = SVGA3D_vgpu10_DestroyBlendState(svga->swc, bs->id);
+      if (ret != PIPE_OK) {
+         /* command buffer full: flush and try once more */
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_vgpu10_DestroyBlendState(svga->swc, bs->id);
+         assert(ret == PIPE_OK);
+      }
+
+      /* don't leave a stale id in the cached hardware state */
+      if (bs->id == svga->state.hw_draw.blend_id)
+         svga->state.hw_draw.blend_id = SVGA3D_INVALID_ID;
+
+      util_bitmask_clear(svga->blend_object_id_bm, bs->id);
+      bs->id = SVGA3D_INVALID_ID;
+   }
+
    FREE(blend);
 }

@ -235,6 +380,3 @@ void svga_init_blend_functions( struct svga_context *svga )

   svga->pipe.set_blend_color = svga_set_blend_color;
 }
-
-
-
--- a/src/gallium/drivers/svga/svga_pipe_blit.c
+++ b/src/gallium/drivers/svga/svga_pipe_blit.c
@ -29,6 +29,7 @@
 #include "svga_cmd.h"
 #include "svga_surface.h"

+//#include "util/u_blit_sw.h"
 #include "util/u_format.h"
 #include "util/u_surface.h"

@ -159,7 +160,8 @@ static void svga_blit(struct pipe_context *pipe,
   struct svga_context *svga = svga_context(pipe);
   struct pipe_blit_info info = *blit_info;

-   if (info.src.resource->nr_samples > 1 &&
+   if (!svga_have_vgpu10(svga) &&
+       info.src.resource->nr_samples > 1 &&
       info.dst.resource->nr_samples <= 1 &&
       !util_format_is_depth_or_stencil(info.src.resource->format) &&
       !util_format_is_pure_integer(info.src.resource->format)) {
@ -171,12 +173,8 @@ static void svga_blit(struct pipe_context *pipe,
      return; /* done */
   }

-   if (info.mask & PIPE_MASK_S) {
-      debug_printf("svga: cannot blit stencil, skipping\n");
-      info.mask &= ~PIPE_MASK_S;
-   }
-
-   if (!util_blitter_is_blit_supported(svga->blitter, &info)) {
+   if ((info.mask & PIPE_MASK_S) ||
+       !util_blitter_is_blit_supported(svga->blitter, &info)) {
      debug_printf("svga: blit unsupported %s -> %s\n",
                   util_format_short_name(info.src.resource->format),
                   util_format_short_name(info.dst.resource->format));
@ -188,9 +186,9 @@ static void svga_blit(struct pipe_context *pipe,
   util_blitter_save_vertex_buffer_slot(svga->blitter, svga->curr.vb);
   util_blitter_save_vertex_elements(svga->blitter, (void*)svga->curr.velems);
   util_blitter_save_vertex_shader(svga->blitter, svga->curr.vs);
-   /*util_blitter_save_geometry_shader(svga->blitter, svga->curr.gs);*/
-   /*util_blitter_save_so_targets(svga->blitter, svga->num_so_targets,
-                     (struct pipe_stream_output_target**)svga->so_targets);*/
+   util_blitter_save_geometry_shader(svga->blitter, svga->curr.user_gs);
+   util_blitter_save_so_targets(svga->blitter, svga->num_so_targets,
+                     (struct pipe_stream_output_target**)svga->so_targets);
   util_blitter_save_rasterizer(svga->blitter, (void*)svga->curr.rast);
   util_blitter_save_viewport(svga->blitter, &svga->curr.viewport);
   util_blitter_save_scissor(svga->blitter, &svga->curr.scissor);
@ -199,14 +197,14 @@ static void svga_blit(struct pipe_context *pipe,
   util_blitter_save_depth_stencil_alpha(svga->blitter,
                                         (void*)svga->curr.depth);
   util_blitter_save_stencil_ref(svga->blitter, &svga->curr.stencil_ref);
-   /*util_blitter_save_sample_mask(svga->blitter, svga->sample_mask);*/
+   util_blitter_save_sample_mask(svga->blitter, svga->curr.sample_mask);
   util_blitter_save_framebuffer(svga->blitter, &svga->curr.framebuffer);
   util_blitter_save_fragment_sampler_states(svga->blitter,
-                     svga->curr.num_samplers,
-                     (void**)svga->curr.sampler);
+                     svga->curr.num_samplers[PIPE_SHADER_FRAGMENT],
+                     (void**)svga->curr.sampler[PIPE_SHADER_FRAGMENT]);
   util_blitter_save_fragment_sampler_views(svga->blitter,
-                     svga->curr.num_sampler_views,
-                     svga->curr.sampler_views);
+                     svga->curr.num_sampler_views[PIPE_SHADER_FRAGMENT],
+                     svga->curr.sampler_views[PIPE_SHADER_FRAGMENT]);
   /*util_blitter_save_render_condition(svga->blitter, svga->render_cond_query,
                                      svga->render_cond_cond, svga->render_cond_mode);*/
   util_blitter_blit(svga->blitter, &info);
--- a/src/gallium/drivers/svga/svga_pipe_clear.c
+++ b/src/gallium/drivers/svga/svga_pipe_clear.c
@ -34,6 +34,78 @@
 #include "svga_surface.h"


+/**
+ * Clear the whole color buffer(s) by drawing a quad with the blitter.
+ * For VGPU10 we use this when clearing integer render targets.  The
+ * depth and/or stencil buffers are cleared too if the clear_buffers
+ * mask specifies them.
+ *
+ * \param svga           the context whose current framebuffer is cleared
+ * \param clear_buffers  mask of buffers to clear, passed to the blitter
+ * \param color          clear color
+ * \param depth          depth clear value
+ * \param stencil        stencil clear value
+ */
+static void
+clear_buffers_with_quad(struct svga_context *svga,
+                        unsigned clear_buffers,
+                        const union pipe_color_union *color,
+                        double depth, unsigned stencil)
+{
+   const struct pipe_framebuffer_state *framebuffer = &svga->curr.framebuffer;
+   const unsigned num_layers = 1;
+
+   /* Hand the current state to the blitter before it draws. */
+
+   /* vertex fetch / geometry stages */
+   util_blitter_save_vertex_buffer_slot(svga->blitter, svga->curr.vb);
+   util_blitter_save_vertex_elements(svga->blitter, (void*)svga->curr.velems);
+   util_blitter_save_vertex_shader(svga->blitter, svga->curr.vs);
+   util_blitter_save_geometry_shader(svga->blitter, svga->curr.gs);
+   util_blitter_save_so_targets(svga->blitter, svga->num_so_targets,
+                     (struct pipe_stream_output_target**)svga->so_targets);
+
+   /* rasterization */
+   util_blitter_save_rasterizer(svga->blitter, (void*)svga->curr.rast);
+   util_blitter_save_viewport(svga->blitter, &svga->curr.viewport);
+   util_blitter_save_scissor(svga->blitter, &svga->curr.scissor);
+
+   /* fragment stage and output merger */
+   util_blitter_save_fragment_shader(svga->blitter, svga->curr.fs);
+   util_blitter_save_blend(svga->blitter, (void*)svga->curr.blend);
+   util_blitter_save_depth_stencil_alpha(svga->blitter,
+                                         (void*)svga->curr.depth);
+   util_blitter_save_stencil_ref(svga->blitter, &svga->curr.stencil_ref);
+   util_blitter_save_sample_mask(svga->blitter, svga->curr.sample_mask);
+
+   /* draw the clearing quad over the full framebuffer */
+   util_blitter_clear(svga->blitter,
+                      framebuffer->width, framebuffer->height,
+                      num_layers,
+                      clear_buffers, color,
+                      depth, stencil);
+}
+
+
+/**
+ * Report whether any color buffer selected by the 'buffers' clear mask
+ * is bound and has a pure-integer format.
+ *
+ * \param fb       framebuffer state to inspect
+ * \param buffers  clear mask; bit (PIPE_CLEAR_COLOR0 << i) selects cbufs[i]
+ * \return TRUE if at least one selected color buffer is pure integer
+ */
+static boolean
+is_integer_target(struct pipe_framebuffer_state *fb, unsigned buffers)
+{
+   unsigned buf;
+
+   for (buf = 0; buf < fb->nr_cbufs; buf++) {
+      /* skip color buffers that are not being cleared */
+      if (!(buffers & (PIPE_CLEAR_COLOR0 << buf)))
+         continue;
+
+      /* skip unbound color buffer slots */
+      if (!fb->cbufs[buf])
+         continue;
+
+      if (util_format_is_pure_integer(fb->cbufs[buf]->format))
+         return TRUE;
+   }
+
+   return FALSE;
+}
+
+
+/**
+ * Check if the integer values in the clear color can be represented
+ * by floats.  If so, we can use the VGPU10 ClearRenderTargetView command.
+ * Otherwise, we need to clear with a quad.
+ *
+ * A 32-bit float has a 24-bit significand, so every integer in
+ * [-2^24, 2^24] is exactly representable.  Both bounds are checked:
+ * large-magnitude negative values (which is also how unsigned clear
+ * values >= 2^31 appear through the signed view) do not fit either.
+ *
+ * \param color  the clear color, read through its signed integer view
+ * \return TRUE if all four components lie within [-2^24, 2^24]
+ */
+static boolean
+ints_fit_in_floats(const union pipe_color_union *color)
+{
+   const int max = 1 << 24;
+   unsigned i;
+
+   for (i = 0; i < 4; i++) {
+      if (color->i[i] > max || color->i[i] < -max)
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+
 static enum pipe_error
 try_clear(struct svga_context *svga, 
          unsigned buffers,
@ -52,7 +124,7 @@ try_clear(struct svga_context *svga,
   if (ret != PIPE_OK)
      return ret;

-   if (svga->rebind.rendertargets) {
+   if (svga->rebind.flags.rendertargets) {
      ret = svga_reemit_framebuffer_bindings(svga);
      if (ret != PIPE_OK) {
         return ret;
@ -71,29 +143,72 @@ try_clear(struct svga_context *svga,
      if (buffers & PIPE_CLEAR_DEPTH)
         flags |= SVGA3D_CLEAR_DEPTH;

-      if ((svga->curr.framebuffer.zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) &&
-          (buffers & PIPE_CLEAR_STENCIL))
+      if (buffers & PIPE_CLEAR_STENCIL)
         flags |= SVGA3D_CLEAR_STENCIL;

      rect.w = MAX2(rect.w, fb->zsbuf->width);
      rect.h = MAX2(rect.h, fb->zsbuf->height);
   }

-   if (memcmp(&rect, &svga->state.hw_clear.viewport, sizeof(rect)) != 0) {
+   if (!svga_have_vgpu10(svga) &&
+       !svga_rects_equal(&rect, &svga->state.hw_clear.viewport)) {
      restore_viewport = TRUE;
      ret = SVGA3D_SetViewport(svga->swc, &rect);
      if (ret != PIPE_OK)
         return ret;
   }

-   ret = SVGA3D_ClearRect(svga->swc, flags, uc.ui[0], (float) depth, stencil,
-                          rect.x, rect.y, rect.w, rect.h);
-   if (ret != PIPE_OK)
-      return ret;
+   if (svga_have_vgpu10(svga)) {
+      if (flags & SVGA3D_CLEAR_COLOR) {
+         unsigned i;
+
+         if (is_integer_target(fb, buffers) && !ints_fit_in_floats(color)) {
+            clear_buffers_with_quad(svga, buffers, color, depth, stencil);
+            /* We also cleared depth/stencil, so that's done */
+            flags &= ~(SVGA3D_CLEAR_DEPTH | SVGA3D_CLEAR_STENCIL);
+         }
+         else {
+            struct pipe_surface *rtv;
+
+            /* Issue VGPU10 Clear commands */
+            for (i = 0; i < fb->nr_cbufs; i++) {
+               if ((fb->cbufs[i] == NULL) ||
+                   !(buffers & (PIPE_CLEAR_COLOR0 << i)))
+                  continue;
+
+               rtv = svga_validate_surface_view(svga,
+                                                svga_surface(fb->cbufs[i]));
+               if (rtv == NULL)
+                  return PIPE_ERROR_OUT_OF_MEMORY;
+
+               ret = SVGA3D_vgpu10_ClearRenderTargetView(svga->swc,
+                                                         rtv, color->f);
+               if (ret != PIPE_OK)
+                  return ret;
+            }
+         }
+      }
+      if (flags & (SVGA3D_CLEAR_DEPTH | SVGA3D_CLEAR_STENCIL)) {
+         struct pipe_surface *dsv =
+            svga_validate_surface_view(svga, svga_surface(fb->zsbuf));
+         if (dsv == NULL)
+            return PIPE_ERROR_OUT_OF_MEMORY;
+
+         ret = SVGA3D_vgpu10_ClearDepthStencilView(svga->swc, dsv, flags,
+                                                   stencil, (float) depth);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+   }
+   else {
+      ret = SVGA3D_ClearRect(svga->swc, flags, uc.ui[0], (float) depth, stencil,
+                             rect.x, rect.y, rect.w, rect.h);
+      if (ret != PIPE_OK)
+         return ret;
+   }

   if (restore_viewport) {
-      memcpy(&rect, &svga->state.hw_clear.viewport, sizeof rect);
-      ret = SVGA3D_SetViewport(svga->swc, &rect);
+      ret = SVGA3D_SetViewport(svga->swc, &svga->state.hw_clear.viewport);
   }
   
   return ret;
--- a/src/gallium/drivers/svga/svga_pipe_constants.c
+++ b/src/gallium/drivers/svga/svga_pipe_constants.c
@ -48,28 +48,46 @@ static void svga_set_constant_buffer(struct pipe_context *pipe,
                                     uint shader, uint index,
                                     struct pipe_constant_buffer *cb)
 {
+   struct svga_screen *svgascreen = svga_screen(pipe->screen);
   struct svga_context *svga = svga_context(pipe);
   struct pipe_resource *buf = cb ? cb->buffer : NULL;
+   unsigned buffer_size = 0;

-   if (cb && cb->user_buffer) {
-      buf = svga_user_buffer_create(pipe->screen,
-                                    (void *) cb->user_buffer,
-                                    cb->buffer_size,
-                                    PIPE_BIND_CONSTANT_BUFFER);
+   if (cb) {
+      buffer_size = cb->buffer_size;
+      if (cb->user_buffer) {
+         buf = svga_user_buffer_create(pipe->screen,
+                                       (void *) cb->user_buffer,
+                                       cb->buffer_size,
+                                       PIPE_BIND_CONSTANT_BUFFER);
+      }
   }

   assert(shader < PIPE_SHADER_TYPES);
-   assert(index == 0);
+   assert(index < Elements(svga->curr.constbufs[shader]));
+   assert(index < svgascreen->max_const_buffers);
+   (void) svgascreen;

-   pipe_resource_reference(&svga->curr.cbufs[shader].buffer, buf);
-   svga->curr.cbufs[shader].buffer_size = cb ? cb->buffer_size : 0;
-   svga->curr.cbufs[shader].buffer_offset = cb ? cb->buffer_offset : 0;
-   svga->curr.cbufs[shader].user_buffer = NULL; /* not used */
+   pipe_resource_reference(&svga->curr.constbufs[shader][index].buffer, buf);
+
+   /* Make sure the constant buffer size to be updated is within the
+    * limit supported by the device.
+    */
+   svga->curr.constbufs[shader][index].buffer_size =
+      MIN2(buffer_size, SVGA_MAX_CONST_BUF_SIZE);
+
+   svga->curr.constbufs[shader][index].buffer_offset = cb ? cb->buffer_offset : 0;
+   svga->curr.constbufs[shader][index].user_buffer = NULL; /* not used */

   if (shader == PIPE_SHADER_FRAGMENT)
      svga->dirty |= SVGA_NEW_FS_CONST_BUFFER;
-   else
+   else if (shader == PIPE_SHADER_VERTEX)
      svga->dirty |= SVGA_NEW_VS_CONST_BUFFER;
+   else
+      svga->dirty |= SVGA_NEW_GS_CONST_BUFFER;
+
+   /* update bitmask of dirty const buffers */
+   svga->state.dirty_constbufs[shader] |= (1 << index);

   if (cb && cb->user_buffer) {
      pipe_resource_reference(&buf, NULL);
--- a/src/gallium/drivers/svga/svga_pipe_depthstencil.c
+++ b/src/gallium/drivers/svga/svga_pipe_depthstencil.c
@ -23,13 +23,15 @@
 *
 **********************************************************/

-#include "util/u_inlines.h"
 #include "pipe/p_defines.h"
+#include "util/u_bitmask.h"
+#include "util/u_inlines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"

 #include "svga_context.h"
 #include "svga_hw_reg.h"
+#include "svga_cmd.h"


 static inline unsigned
@ -69,10 +71,67 @@ svga_translate_stencil_op(unsigned op)
 }


+/**
+ * Define a vgpu10 depth/stencil state object for the given
+ * svga depth/stencil state.
+ *
+ * A new object ID is allocated from svga->ds_object_id_bm and stored
+ * in ds->id before the define command is emitted.  The command is
+ * attempted at most twice, with a context flush after each failed
+ * attempt.  If both attempts fail, the function returns without
+ * reporting the error and ds->id remains allocated.
+ */
+static void
+define_depth_stencil_state_object(struct svga_context *svga,
+                                  struct svga_depth_stencil_state *ds)
+{
+   unsigned try;
+
+   assert(svga_have_vgpu10(svga));
+
+   ds->id = util_bitmask_add(svga->ds_object_id_bm);
+
+   /* spot check that these comparison tokens are the same */
+   assert(SVGA3D_COMPARISON_NEVER == SVGA3D_CMP_NEVER);
+   assert(SVGA3D_COMPARISON_LESS == SVGA3D_CMP_LESS);
+   assert(SVGA3D_COMPARISON_NOT_EQUAL == SVGA3D_CMP_NOTEQUAL);
+
+   /* Loop in case command buffer is full and we need to flush and retry */
+   for (try = 0; try < 2; try++) {
+      enum pipe_error ret;
+
+      /* Note: we use the ds->stencil[0].enabled value for both the front
+       * and back-face enables.  If single-side stencil is used, we'll have
+       * set the back state the same as the front state.
+       */
+      ret = SVGA3D_vgpu10_DefineDepthStencilState(svga->swc,
+                                                  ds->id,
+                                                  /* depth/Z */
+                                                  ds->zenable,
+                                                  ds->zwriteenable,
+                                                  ds->zfunc,
+                                                  /* Stencil */
+                                                  ds->stencil[0].enabled, /*f|b*/
+                                                  ds->stencil[0].enabled, /*f*/
+                                                  ds->stencil[0].enabled, /*b*/
+                                                  ds->stencil_mask,
+                                                  ds->stencil_writemask,
+                                                  /* front stencil */
+                                                  ds->stencil[0].fail,
+                                                  ds->stencil[0].zfail,
+                                                  ds->stencil[0].pass,
+                                                  ds->stencil[0].func,
+                                                  /* back stencil */
+                                                  ds->stencil[1].fail,
+                                                  ds->stencil[1].zfail,
+                                                  ds->stencil[1].pass,
+                                                  ds->stencil[1].func);
+      if (ret == PIPE_OK)
+         return;
+      svga_context_flush(svga, NULL);   /* also runs after the final failure */
+   }
+}
+
+
 static void *
 svga_create_depth_stencil_state(struct pipe_context *pipe,
 				const struct pipe_depth_stencil_alpha_state *templ)
 {
+   struct svga_context *svga = svga_context(pipe);
   struct svga_depth_stencil_state *ds = CALLOC_STRUCT( svga_depth_stencil_state );

   /* Don't try to figure out CW/CCW correspondence with
@ -92,10 +151,18 @@ svga_create_depth_stencil_state(struct pipe_context *pipe,
      ds->stencil_mask      = templ->stencil[0].valuemask & 0xff;
      ds->stencil_writemask = templ->stencil[0].writemask & 0xff;
   }
+   else {
+      ds->stencil[0].func = SVGA3D_CMP_ALWAYS;
+      ds->stencil[0].fail = SVGA3D_STENCILOP_KEEP;
+      ds->stencil[0].zfail = SVGA3D_STENCILOP_KEEP;
+      ds->stencil[0].pass = SVGA3D_STENCILOP_KEEP;
+   }


   ds->stencil[1].enabled = templ->stencil[1].enabled;
   if (templ->stencil[1].enabled) {
+      assert(templ->stencil[0].enabled);
+      /* two-sided stencil */
      ds->stencil[1].func   = svga_translate_compare_func(templ->stencil[1].func);
      ds->stencil[1].fail   = svga_translate_stencil_op(templ->stencil[1].fail_op);
      ds->stencil[1].zfail  = svga_translate_stencil_op(templ->stencil[1].zfail_op);
@ -104,6 +171,13 @@ svga_create_depth_stencil_state(struct pipe_context *pipe,
      ds->stencil_mask      = templ->stencil[1].valuemask & 0xff;
      ds->stencil_writemask = templ->stencil[1].writemask & 0xff;
   }
+   else {
+      /* back face state is same as front-face state */
+      ds->stencil[1].func = ds->stencil[0].func;
+      ds->stencil[1].fail = ds->stencil[0].fail;
+      ds->stencil[1].zfail = ds->stencil[0].zfail;
+      ds->stencil[1].pass = ds->stencil[0].pass;
+   }


   ds->zenable = templ->depth.enabled;
@ -111,12 +185,22 @@ svga_create_depth_stencil_state(struct pipe_context *pipe,
      ds->zfunc = svga_translate_compare_func(templ->depth.func);
      ds->zwriteenable = templ->depth.writemask;
   }
+   else {
+      ds->zfunc = SVGA3D_CMP_ALWAYS;
+   }

   ds->alphatestenable = templ->alpha.enabled;
   if (ds->alphatestenable) {
      ds->alphafunc = svga_translate_compare_func(templ->alpha.func);
      ds->alpharef = templ->alpha.ref_value;
   }
+   else {
+      ds->alphafunc = SVGA3D_CMP_ALWAYS;
+   }
+
+   if (svga_have_vgpu10(svga)) {
+      define_depth_stencil_state_object(svga, ds);
+   }

   return ds;
 }
@ -126,13 +210,43 @@ static void svga_bind_depth_stencil_state(struct pipe_context *pipe,
 {
   struct svga_context *svga = svga_context(pipe);

+   if (svga_have_vgpu10(svga)) {
+      /* flush any previously queued drawing before changing state */
+      svga_hwtnl_flush_retry(svga);
+   }
+
   svga->curr.depth = (const struct svga_depth_stencil_state *)depth_stencil;
-   svga->dirty |= SVGA_NEW_DEPTH_STENCIL;
+   svga->dirty |= SVGA_NEW_DEPTH_STENCIL_ALPHA;
 }

+/**
+ * Delete a depth/stencil state object.
+ *
+ * On vgpu10 contexts the device-side object is destroyed first (retrying
+ * once after a context flush), any cached hw binding of its ID is
+ * invalidated and the ID is returned to svga->ds_object_id_bm.  The
+ * CPU-side struct is then freed.
+ */
 static void svga_delete_depth_stencil_state(struct pipe_context *pipe,
                                            void *depth_stencil)
 {
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_depth_stencil_state *ds =
+      (struct svga_depth_stencil_state *) depth_stencil;
+
+   if (svga_have_vgpu10(svga)) {
+      enum pipe_error ret;
+
+      /* flush any previously queued drawing before destroying the object */
+      svga_hwtnl_flush_retry(svga);
+
+      assert(ds->id != SVGA3D_INVALID_ID);
+
+      ret = SVGA3D_vgpu10_DestroyDepthStencilState(svga->swc, ds->id);
+      if (ret != PIPE_OK) {
+         /* flush the context and try exactly once more */
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_vgpu10_DestroyDepthStencilState(svga->swc, ds->id);
+         assert(ret == PIPE_OK);
+      }
+
+      /* forget the cached hw binding if it names the deleted object */
+      if (ds->id == svga->state.hw_draw.depth_stencil_id)
+         svga->state.hw_draw.depth_stencil_id = SVGA3D_INVALID_ID;
+
+      util_bitmask_clear(svga->ds_object_id_bm, ds->id);
+      ds->id = SVGA3D_INVALID_ID;
+   }
+
   FREE(depth_stencil);
 }

@ -142,6 +256,11 @@ static void svga_set_stencil_ref( struct pipe_context *pipe,
 {
   struct svga_context *svga = svga_context(pipe);

+   if (svga_have_vgpu10(svga)) {
+      /* flush any previously queued drawing before changing state */
+      svga_hwtnl_flush_retry(svga);
+   }
+
   svga->curr.stencil_ref = *stencil_ref;

   svga->dirty |= SVGA_NEW_STENCIL_REF;
@ -151,6 +270,11 @@ static void
 svga_set_sample_mask(struct pipe_context *pipe,
                     unsigned sample_mask)
 {
+   struct svga_context *svga = svga_context(pipe);
+
+   svga->curr.sample_mask = sample_mask;
+
+   svga->dirty |= SVGA_NEW_BLEND; /* See emit_rss_vgpu10() */
 }


--- a/src/gallium/drivers/svga/svga_pipe_draw.c
+++ b/src/gallium/drivers/svga/svga_pipe_draw.c
@ -27,7 +27,9 @@
 #include "util/u_format.h"
 #include "util/u_inlines.h"
 #include "util/u_prim.h"
+#include "util/u_prim_restart.h"
 #include "util/u_time.h"
+#include "util/u_upload_mgr.h"
 #include "indices/u_indices.h"

 #include "svga_hw_reg.h"
@ -35,12 +37,12 @@
 #include "svga_context.h"
 #include "svga_screen.h"
 #include "svga_draw.h"
+#include "svga_shader.h"
 #include "svga_state.h"
 #include "svga_swtnl.h"
 #include "svga_debug.h"
 #include "svga_resource_buffer.h"

-
 static enum pipe_error
 retry_draw_range_elements( struct svga_context *svga,
                           struct pipe_resource *index_buffer,
@ -51,26 +53,31 @@ retry_draw_range_elements( struct svga_context *svga,
                           unsigned prim,
                           unsigned start,
                           unsigned count,
+                           unsigned start_instance,
                           unsigned instance_count,
                           boolean do_retry )
 {
   enum pipe_error ret = PIPE_OK;

-   svga_hwtnl_set_unfilled( svga->hwtnl,
-                            svga->curr.rast->hw_unfilled );
-
-   svga_hwtnl_set_flatshade( svga->hwtnl,
-                             svga->curr.rast->templ.flatshade,
-                             svga->curr.rast->templ.flatshade_first );
+   svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode);

   ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
   if (ret != PIPE_OK)
      goto retry;

+   /** determine if flatshade is to be used after svga_update_state()
+    *  in case the fragment shader is changed.
+    */
+   svga_hwtnl_set_flatshade(svga->hwtnl,
+                            svga->curr.rast->templ.flatshade ||
+                            svga->state.hw_draw.fs->uses_flat_interp,
+                            svga->curr.rast->templ.flatshade_first);
+
   ret = svga_hwtnl_draw_range_elements( svga->hwtnl,
                                         index_buffer, index_size, index_bias,
                                         min_index, max_index,
-                                         prim, start, count );
+                                         prim, start, count,
+                                         start_instance, instance_count);
   if (ret != PIPE_OK)
      goto retry;

@ -85,7 +92,7 @@ retry:
                                        index_buffer, index_size, index_bias,
                                        min_index, max_index,
                                        prim, start, count,
-                                        instance_count, FALSE );
+                                        start_instance, instance_count, FALSE );
   }

   return ret;
@ -94,27 +101,28 @@ retry:

 static enum pipe_error
 retry_draw_arrays( struct svga_context *svga,
-                   unsigned prim,
-                   unsigned start,
-                   unsigned count,
-                   unsigned instance_count,
+                   unsigned prim, unsigned start, unsigned count,
+                   unsigned start_instance, unsigned instance_count,
                   boolean do_retry )
 {
   enum pipe_error ret;

-   svga_hwtnl_set_unfilled( svga->hwtnl,
-                            svga->curr.rast->hw_unfilled );
-
-   svga_hwtnl_set_flatshade( svga->hwtnl,
-                             svga->curr.rast->templ.flatshade,
-                             svga->curr.rast->templ.flatshade_first );
+   svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode);

   ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
   if (ret != PIPE_OK)
      goto retry;

-   ret = svga_hwtnl_draw_arrays( svga->hwtnl, prim,
-                                 start, count );
+   /** determine if flatshade is to be used after svga_update_state()
+    *  in case the fragment shader is changed.
+    */
+   svga_hwtnl_set_flatshade(svga->hwtnl,
+                            svga->curr.rast->templ.flatshade ||
+                            svga->state.hw_draw.fs->uses_flat_interp,
+                            svga->curr.rast->templ.flatshade_first);
+
+   ret = svga_hwtnl_draw_arrays(svga->hwtnl, prim, start, count,
+                                start_instance, instance_count);
   if (ret != PIPE_OK)
      goto retry;

@ -125,18 +133,41 @@ retry:
   {
      svga_context_flush( svga, NULL );

-      return retry_draw_arrays( svga,
-                                prim,
-                                start,
-                                count,
-                                instance_count,
-                                FALSE );
+      return retry_draw_arrays(svga, prim, start, count,
+                               start_instance, instance_count,
+                               FALSE );
   }

   return ret;
 }


+/**
+ * Decide whether primitive restart has to be emulated for this draw by
+ * breaking the original primitive into sub-primitives at the restart
+ * indexes.
+ *
+ * Only indexed draws with primitive restart enabled can need the fallback:
+ *  - without VGPU10: always;
+ *  - with VGPU10 and no swtnl: for 1-byte indexes, or when the restart
+ *    index differs from 0xffff (2-byte indexes) / 0xffffffff (otherwise).
+ * With VGPU10 and swtnl needed, FALSE is returned.
+ */
+static boolean
+need_fallback_prim_restart(const struct svga_context *svga,
+                           const struct pipe_draw_info *info)
+{
+   if (!info->primitive_restart || !info->indexed)
+      return FALSE;
+
+   if (!svga_have_vgpu10(svga))
+      return TRUE;
+
+   if (svga->state.sw.need_swtnl)
+      return FALSE;
+
+   switch (svga->curr.ib.index_size) {
+   case 1:
+      /* no device support for 1-byte indexes */
+      return TRUE;
+   case 2:
+      return info->restart_index != 0xffff;
+   default:
+      return info->restart_index != 0xffffffff;
+   }
+}
+
+
 static void
 svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
 {
@ -148,7 +179,8 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)

   svga->num_draw_calls++;  /* for SVGA_QUERY_DRAW_CALLS */

-   if (!u_trim_pipe_prim( info->mode, &count ))
+   if (u_reduced_prim(info->mode) == PIPE_PRIM_TRIANGLES &&
+       svga->curr.rast->templ.cull_face == PIPE_FACE_FRONT_AND_BACK)
      return;

   /*
@ -165,6 +197,17 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
      svga->dirty |= SVGA_NEW_REDUCED_PRIMITIVE;
   }

+   if (need_fallback_prim_restart(svga, info)) {
+      enum pipe_error r;
+      r = util_draw_vbo_without_prim_restart(pipe, &svga->curr.ib, info);
+      assert(r == PIPE_OK);
+      (void) r;
+      return;
+   }
+
+   if (!u_trim_pipe_prim( info->mode, &count ))
+      return;
+
   needed_swtnl = svga->state.sw.need_swtnl;

   svga_update_state_retry( svga, SVGA_STATE_NEED_SWTNL );
@ -208,17 +251,15 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
                                          info->max_index,
                                          info->mode,
                                          info->start + offset,
-                                          info->count,
+                                          count,
+                                          info->start_instance,
                                          info->instance_count,
                                          TRUE );
      }
      else {
-         ret = retry_draw_arrays( svga,
-                                  info->mode,
-                                  info->start,
-                                  info->count,
-                                  info->instance_count,
-                                  TRUE );
+         ret = retry_draw_arrays(svga, info->mode, info->start, count,
+                                 info->start_instance, info->instance_count,
+                                 TRUE);
      }
   }

--- a/src/gallium/drivers/svga/svga_pipe_fs.c
+++ b/src/gallium/drivers/svga/svga_pipe_fs.c
@ -31,7 +31,6 @@
 #include "draw/draw_context.h"

 #include "svga_context.h"
-#include "svga_tgsi.h"
 #include "svga_hw_reg.h"
 #include "svga_cmd.h"
 #include "svga_debug.h"
@ -63,12 +62,6 @@ svga_create_fs_state(struct pipe_context *pipe,

   fs->draw_shader = draw_create_fragment_shader(svga->swtnl.draw, templ);

-   if (SVGA_DEBUG & DEBUG_TGSI || 0) {
-      debug_printf("%s id: %u, inputs: %u, outputs: %u\n",
-                   __FUNCTION__, fs->base.id,
-                   fs->base.info.num_inputs, fs->base.info.num_outputs);
-   }
-
   return fs;
 }

@ -94,20 +87,30 @@ svga_delete_fs_state(struct pipe_context *pipe, void *shader)

   svga_hwtnl_flush_retry(svga);

+   assert(fs->base.parent == NULL);
+
   draw_delete_fragment_shader(svga->swtnl.draw, fs->draw_shader);

   for (variant = fs->base.variants; variant; variant = tmp) {
      tmp = variant->next;

-      ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
-      (void) ret;  /* PIPE_ERROR_ not handled yet */
-
-      /*
-       * Remove stale references to this variant to ensure a new variant on the
-       * same address will be detected as a change.
-       */
-      if (variant == svga->state.hw_draw.fs)
+      /* Check if deleting currently bound shader */
+      if (variant == svga->state.hw_draw.fs) {
+         ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL);
+         if (ret != PIPE_OK) {
+            svga_context_flush(svga, NULL);
+            ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL);
+            assert(ret == PIPE_OK);
+         }
         svga->state.hw_draw.fs = NULL;
+      }
+
+      ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
+         assert(ret == PIPE_OK);
+      }
   }

   FREE((void *)fs->base.tokens);
--- a/src/gallium/drivers/svga/svga_pipe_gs.c
+++ b/src/gallium/drivers/svga/svga_pipe_gs.c
@ -84,7 +84,7 @@ svga_delete_gs_state(struct pipe_context *pipe, void *shader)
 {
   struct svga_context *svga = svga_context(pipe);
   struct svga_geometry_shader *gs = (struct svga_geometry_shader *)shader;
-   struct svga_geometry_shader *next_gs;  
+   struct svga_geometry_shader *next_gs;
   struct svga_shader_variant *variant, *tmp;
   enum pipe_error ret;

@ -96,9 +96,9 @@ svga_delete_gs_state(struct pipe_context *pipe, void *shader)

   /* Free the list of geometry shaders */
   while (gs) {
-      next_gs = (struct svga_geometry_shader *)gs->base.next;  
+      next_gs = (struct svga_geometry_shader *)gs->base.next;

-      if (gs->base.stream_output != NULL) 
+      if (gs->base.stream_output != NULL)
         svga_delete_stream_output(svga, gs->base.stream_output);

      draw_delete_geometry_shader(svga->swtnl.draw, gs->draw_shader);
--- a/src/gallium/drivers/svga/svga_pipe_misc.c
+++ b/src/gallium/drivers/svga/svga_pipe_misc.c
@ -27,6 +27,7 @@

 #include "util/u_framebuffer.h"
 #include "util/u_inlines.h"
+#include "util/u_pstipple.h"

 #include "svga_context.h"
 #include "svga_screen.h"
@ -46,10 +47,37 @@ static void svga_set_scissor_states( struct pipe_context *pipe,
 }


-static void svga_set_polygon_stipple( struct pipe_context *pipe,
-                                      const struct pipe_poly_stipple *stipple )
+/**
+ * Set the polygon stipple pattern.
+ *
+ * The previous stipple texture and sampler view are released and new
+ * ones are created from the given pattern with the util_pstipple helpers.
+ * The stipple sampler state is created only once, on first use.
+ * SVGA_NEW_STIPPLE is flagged at the end.
+ */
+static void
+svga_set_polygon_stipple(struct pipe_context *pipe,
+                         const struct pipe_poly_stipple *stipple)
 {
-   /* overridden by the draw module */
+   struct svga_context *svga = svga_context(pipe);
+
+   /* release old texture */
+   pipe_resource_reference(&svga->polygon_stipple.texture, NULL);
+
+   /* release old sampler view */
+   if (svga->polygon_stipple.sampler_view) {
+      pipe->sampler_view_destroy(pipe,
+                                 &svga->polygon_stipple.sampler_view->base);
+   }
+
+   /* create new stipple texture */
+   /* NOTE(review): the result is not checked before it is handed to
+    * util_pstipple_create_sampler_view() below -- confirm that a failed
+    * texture creation is tolerated there.
+    */
+   svga->polygon_stipple.texture =
+      util_pstipple_create_stipple_texture(pipe, stipple->stipple);
+
+   /* create new sampler view */
+   svga->polygon_stipple.sampler_view =
+      (struct svga_pipe_sampler_view *)
+      util_pstipple_create_sampler_view(pipe,
+                                        svga->polygon_stipple.texture);
+
+   /* allocate sampler state, if first time */
+   if (!svga->polygon_stipple.sampler) {
+      svga->polygon_stipple.sampler = util_pstipple_create_sampler(pipe);
+   }
+
+   svga->dirty |= SVGA_NEW_STIPPLE;
 }


@ -83,6 +111,11 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe,
   boolean propagate = FALSE;
   unsigned i;

+   /* make sure any pending drawing calls are flushed before changing
+    * the framebuffer state
+    */
+   svga_hwtnl_flush_retry(svga);
+
   dst->width = fb->width;
   dst->height = fb->height;
   dst->nr_cbufs = fb->nr_cbufs;
@ -99,9 +132,6 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe,
   }

   if (propagate) {
-      /* make sure that drawing calls comes before propagation calls */
-      svga_hwtnl_flush_retry( svga );
-   
      for (i = 0; i < dst->nr_cbufs; i++) {
         struct pipe_surface *s = i < fb->nr_cbufs ? fb->cbufs[i] : NULL;
         if (dst->cbufs[i] && dst->cbufs[i] != s)
@ -109,13 +139,30 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe,
      }
   }

-   /* XXX: Actually the virtual hardware may support rendertargets with
-    * different size, depending on the host API and driver, but since we cannot
-    * know that make no such assumption here. */
-   for(i = 0; i < fb->nr_cbufs; ++i) {
-      if (fb->zsbuf && fb->cbufs[i]) {
-         assert(fb->zsbuf->width == fb->cbufs[i]->width); 
-         assert(fb->zsbuf->height == fb->cbufs[i]->height); 
+   /* Check that all surfaces are the same size.
+    * The virtual hardware may actually support render targets of
+    * different sizes, depending on the host API and driver, but we
+    * cannot know that here, so only warn about a mismatch.
+    */
+   {
+      int width = 0, height = 0;
+      if (fb->zsbuf) {
+         width = fb->zsbuf->width;
+         height = fb->zsbuf->height;
+      }
+      for (i = 0; i < fb->nr_cbufs; ++i) {
+         if (fb->cbufs[i]) {
+            if (width && height) {
+               if (fb->cbufs[i]->width != width ||
+                   fb->cbufs[i]->height != height) {
+                  debug_warning("Mixed-size color and depth/stencil surfaces "
+                                "may not work properly");
+               }
+            }
+            else {
+               width = fb->cbufs[i]->width;
+               height = fb->cbufs[i]->height;
+            }
+         }
      }
   }

--- a/src/gallium/drivers/svga/svga_pipe_query.c
+++ b/src/gallium/drivers/svga/svga_pipe_query.c
--- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c
+++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
@ -23,16 +23,18 @@
 *
 **********************************************************/

-#include "draw/draw_context.h"
-#include "util/u_inlines.h"
 #include "pipe/p_defines.h"
+#include "draw/draw_context.h"
+#include "util/u_bitmask.h"
+#include "util/u_inlines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"

+#include "svga_cmd.h"
 #include "svga_context.h"
+#include "svga_hw_reg.h"
 #include "svga_screen.h"

-#include "svga_hw_reg.h"

 /* Hardware frontwinding is always set up as SVGA3D_FRONTWINDING_CW.
 */
@ -61,6 +63,96 @@ static SVGA3dShadeMode svga_translate_flatshade( unsigned mode )
 }


+/**
+ * Translate a gallium PIPE_POLYGON_MODE_x value to the corresponding
+ * SVGA3D_FILLMODE_x value.  An unrecognized value asserts and falls
+ * back to SVGA3D_FILLMODE_FILL.
+ */
+static unsigned
+translate_fill_mode(unsigned fill)
+{
+   if (fill == PIPE_POLYGON_MODE_FILL)
+      return SVGA3D_FILLMODE_FILL;
+
+   if (fill == PIPE_POLYGON_MODE_LINE)
+      return SVGA3D_FILLMODE_LINE;
+
+   if (fill == PIPE_POLYGON_MODE_POINT)
+      return SVGA3D_FILLMODE_POINT;
+
+   assert(!"Bad fill mode");
+   return SVGA3D_FILLMODE_FILL;
+}
+
+
+/**
+ * Translate a gallium PIPE_FACE_x cull selection to the corresponding
+ * SVGA3D_CULL_x value.  An unrecognized value asserts and falls back to
+ * SVGA3D_CULL_NONE.
+ */
+static unsigned
+translate_cull_mode(unsigned cull)
+{
+   if (cull == PIPE_FACE_FRONT)
+      return SVGA3D_CULL_FRONT;
+
+   if (cull == PIPE_FACE_BACK)
+      return SVGA3D_CULL_BACK;
+
+   if (cull == PIPE_FACE_NONE)
+      return SVGA3D_CULL_NONE;
+
+   if (cull == PIPE_FACE_FRONT_AND_BACK) {
+      /* NOTE: we simply no-op polygon drawing in svga_draw_vbo() */
+      return SVGA3D_CULL_NONE;
+   }
+
+   assert(!"Bad cull mode");
+   return SVGA3D_CULL_NONE;
+}
+
+
+/**
+ * Define a vgpu10 rasterizer state object for the given svga rasterizer
+ * state.
+ *
+ * A new object ID is allocated from svga->rast_object_id_bm and stored in
+ * rast->id.  The define command is attempted at most twice, with a context
+ * flush after each failed attempt; if both attempts fail the function
+ * returns without reporting the error.
+ */
+static void
+define_rasterizer_object(struct svga_context *svga,
+                         struct svga_rasterizer_state *rast)
+{
+   unsigned fill_mode = translate_fill_mode(rast->templ.fill_front);
+   unsigned cull_mode = translate_cull_mode(rast->templ.cull_face);
+   /* NOTE(review): stored in an int; if offset_units is a float this
+    * truncates the fractional part -- confirm this is intended.
+    */
+   int depth_bias = rast->templ.offset_units;
+   float slope_scaled_depth_bias =  rast->templ.offset_scale;
+   float depth_bias_clamp = 0.0; /* XXX fix me */
+   unsigned try;
+   /* a non-positive line width is replaced by 1.0 */
+   const float line_width = rast->templ.line_width > 0.0f ?
+      rast->templ.line_width : 1.0f;
+   /* stipple factor/pattern are zeroed when line stipple is disabled */
+   const uint8 line_factor = rast->templ.line_stipple_enable ?
+      rast->templ.line_stipple_factor : 0;
+   const uint16 line_pattern = rast->templ.line_stipple_enable ?
+      rast->templ.line_stipple_pattern : 0;
+
+   rast->id = util_bitmask_add(svga->rast_object_id_bm);
+
+   if (rast->templ.fill_front != rast->templ.fill_back) {
+      /* The VGPU10 device can't handle different front/back fill modes.
+       * We'll handle that with a swtnl/draw fallback.  But we need to
+       * make sure we always fill triangles in that case.
+       */
+      fill_mode = SVGA3D_FILLMODE_FILL;
+   }
+
+   /* Two attempts: the context is flushed after each failed one */
+   for (try = 0; try < 2; try++) {
+      enum pipe_error ret =
+         SVGA3D_vgpu10_DefineRasterizerState(svga->swc,
+                                             rast->id,
+                                             fill_mode,
+                                             cull_mode,
+                                             rast->templ.front_ccw,
+                                             depth_bias,
+                                             depth_bias_clamp,
+                                             slope_scaled_depth_bias,
+                                             rast->templ.depth_clip,
+                                             rast->templ.scissor,
+                                             rast->templ.multisample,
+                                             rast->templ.line_smooth,
+                                             line_width,
+                                             rast->templ.line_stipple_enable,
+                                             line_factor,
+                                             line_pattern,
+                                             !rast->templ.flatshade_first);
+      if (ret == PIPE_OK)
+         return;
+      svga_context_flush(svga, NULL);
+   }
+}
+
+
 static void *
 svga_create_rasterizer_state(struct pipe_context *pipe,
                             const struct pipe_rasterizer_state *templ)
@ -92,17 +184,24 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
   rast->antialiasedlineenable = templ->line_smooth;
   rast->lastpixel = templ->line_last_pixel;
   rast->pointsprite = templ->sprite_coord_enable != 0x0;
-   rast->pointsize = templ->point_size;
-   rast->hw_unfilled = PIPE_POLYGON_MODE_FILL;
+
+   if (templ->point_smooth) {
+      /* For smooth points we need to generate fragments for at least
+       * a 2x2 region.  Otherwise the quad we draw may be too small and
+       * we may generate no fragments at all.
+       */
+      rast->pointsize = MAX2(2.0f, templ->point_size);
+   }
+   else {
+      rast->pointsize = templ->point_size;
+   }
+
+   rast->hw_fillmode = PIPE_POLYGON_MODE_FILL;

   /* Use swtnl + decomposition implement these:
    */
-   if (templ->poly_stipple_enable) {
-      rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
-      rast->need_pipeline_tris_str = "poly stipple";
-   }

-   if (screen->maxLineWidth > 1.0F) {
+   if (templ->line_width <= screen->maxLineWidth) {
      /* pass line width to device */
      rast->linewidth = MAX2(1.0F, templ->line_width);
   }
@ -129,7 +228,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
      }
   } 

-   if (templ->point_smooth) {
+   if (!svga_have_vgpu10(svga) && templ->point_smooth) {
      rast->need_pipeline |= SVGA_PIPELINE_FLAG_POINTS;
      rast->need_pipeline_points_str = "smooth points";
   }
@ -231,13 +330,13 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
         rast->depthbias = templ->offset_units;
      }

-      rast->hw_unfilled = fill;
+      rast->hw_fillmode = fill;
   }

   if (rast->need_pipeline & SVGA_PIPELINE_FLAG_TRIS) {
      /* Turn off stuff which will get done in the draw module:
       */
-      rast->hw_unfilled = PIPE_POLYGON_MODE_FILL;
+      rast->hw_fillmode = PIPE_POLYGON_MODE_FILL;
      rast->slopescaledepthbias = 0;
      rast->depthbias = 0;
   }
@ -249,6 +348,10 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
      debug_printf(" tris: %s \n", rast->need_pipeline_tris_str);
   }

+   if (svga_have_vgpu10(svga)) {
+      define_rasterizer_object(svga, rast);
+   }
+
   return rast;
 }

@ -258,18 +361,37 @@ static void svga_bind_rasterizer_state( struct pipe_context *pipe,
   struct svga_context *svga = svga_context(pipe);
   struct svga_rasterizer_state *raster = (struct svga_rasterizer_state *)state;

-
-   draw_set_rasterizer_state(svga->swtnl.draw, raster ? &raster->templ : NULL,
-                             state);
   svga->curr.rast = raster;

   svga->dirty |= SVGA_NEW_RAST;
+
+   if (raster && raster->templ.poly_stipple_enable) {
+      svga->dirty |= SVGA_NEW_STIPPLE;
+   }
 }

-static void svga_delete_rasterizer_state(struct pipe_context *pipe,
-                                         void *raster)
+/**
+ * Delete a rasterizer state object.
+ *
+ * On vgpu10 contexts the device-side rasterizer object is destroyed
+ * (retrying once after a context flush if the first attempt fails), any
+ * cached hw binding of its ID is invalidated, and the ID is returned to
+ * svga->rast_object_id_bm.  The CPU-side struct is then freed.
+ */
+static void
+svga_delete_rasterizer_state(struct pipe_context *pipe, void *state)
 {
-   FREE(raster);
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_rasterizer_state *raster =
+      (struct svga_rasterizer_state *) state;
+
+   if (svga_have_vgpu10(svga)) {
+      enum pipe_error ret =
+         SVGA3D_vgpu10_DestroyRasterizerState(svga->swc, raster->id);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_vgpu10_DestroyRasterizerState(svga->swc, raster->id);
+         /* a failed retry must not go unnoticed */
+         assert(ret == PIPE_OK);
+      }
+
+      /* forget the cached hw binding if it names the deleted object */
+      if (raster->id == svga->state.hw_draw.rasterizer_id)
+         svga->state.hw_draw.rasterizer_id = SVGA3D_INVALID_ID;
+
+      util_bitmask_clear(svga->rast_object_id_bm, raster->id);
+   }
+
+   FREE(state);
 }


--- a/src/gallium/drivers/svga/svga_pipe_sampler.c
+++ b/src/gallium/drivers/svga/svga_pipe_sampler.c
@ -23,17 +23,19 @@
 *
 **********************************************************/

-#include "util/u_inlines.h"
 #include "pipe/p_defines.h"
+#include "util/u_bitmask.h"
 #include "util/u_format.h"
+#include "util/u_inlines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "tgsi/tgsi_parse.h"

 #include "svga_context.h"
+#include "svga_cmd.h"
+#include "svga_debug.h"
 #include "svga_resource_texture.h"

-#include "svga_debug.h"

 static inline unsigned
 translate_wrap_mode(unsigned wrap)
@ -91,6 +93,126 @@ static inline unsigned translate_mip_filter( unsigned filter )
   }
 }

+
+/**
+ * Translate a gallium PIPE_FUNC_x comparison function to the matching
+ * SVGA3D_COMPARISON_x token.  An unrecognized value asserts and falls
+ * back to SVGA3D_COMPARISON_ALWAYS.
+ */
+static uint8
+translate_comparison_func(unsigned func)
+{
+   static const struct {
+      unsigned pipe_func;
+      uint8 svga_func;
+   } func_map[] = {
+      { PIPE_FUNC_NEVER,    SVGA3D_COMPARISON_NEVER },
+      { PIPE_FUNC_LESS,     SVGA3D_COMPARISON_LESS },
+      { PIPE_FUNC_EQUAL,    SVGA3D_COMPARISON_EQUAL },
+      { PIPE_FUNC_LEQUAL,   SVGA3D_COMPARISON_LESS_EQUAL },
+      { PIPE_FUNC_GREATER,  SVGA3D_COMPARISON_GREATER },
+      { PIPE_FUNC_NOTEQUAL, SVGA3D_COMPARISON_NOT_EQUAL },
+      { PIPE_FUNC_GEQUAL,   SVGA3D_COMPARISON_GREATER_EQUAL },
+      { PIPE_FUNC_ALWAYS,   SVGA3D_COMPARISON_ALWAYS },
+   };
+   unsigned i;
+
+   for (i = 0; i < sizeof(func_map) / sizeof(func_map[0]); i++) {
+      if (func_map[i].pipe_func == func)
+         return func_map[i].svga_func;
+   }
+
+   assert(!"Invalid comparison function");
+   return SVGA3D_COMPARISON_ALWAYS;
+}
+
+
+/**
+ * Build the vgpu10 SVGA3dFilter bitmask from gallium filtering state.
+ *
+ * Each of img_filter / min_filter / mag_filter that equals
+ * PIPE_TEX_FILTER_LINEAR sets the MIP_LINEAR / MIN_LINEAR / MAG_LINEAR
+ * bit respectively; the anisotropic and compare flags set their own bits.
+ */
+static SVGA3dFilter
+translate_filter_mode(unsigned img_filter,
+                      unsigned min_filter,
+                      unsigned mag_filter,
+                      boolean anisotropic,
+                      boolean compare)
+{
+   SVGA3dFilter mode = 0;
+
+   mode |= (img_filter == PIPE_TEX_FILTER_LINEAR) ? SVGA3D_FILTER_MIP_LINEAR : 0;
+   mode |= (min_filter == PIPE_TEX_FILTER_LINEAR) ? SVGA3D_FILTER_MIN_LINEAR : 0;
+   mode |= (mag_filter == PIPE_TEX_FILTER_LINEAR) ? SVGA3D_FILTER_MAG_LINEAR : 0;
+   mode |= anisotropic ? SVGA3D_FILTER_ANISOTROPIC : 0;
+   mode |= compare ? SVGA3D_FILTER_COMPARE : 0;
+
+   return mode;
+}
+
+
+/**
+ * Define a vgpu10 sampler state.
+ *
+ * The filter, comparison function, border color and LOD range are derived
+ * from the svga sampler state \p ss and the gallium sampler template
+ * \p ps.  A new object ID is allocated from svga->sampler_object_id_bm and
+ * stored in ss->id.  The define command is attempted at most twice, with
+ * a context flush after each failed attempt; if both attempts fail the
+ * function returns without reporting the error.
+ */
+static void
+define_sampler_state_object(struct svga_context *svga,
+                            struct svga_sampler_state *ss,
+                            const struct pipe_sampler_state *ps)
+{
+   uint8_t max_aniso = (uint8_t) 255; /* XXX fix me */
+   boolean anisotropic;
+   uint8 compare_func;
+   SVGA3dFilter filter;
+   SVGA3dRGBAFloat bcolor;
+   unsigned try;
+   float min_lod, max_lod;
+
+   assert(svga_have_vgpu10(svga));
+
+   anisotropic = ss->aniso_level > 1.0f;
+
+   /* NOTE(review): the first argument is the mip filter, which
+    * translate_filter_mode() compares against PIPE_TEX_FILTER_LINEAR;
+    * this relies on the MIPFILTER and FILTER "linear" values being
+    * equal -- confirm.
+    */
+   filter = translate_filter_mode(ps->min_mip_filter,
+                                  ps->min_img_filter,
+                                  ps->mag_img_filter,
+                                  anisotropic,
+                                  ss->compare_mode);
+
+   compare_func = translate_comparison_func(ss->compare_func);
+
+   COPY_4V(bcolor.value, ps->border_color.f);
+
+   ss->id = util_bitmask_add(svga->sampler_object_id_bm);
+
+   assert(ps->min_lod <= ps->max_lod);
+
+   if (ps->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+      /* just use the base level image */
+      min_lod = max_lod = 0.0f;
+   }
+   else {
+      min_lod = ps->min_lod;
+      max_lod = ps->max_lod;
+   }
+
+   /* Loop in case command buffer is full and we need to flush and retry */
+   for (try = 0; try < 2; try++) {
+      enum pipe_error ret =
+         SVGA3D_vgpu10_DefineSamplerState(svga->swc,
+                                          ss->id,
+                                          filter,
+                                          ss->addressu,
+                                          ss->addressv,
+                                          ss->addressw,
+                                          ss->lod_bias, /* float */
+                                          max_aniso,
+                                          compare_func,
+                                          bcolor,
+                                          min_lod,       /* float */
+                                          max_lod);      /* float */
+      if (ret == PIPE_OK)
+         return;
+      svga_context_flush(svga, NULL);
+   }
+}
+
+
 static void *
 svga_create_sampler_state(struct pipe_context *pipe,
                          const struct pipe_sampler_state *sampler)
@ -141,6 +263,10 @@ svga_create_sampler_state(struct pipe_context *pipe,
      }
   }

+   if (svga_have_vgpu10(svga)) {
+      define_sampler_state_object(svga, cso, sampler);
+   }
+
   SVGA_DBG(DEBUG_VIEWS, "min %u, view(min %u, max %u) lod, mipfilter %s\n",
            cso->min_lod, cso->view_min_lod, cso->view_max_lod,
            cso->mipfilter == SVGA3D_TEX_FILTER_NONE ? "SVGA3D_TEX_FILTER_NONE" : "SOMETHING");
@ -161,19 +287,19 @@ svga_bind_sampler_states(struct pipe_context *pipe,
   assert(shader < PIPE_SHADER_TYPES);
   assert(start + num <= PIPE_MAX_SAMPLERS);

-   /* we only support fragment shader samplers at this time */
-   if (shader != PIPE_SHADER_FRAGMENT)
+   /* Pre-VGPU10 only supports FS textures */
+   if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT)
      return;

   for (i = 0; i < num; i++)
-      svga->curr.sampler[start + i] = samplers[i];
+      svga->curr.sampler[shader][start + i] = samplers[i];

   /* find highest non-null sampler[] entry */
   {
-      unsigned j = MAX2(svga->curr.num_samplers, start + num);
-      while (j > 0 && svga->curr.sampler[j - 1] == NULL)
+      unsigned j = MAX2(svga->curr.num_samplers[shader], start + num);
+      while (j > 0 && svga->curr.sampler[shader][j - 1] == NULL)
         j--;
-      svga->curr.num_samplers = j;
+      svga->curr.num_samplers[shader] = j;
   }

   svga->dirty |= SVGA_NEW_SAMPLER;
@ -183,6 +309,22 @@ svga_bind_sampler_states(struct pipe_context *pipe,
 static void svga_delete_sampler_state(struct pipe_context *pipe,
                                      void *sampler)
 {
+   /* Delete a sampler state object.  With VGPU10 the device-side sampler
+    * object identified by ss->id is destroyed and the id is released in
+    * sampler_object_id_bm before the CSO memory itself is freed.
+    */
+   struct svga_sampler_state *ss = (struct svga_sampler_state *) sampler;
+   struct svga_context *svga = svga_context(pipe);
+
+   if (svga_have_vgpu10(svga)) {
+      enum pipe_error ret;
+
+      svga_hwtnl_flush_retry(svga);
+
+      /* If the first attempt fails, flush the context and retry once.
+       * The retry is expected to succeed; check it like the other
+       * destroy paths in this driver do.
+       */
+      ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id);
+         assert(ret == PIPE_OK);
+      }
+      util_bitmask_clear(svga->sampler_object_id_bm, ss->id);
+   }
+
   FREE(sampler);
 }

@ -192,17 +334,21 @@ svga_create_sampler_view(struct pipe_context *pipe,
                         struct pipe_resource *texture,
                         const struct pipe_sampler_view *templ)
 {
-   struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
+   struct svga_pipe_sampler_view *sv = CALLOC_STRUCT(svga_pipe_sampler_view);

-   if (view) {
-      *view = *templ;
-      view->reference.count = 1;
-      view->texture = NULL;
-      pipe_resource_reference(&view->texture, texture);
-      view->context = pipe;
+   if (!sv) {
+      return NULL;
   }

-   return view;
+   sv->base = *templ;
+   sv->base.reference.count = 1;
+   sv->base.texture = NULL;
+   pipe_resource_reference(&sv->base.texture, texture);
+
+   sv->base.context = pipe;
+   sv->id = SVGA3D_INVALID_ID;
+
+   return &sv->base;
 }


@ -210,8 +356,37 @@ static void
 svga_sampler_view_destroy(struct pipe_context *pipe,
                          struct pipe_sampler_view *view)
 {
-   pipe_resource_reference(&view->texture, NULL);
-   FREE(view);
+   /* Destroy a sampler view.  With VGPU10, a view that has a valid
+    * device id also has its shader resource view destroyed on the device
+    * (when owned by this context) before the texture reference is dropped
+    * and the view memory is freed.
+    */
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_pipe_sampler_view *sv = svga_pipe_sampler_view(view);
+
+   if (svga_have_vgpu10(svga) && sv->id != SVGA3D_INVALID_ID) {
+      if (view->context != pipe) {
+         /* The SVGA3D device will generate an error (and on Linux, cause
+          * us to abort) if we try to destroy a shader resource view from
+          * a context other than the one it was created with.  Skip the
+          * SVGA3D_vgpu10_DestroyShaderResourceView() and leak the sampler
+          * view for now.  This should only sometimes happen when a shared
+          * texture is deleted.
+          */
+         _debug_printf("context mismatch in %s\n", __func__);
+      }
+      else {
+         enum pipe_error ret;
+
+         svga_hwtnl_flush_retry(svga); /* XXX is this needed? */
+
+         /* If the first attempt fails, flush the context and retry once;
+          * the retry is expected to succeed.
+          */
+         ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id);
+         if (ret != PIPE_OK) {
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id);
+            assert(ret == PIPE_OK);
+         }
+         util_bitmask_clear(svga->sampler_view_id_bm, sv->id);
+      }
+   }
+
+   pipe_resource_reference(&sv->base.texture, NULL);
+
+   FREE(sv);
 }

 static void
@ -227,20 +402,20 @@ svga_set_sampler_views(struct pipe_context *pipe,
   uint i;

   assert(shader < PIPE_SHADER_TYPES);
-   assert(start + num <= Elements(svga->curr.sampler_views));
+   assert(start + num <= Elements(svga->curr.sampler_views[shader]));

-   /* we only support fragment shader sampler views at this time */
-   if (shader != PIPE_SHADER_FRAGMENT)
+   /* Pre-VGPU10 only supports FS textures */
+   if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT)
      return;

   for (i = 0; i < num; i++) {
-      if (svga->curr.sampler_views[start + i] != views[i]) {
+      if (svga->curr.sampler_views[shader][start + i] != views[i]) {
         /* Note: we're using pipe_sampler_view_release() here to work around
          * a possible crash when the old view belongs to another context that
          * was already destroyed.
          */
-         pipe_sampler_view_release(pipe, &svga->curr.sampler_views[start + i]);
-         pipe_sampler_view_reference(&svga->curr.sampler_views[start + i],
+         pipe_sampler_view_release(pipe, &svga->curr.sampler_views[shader][start + i]);
+         pipe_sampler_view_reference(&svga->curr.sampler_views[shader][start + i],
                                     views[i]);
      }

@ -256,10 +431,10 @@ svga_set_sampler_views(struct pipe_context *pipe,

   /* find highest non-null sampler_views[] entry */
   {
-      unsigned j = MAX2(svga->curr.num_sampler_views, start + num);
-      while (j > 0 && svga->curr.sampler_views[j - 1] == NULL)
+      unsigned j = MAX2(svga->curr.num_sampler_views[shader], start + num);
+      while (j > 0 && svga->curr.sampler_views[shader][j - 1] == NULL)
         j--;
-      svga->curr.num_sampler_views = j;
+      svga->curr.num_sampler_views[shader] = j;
   }

   svga->dirty |= SVGA_NEW_TEXTURE_BINDING;
--- a/src/gallium/drivers/svga/svga_pipe_streamout.c
+++ b/src/gallium/drivers/svga/svga_pipe_streamout.c
@ -38,7 +38,7 @@ struct svga_stream_output_target {
 };

 /** cast wrapper */
-static INLINE struct svga_stream_output_target *
+static inline struct svga_stream_output_target *
 svga_stream_output_target(struct pipe_stream_output_target *s)
 {
   return (struct svga_stream_output_target *)s;
--- a/src/gallium/drivers/svga/svga_pipe_vertex.c
+++ b/src/gallium/drivers/svga/svga_pipe_vertex.c
@ -23,17 +23,21 @@
 *
 **********************************************************/

+#include "pipe/p_defines.h"
+#include "util/u_bitmask.h"
+#include "util/u_format.h"
 #include "util/u_helpers.h"
 #include "util/u_inlines.h"
-#include "pipe/p_defines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_transfer.h"
 #include "tgsi/tgsi_parse.h"

-#include "svga_screen.h"
-#include "svga_resource_buffer.h"
 #include "svga_context.h"
+#include "svga_cmd.h"
+#include "svga_format.h"
+#include "svga_resource_buffer.h"
+#include "svga_screen.h"


 static void svga_set_vertex_buffers(struct pipe_context *pipe,
@ -55,59 +59,7 @@ static void svga_set_index_buffer(struct pipe_context *pipe,
 {
   struct svga_context *svga = svga_context(pipe);

-   if (ib) {
-      pipe_resource_reference(&svga->curr.ib.buffer, ib->buffer);
-      memcpy(&svga->curr.ib, ib, sizeof(svga->curr.ib));
-   }
-   else {
-      pipe_resource_reference(&svga->curr.ib.buffer, NULL);
-      memset(&svga->curr.ib, 0, sizeof(svga->curr.ib));
-   }
-
-   /* TODO make this more like a state */
-}
-
-
-/**
- * Given a gallium vertex element format, return the corresponding SVGA3D
- * format.  Return SVGA3D_DECLTYPE_MAX for unsupported gallium formats.
- */
-static SVGA3dDeclType
-translate_vertex_format(enum pipe_format format)
-{
-   switch (format) {
-   case PIPE_FORMAT_R32_FLOAT:            return SVGA3D_DECLTYPE_FLOAT1;
-   case PIPE_FORMAT_R32G32_FLOAT:         return SVGA3D_DECLTYPE_FLOAT2;
-   case PIPE_FORMAT_R32G32B32_FLOAT:      return SVGA3D_DECLTYPE_FLOAT3;
-   case PIPE_FORMAT_R32G32B32A32_FLOAT:   return SVGA3D_DECLTYPE_FLOAT4;
-   case PIPE_FORMAT_B8G8R8A8_UNORM:       return SVGA3D_DECLTYPE_D3DCOLOR;
-   case PIPE_FORMAT_R8G8B8A8_USCALED:     return SVGA3D_DECLTYPE_UBYTE4;
-   case PIPE_FORMAT_R16G16_SSCALED:       return SVGA3D_DECLTYPE_SHORT2;
-   case PIPE_FORMAT_R16G16B16A16_SSCALED: return SVGA3D_DECLTYPE_SHORT4;
-   case PIPE_FORMAT_R8G8B8A8_UNORM:       return SVGA3D_DECLTYPE_UBYTE4N;
-   case PIPE_FORMAT_R16G16_SNORM:         return SVGA3D_DECLTYPE_SHORT2N;
-   case PIPE_FORMAT_R16G16B16A16_SNORM:   return SVGA3D_DECLTYPE_SHORT4N;
-   case PIPE_FORMAT_R16G16_UNORM:         return SVGA3D_DECLTYPE_USHORT2N;
-   case PIPE_FORMAT_R16G16B16A16_UNORM:   return SVGA3D_DECLTYPE_USHORT4N;
-   case PIPE_FORMAT_R10G10B10X2_USCALED:  return SVGA3D_DECLTYPE_UDEC3;
-   case PIPE_FORMAT_R10G10B10X2_SNORM:    return SVGA3D_DECLTYPE_DEC3N;
-   case PIPE_FORMAT_R16G16_FLOAT:         return SVGA3D_DECLTYPE_FLOAT16_2;
-   case PIPE_FORMAT_R16G16B16A16_FLOAT:   return SVGA3D_DECLTYPE_FLOAT16_4;
-
-   /* See attrib_needs_adjustment() and attrib_needs_w_to_1() below */
-   case PIPE_FORMAT_R8G8B8_SNORM:         return SVGA3D_DECLTYPE_UBYTE4N;
-
-   /* See attrib_needs_w_to_1() below */
-   case PIPE_FORMAT_R16G16B16_SNORM:      return SVGA3D_DECLTYPE_SHORT4N;
-   case PIPE_FORMAT_R16G16B16_UNORM:      return SVGA3D_DECLTYPE_USHORT4N;
-   case PIPE_FORMAT_R8G8B8_UNORM:         return SVGA3D_DECLTYPE_UBYTE4N;
-
-   default:
-      /* There are many formats without hardware support.  This case
-       * will be hit regularly, meaning we'll need swvfetch.
-       */
-      return SVGA3D_DECLTYPE_MAX;
-   }
+   util_set_index_buffer(&svga->curr.ib, ib);
 }


@ -129,20 +81,163 @@ attrib_needs_range_adjustment(enum pipe_format format)


 /**
- * Does the given vertex attrib format need to have the W component set
- * to one in the VS?
+ * Given a gallium vertex element format, return the corresponding
+ * SVGA3dDeclType.  Returns SVGA3D_DECLTYPE_MAX for formats that have no
+ * entry in the table below.
 */
-static boolean
-attrib_needs_w_to_1(enum pipe_format format)
+static SVGA3dDeclType
+translate_vertex_format_to_decltype(enum pipe_format format)
 {
   switch (format) {
-   case PIPE_FORMAT_R8G8B8_SNORM:
-   case PIPE_FORMAT_R8G8B8_UNORM:
-   case PIPE_FORMAT_R16G16B16_SNORM:
-   case PIPE_FORMAT_R16G16B16_UNORM:
-      return TRUE;
+   case PIPE_FORMAT_R32_FLOAT:            return SVGA3D_DECLTYPE_FLOAT1;
+   case PIPE_FORMAT_R32G32_FLOAT:         return SVGA3D_DECLTYPE_FLOAT2;
+   case PIPE_FORMAT_R32G32B32_FLOAT:      return SVGA3D_DECLTYPE_FLOAT3;
+   case PIPE_FORMAT_R32G32B32A32_FLOAT:   return SVGA3D_DECLTYPE_FLOAT4;
+   case PIPE_FORMAT_B8G8R8A8_UNORM:       return SVGA3D_DECLTYPE_D3DCOLOR;
+   case PIPE_FORMAT_R8G8B8A8_USCALED:     return SVGA3D_DECLTYPE_UBYTE4;
+   case PIPE_FORMAT_R16G16_SSCALED:       return SVGA3D_DECLTYPE_SHORT2;
+   case PIPE_FORMAT_R16G16B16A16_SSCALED: return SVGA3D_DECLTYPE_SHORT4;
+   case PIPE_FORMAT_R8G8B8A8_UNORM:       return SVGA3D_DECLTYPE_UBYTE4N;
+   case PIPE_FORMAT_R16G16_SNORM:         return SVGA3D_DECLTYPE_SHORT2N;
+   case PIPE_FORMAT_R16G16B16A16_SNORM:   return SVGA3D_DECLTYPE_SHORT4N;
+   case PIPE_FORMAT_R16G16_UNORM:         return SVGA3D_DECLTYPE_USHORT2N;
+   case PIPE_FORMAT_R16G16B16A16_UNORM:   return SVGA3D_DECLTYPE_USHORT4N;
+   case PIPE_FORMAT_R10G10B10X2_USCALED:  return SVGA3D_DECLTYPE_UDEC3;
+   case PIPE_FORMAT_R10G10B10X2_SNORM:    return SVGA3D_DECLTYPE_DEC3N;
+   case PIPE_FORMAT_R16G16_FLOAT:         return SVGA3D_DECLTYPE_FLOAT16_2;
+   case PIPE_FORMAT_R16G16B16A16_FLOAT:   return SVGA3D_DECLTYPE_FLOAT16_4;
+
+   /* See attrib_needs_range_adjustment() above and the VF_W_TO_1 flag
+    * handling in translate_vertex_decls() below.
+    */
+   case PIPE_FORMAT_R8G8B8_SNORM:         return SVGA3D_DECLTYPE_UBYTE4N;
+
+   /* See the VF_W_TO_1 flag handling in translate_vertex_decls() below */
+   case PIPE_FORMAT_R16G16B16_SNORM:      return SVGA3D_DECLTYPE_SHORT4N;
+   case PIPE_FORMAT_R16G16B16_UNORM:      return SVGA3D_DECLTYPE_USHORT4N;
+   case PIPE_FORMAT_R8G8B8_UNORM:         return SVGA3D_DECLTYPE_UBYTE4N;
+
   default:
-      return FALSE;
+      /* There are many formats without hardware support.  This case
+       * will be hit regularly, meaning we'll need swvfetch.
+       */
+      return SVGA3D_DECLTYPE_MAX;
+   }
+}
+
+
+/**
+ * Build the VGPU10 input element layout for the given vertex elements
+ * state and define it on the device.
+ *
+ * For each vertex element this fills in an SVGA3dInputElementDesc,
+ * records the SVGA3dDeclType in velems->decl_type[] and sets the
+ * per-attribute bits in the velems bitmasks according to the flags
+ * returned by svga_translate_vertex_format_vgpu10().  A new layout id is
+ * taken from svga->input_element_object_id_bm and stored in velems->id.
+ *
+ * \param svga    the context; must be a VGPU10 context (asserted)
+ * \param velems  the vertex elements state; count and velem[] must
+ *                already be filled in
+ */
+static void
+define_input_element_object(struct svga_context *svga,
+                            struct svga_velems_state *velems)
+{
+   SVGA3dInputElementDesc elements[PIPE_MAX_ATTRIBS];
+   enum pipe_error ret;
+   unsigned i;
+
+   assert(velems->count <= PIPE_MAX_ATTRIBS);
+   assert(svga_have_vgpu10(svga));
+
+   for (i = 0; i < velems->count; i++) {
+      const struct pipe_vertex_element *elem = velems->velem + i;
+      /* Use an unsigned shift: (1 << i) on a signed int is undefined
+       * if i ever reaches bit 31.
+       */
+      const unsigned attrib_bit = 1u << i;
+      SVGA3dSurfaceFormat svga_format;
+      unsigned vf_flags;
+
+      svga_translate_vertex_format_vgpu10(elem->src_format,
+                                          &svga_format, &vf_flags);
+
+      velems->decl_type[i] =
+         translate_vertex_format_to_decltype(elem->src_format);
+      elements[i].inputSlot = elem->vertex_buffer_index;
+      elements[i].alignedByteOffset = elem->src_offset;
+      elements[i].format = svga_format;
+
+      if (elem->instance_divisor) {
+         elements[i].inputSlotClass = SVGA3D_INPUT_PER_INSTANCE_DATA;
+         elements[i].instanceDataStepRate = elem->instance_divisor;
+      }
+      else {
+         elements[i].inputSlotClass = SVGA3D_INPUT_PER_VERTEX_DATA;
+         elements[i].instanceDataStepRate = 0;
+      }
+      elements[i].inputRegister = i;
+
+      /* No device format for this attribute: fall back to swvfetch */
+      if (elements[i].format == SVGA3D_FORMAT_INVALID) {
+         velems->need_swvfetch = TRUE;
+      }
+
+      if (util_format_is_pure_integer(elem->src_format)) {
+         velems->attrib_is_pure_int |= attrib_bit;
+      }
+
+      if (vf_flags & VF_W_TO_1) {
+         velems->adjust_attrib_w_1 |= attrib_bit;
+      }
+
+      if (vf_flags & VF_U_TO_F_CAST) {
+         velems->adjust_attrib_utof |= attrib_bit;
+      }
+      else if (vf_flags & VF_I_TO_F_CAST) {
+         velems->adjust_attrib_itof |= attrib_bit;
+      }
+
+      if (vf_flags & VF_BGRA) {
+         velems->attrib_is_bgra |= attrib_bit;
+      }
+
+      if (vf_flags & VF_PUINT_TO_SNORM) {
+         velems->attrib_puint_to_snorm |= attrib_bit;
+      }
+      else if (vf_flags & VF_PUINT_TO_USCALED) {
+         velems->attrib_puint_to_uscaled |= attrib_bit;
+      }
+      else if (vf_flags & VF_PUINT_TO_SSCALED) {
+         velems->attrib_puint_to_sscaled |= attrib_bit;
+      }
+   }
+
+   velems->id = util_bitmask_add(svga->input_element_object_id_bm);
+
+   /* If the first attempt fails, flush the context and retry once */
+   ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, velems->count,
+                                           velems->id, elements);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, velems->count,
+                                              velems->id, elements);
+      assert(ret == PIPE_OK);
+   }
+}
+
+
+/**
+ * Translate the vertex element types to SVGA3dDeclType and check
+ * for VS-based vertex attribute adjustments.
+ *
+ * Only used for non-VGPU10 contexts (asserted).  Sets
+ * velems->decl_type[], velems->need_swvfetch and the
+ * adjust_attrib_range / adjust_attrib_w_1 bitmasks.
+ */
+static void
+translate_vertex_decls(struct svga_context *svga,
+                       struct svga_velems_state *velems)
+{
+   unsigned i;
+
+   assert(!svga_have_vgpu10(svga));
+
+   for (i = 0; i < velems->count; i++) {
+      const enum pipe_format f = velems->velem[i].src_format;
+      SVGA3dSurfaceFormat svga_format;
+      unsigned vf_flags;
+
+      /* The vgpu10 translation helper is called here only for its VF_*
+       * flags; svga_format is not used on this path.
+       */
+      svga_translate_vertex_format_vgpu10(f, &svga_format, &vf_flags);
+
+      velems->decl_type[i] = translate_vertex_format_to_decltype(f);
+      if (velems->decl_type[i] == SVGA3D_DECLTYPE_MAX) {
+         /* Unsupported format - use software fetch */
+         velems->need_swvfetch = TRUE;
+      }
+
+      /* Check for VS-based adjustments.  Use unsigned shifts: (1 << i)
+       * on a signed int is undefined if i ever reaches bit 31.
+       */
+      if (attrib_needs_range_adjustment(f)) {
+         velems->adjust_attrib_range |= (1u << i);
+      }
+
+      if (vf_flags & VF_W_TO_1) {
+         velems->adjust_attrib_w_1 |= (1u << i);
+      }
   }
 }

@ -152,53 +247,73 @@ svga_create_vertex_elements_state(struct pipe_context *pipe,
                                  unsigned count,
                                  const struct pipe_vertex_element *attribs)
 {
+   struct svga_context *svga = svga_context(pipe);
   struct svga_velems_state *velems;
+
   assert(count <= PIPE_MAX_ATTRIBS);
   velems = (struct svga_velems_state *) MALLOC(sizeof(struct svga_velems_state));
   if (velems) {
-      unsigned i;
-
      velems->count = count;
      memcpy(velems->velem, attribs, sizeof(*attribs) * count);

      velems->need_swvfetch = FALSE;
      velems->adjust_attrib_range = 0x0;
+      velems->attrib_is_pure_int = 0x0;
      velems->adjust_attrib_w_1 = 0x0;
+      velems->adjust_attrib_itof = 0x0;
+      velems->adjust_attrib_utof = 0x0;
+      velems->attrib_is_bgra = 0x0;
+      velems->attrib_puint_to_snorm = 0x0;
+      velems->attrib_puint_to_uscaled = 0x0;
+      velems->attrib_puint_to_sscaled = 0x0;

-      /* Translate Gallium vertex format to SVGA3dDeclType */
-      for (i = 0; i < count; i++) {
-         enum pipe_format f = attribs[i].src_format;
-         velems->decl_type[i] = translate_vertex_format(f);
-         if (velems->decl_type[i] == SVGA3D_DECLTYPE_MAX) {
-            /* Unsupported format - use software fetch */
-            velems->need_swvfetch = TRUE;
-            break;
-         }
-
-         if (attrib_needs_range_adjustment(f)) {
-            velems->adjust_attrib_range |= (1 << i);
-         }
-         if (attrib_needs_w_to_1(f)) {
-            velems->adjust_attrib_w_1 |= (1 << i);
-         }
+      if (svga_have_vgpu10(svga)) {
+         define_input_element_object(svga, velems);
+      }
+      else {
+         translate_vertex_decls(svga, velems);
      }
   }
   return velems;
 }

-static void svga_bind_vertex_elements_state(struct pipe_context *pipe,
-                                            void *velems)
+
+/**
+ * Make the given vertex elements state the current one for the context
+ * and flag SVGA_NEW_VELEMENT as dirty.
+ */
+static void
+svga_bind_vertex_elements_state(struct pipe_context *pipe, void *state)
 {
   struct svga_context *svga = svga_context(pipe);
-   struct svga_velems_state *svga_velems = (struct svga_velems_state *) velems;
+   struct svga_velems_state *velems = (struct svga_velems_state *) state;

-   svga->curr.velems = svga_velems;
+   svga->curr.velems = velems;
   svga->dirty |= SVGA_NEW_VELEMENT;
 }

-static void svga_delete_vertex_elements_state(struct pipe_context *pipe,
-                                              void *velems)
+
+/**
+ * Delete a vertex elements state object.  With VGPU10 the device-side
+ * element layout is destroyed and its id is released in
+ * input_element_object_id_bm before the state memory is freed.
+ */
+static void
+svga_delete_vertex_elements_state(struct pipe_context *pipe, void *state)
 {
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_velems_state *velems = (struct svga_velems_state *) state;
+
+   if (svga_have_vgpu10(svga)) {
+      enum pipe_error ret;
+
+      svga_hwtnl_flush_retry(svga);
+
+      /* If the first attempt fails, flush the context and retry once */
+      ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc, velems->id);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc, velems->id);
+         assert(ret == PIPE_OK);
+      }
+
+      /* Reset the hw_draw layout id if it refers to the layout that was
+       * just destroyed.
+       */
+      if (velems->id == svga->state.hw_draw.layout_id)
+         svga->state.hw_draw.layout_id = SVGA3D_INVALID_ID;
+
+      util_bitmask_clear(svga->input_element_object_id_bm, velems->id);
+      velems->id = SVGA3D_INVALID_ID;
+   }
+
   FREE(velems);
 }

@ -219,5 +334,3 @@ void svga_init_vertex_functions( struct svga_context *svga )
   svga->pipe.bind_vertex_elements_state = svga_bind_vertex_elements_state;
   svga->pipe.delete_vertex_elements_state = svga_delete_vertex_elements_state;
 }
-
-
--- a/src/gallium/drivers/svga/svga_pipe_vs.c
+++ b/src/gallium/drivers/svga/svga_pipe_vs.c
@ -32,11 +32,11 @@
 #include "tgsi/tgsi_text.h"

 #include "svga_context.h"
-#include "svga_tgsi.h"
 #include "svga_hw_reg.h"
 #include "svga_cmd.h"
 #include "svga_debug.h"
 #include "svga_shader.h"
+#include "svga_streamout.h"


 /**
@ -100,6 +100,7 @@ svga_create_vs_state(struct pipe_context *pipe,
 {
   struct svga_context *svga = svga_context(pipe);
   struct svga_vertex_shader *vs = CALLOC_STRUCT(svga_vertex_shader);
+
   if (!vs)
      return NULL;

@ -123,10 +124,12 @@ svga_create_vs_state(struct pipe_context *pipe,

   vs->base.id = svga->debug.shader_id++;

-   if (SVGA_DEBUG & DEBUG_TGSI || 0) {
-      debug_printf("%s id: %u, inputs: %u, outputs: %u\n",
-                   __FUNCTION__, vs->base.id,
-                   vs->base.info.num_inputs, vs->base.info.num_outputs);
+   vs->generic_outputs = svga_get_generic_outputs_mask(&vs->base.info);
+
+   /* check for any stream output declarations */
+   if (templ->stream_output.num_outputs) {
+      vs->base.stream_output = svga_create_stream_output(svga, &vs->base,
+                                                         &templ->stream_output);
   }

   return vs;
@ -139,6 +142,17 @@ svga_bind_vs_state(struct pipe_context *pipe, void *shader)
   struct svga_vertex_shader *vs = (struct svga_vertex_shader *)shader;
   struct svga_context *svga = svga_context(pipe);

+   if (vs == svga->curr.vs)
+      return;
+
+   /* If the currently bound vertex shader has a generated geometry shader,
+    * then unbind the geometry shader before binding a new vertex shader.
+    * We need to unbind the geometry shader here because there is no
+    * pipe_shader associated with the generated geometry shader.
+    */
+   if (svga->curr.vs != NULL && svga->curr.vs->gs != NULL)
+      svga->pipe.bind_gs_state(&svga->pipe, NULL);
+
   svga->curr.vs = vs;
   svga->dirty |= SVGA_NEW_VS;
 }
@ -154,20 +168,40 @@ svga_delete_vs_state(struct pipe_context *pipe, void *shader)

   svga_hwtnl_flush_retry(svga);

+   assert(vs->base.parent == NULL);
+
+   /* Check if there is a generated geometry shader to go with this
+    * vertex shader. If there is, then delete the geometry shader as well.
+    */
+   if (vs->gs != NULL) {
+      svga->pipe.delete_gs_state(&svga->pipe, vs->gs);
+   }
+
+   if (vs->base.stream_output != NULL)
+      svga_delete_stream_output(svga, vs->base.stream_output);
+
   draw_delete_vertex_shader(svga->swtnl.draw, vs->draw_shader);

   for (variant = vs->base.variants; variant; variant = tmp) {
      tmp = variant->next;

-      ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
-      (void) ret;  /* PIPE_ERROR_ not handled yet */
-
-      /*
-       * Remove stale references to this variant to ensure a new variant on the
-       * same address will be detected as a change.
-       */
-      if (variant == svga->state.hw_draw.vs)
+      /* Check if deleting currently bound shader */
+      if (variant == svga->state.hw_draw.vs) {
+         ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL);
+         if (ret != PIPE_OK) {
+            svga_context_flush(svga, NULL);
+            ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL);
+            assert(ret == PIPE_OK);
+         }
         svga->state.hw_draw.vs = NULL;
+      }
+
+      ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
+         assert(ret == PIPE_OK);
+      }
   }

   FREE((void *)vs->base.tokens);
--- a/src/gallium/drivers/svga/svga_resource.c
+++ b/src/gallium/drivers/svga/svga_resource.c
@ -69,18 +69,21 @@ svga_can_create_resource(struct pipe_screen *screen,
   struct svga_winsys_screen *sws = svgascreen->sws;
   SVGA3dSurfaceFormat format;
   SVGA3dSize base_level_size;
-   uint32 numFaces;
   uint32 numMipLevels;
+   uint32 arraySize;

   if (res->target == PIPE_BUFFER) {
      format = SVGA3D_BUFFER;
      base_level_size.width = res->width0;
      base_level_size.height = 1;
      base_level_size.depth = 1;
-      numFaces = 1;
      numMipLevels = 1;
+      arraySize = 1;

   } else {
+      if (res->target == PIPE_TEXTURE_CUBE)
+         assert(res->array_size == 6);
+
      format = svga_translate_format(svgascreen, res->format, res->bind);
      if (format == SVGA3D_FORMAT_INVALID)
         return FALSE;
@ -88,12 +91,12 @@ svga_can_create_resource(struct pipe_screen *screen,
      base_level_size.width = res->width0;
      base_level_size.height = res->height0;
      base_level_size.depth = res->depth0;
-      numFaces = (res->target == PIPE_TEXTURE_CUBE) ? 6 : 1;
      numMipLevels = res->last_level + 1;
+      arraySize = res->array_size;
   }

   return sws->surface_can_create(sws, format, base_level_size, 
-                                  numFaces, numMipLevels);
+                                  arraySize, numMipLevels);
 }


--- a/src/gallium/drivers/svga/svga_resource_buffer.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer.c
@ -48,7 +48,8 @@
 static inline boolean
 svga_buffer_needs_hw_storage(unsigned usage)
 {
-   return usage & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER);
+   /* True if any of the vertex, index, sampler-view or stream-output bind
+    * flags is set in 'usage'.  The "!= 0" reduces the masked bits to a
+    * 0/1 result.
+    */
+   return (usage & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER |
+                    PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT)) != 0;
 }


@ -87,6 +88,26 @@ svga_buffer_transfer_map(struct pipe_context *pipe,
   transfer->usage = usage;
   transfer->box = *box;

+   if ((usage & PIPE_TRANSFER_READ) && sbuf->dirty) {
+      /* Only need to test for vgpu10 since only vgpu10 features (streamout,
+       * buffer copy) can modify buffers on the device.
+       */
+      if (svga_have_vgpu10(svga)) {
+         enum pipe_error ret;
+         assert(sbuf->handle);
+         ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0);
+         if (ret != PIPE_OK) {
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0);
+            assert(ret == PIPE_OK);
+         }
+
+         svga_context_finish(svga);
+
+         sbuf->dirty = FALSE;
+      }
+   }
+
   if (usage & PIPE_TRANSFER_WRITE) {
      if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /*
@ -343,13 +364,43 @@ svga_buffer_create(struct pipe_screen *screen,
   sbuf->b.vtbl = &svga_buffer_vtbl;
   pipe_reference_init(&sbuf->b.b.reference, 1);
   sbuf->b.b.screen = screen;
+   sbuf->bind_flags = template->bind;
+
+   if (template->bind & PIPE_BIND_CONSTANT_BUFFER) {
+      /* Constant buffers can only have the PIPE_BIND_CONSTANT_BUFFER
+       * flag set.
+       */
+      if (ss->sws->have_vgpu10) {
+         sbuf->bind_flags = PIPE_BIND_CONSTANT_BUFFER;
+
+         /* Constant buffer size needs to be in multiples of 16. */
+         sbuf->b.b.width0 = align(sbuf->b.b.width0, 16);
+      }
+   }

   if(svga_buffer_needs_hw_storage(template->bind)) {
+
+      /* If the buffer will be used for vertex/index/stream data, set all
+       * the flags so that the buffer will be accepted for all those uses.
+       * Note that the PIPE_BIND_ flags we get from the state tracker are
+       * just a hint about how the buffer may be used.  An OpenGL buffer
+       * object may be used for many different things.
+       */
+      if (!(template->bind & PIPE_BIND_CONSTANT_BUFFER)) {
+         /* Not a constant buffer.  The buffer may be used for vertex data,
+          * indexes or stream-out.
+          */
+         sbuf->bind_flags |= (PIPE_BIND_VERTEX_BUFFER |
+                              PIPE_BIND_INDEX_BUFFER);
+         if (ss->sws->have_vgpu10)
+            sbuf->bind_flags |= PIPE_BIND_STREAM_OUTPUT;
+      }
+
      if(svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK)
         goto error2;
   }
   else {
-      sbuf->swbuf = align_malloc(template->width0, 64);
+      sbuf->swbuf = align_malloc(sbuf->b.b.width0, 64);
      if(!sbuf->swbuf)
         goto error2;
   }
@ -357,7 +408,7 @@ svga_buffer_create(struct pipe_screen *screen,
   debug_reference(&sbuf->b.b.reference,
                   (debug_reference_descriptor)debug_describe_resource, 0);

-   sbuf->size = util_resource_size(template);
+   sbuf->size = util_resource_size(&sbuf->b.b);
   ss->total_resource_bytes += sbuf->size;

   return &sbuf->b.b; 
@ -391,6 +442,7 @@ svga_user_buffer_create(struct pipe_screen *screen,
   sbuf->b.b.depth0 = 1;
   sbuf->b.b.array_size = 1;

+   sbuf->bind_flags = bind;
   sbuf->swbuf = ptr;
   sbuf->user = TRUE;

--- a/src/gallium/drivers/svga/svga_resource_buffer.h
+++ b/src/gallium/drivers/svga/svga_resource_buffer.h
@ -65,6 +65,9 @@ struct svga_buffer
 {
   struct u_resource b;

+   /** This is a superset of b.b.bind */
+   unsigned bind_flags;
+
   /**
    * Regular (non DMA'able) memory.
    * 
@ -187,6 +190,8 @@ struct svga_buffer
   struct list_head head;

   unsigned size;  /**< Approximate size in bytes */
+
+   boolean dirty;  /**< Need to do a readback before mapping? */
 };


--- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
@ -149,10 +149,22 @@ svga_buffer_create_host_surface(struct svga_screen *ss,
      sbuf->key.flags = 0;

      sbuf->key.format = SVGA3D_BUFFER;
-      if (sbuf->b.b.bind & PIPE_BIND_VERTEX_BUFFER)
+      if (sbuf->bind_flags & PIPE_BIND_VERTEX_BUFFER) {
         sbuf->key.flags |= SVGA3D_SURFACE_HINT_VERTEXBUFFER;
-      if (sbuf->b.b.bind & PIPE_BIND_INDEX_BUFFER)
+         sbuf->key.flags |= SVGA3D_SURFACE_BIND_VERTEX_BUFFER;
+      }
+      if (sbuf->bind_flags & PIPE_BIND_INDEX_BUFFER) {
         sbuf->key.flags |= SVGA3D_SURFACE_HINT_INDEXBUFFER;
+         sbuf->key.flags |= SVGA3D_SURFACE_BIND_INDEX_BUFFER;
+      }
+      if (sbuf->bind_flags & PIPE_BIND_CONSTANT_BUFFER)
+         sbuf->key.flags |= SVGA3D_SURFACE_BIND_CONSTANT_BUFFER;
+
+      if (sbuf->bind_flags & PIPE_BIND_STREAM_OUTPUT)
+         sbuf->key.flags |= SVGA3D_SURFACE_BIND_STREAM_OUTPUT;
+
+      if (sbuf->bind_flags & PIPE_BIND_SAMPLER_VIEW)
+         sbuf->key.flags |= SVGA3D_SURFACE_BIND_SHADER_RESOURCE;

      sbuf->key.size.width = sbuf->b.b.width0;
      sbuf->key.size.height = 1;
@ -161,10 +173,12 @@ svga_buffer_create_host_surface(struct svga_screen *ss,
      sbuf->key.numFaces = 1;
      sbuf->key.numMipLevels = 1;
      sbuf->key.cachable = 1;
+      sbuf->key.arraySize = 1;

      SVGA_DBG(DEBUG_DMA, "surface_create for buffer sz %d\n", sbuf->b.b.width0);

-      sbuf->handle = svga_screen_surface_create(ss, &sbuf->key);
+      sbuf->handle = svga_screen_surface_create(ss, sbuf->b.b.bind,
+                                                sbuf->b.b.usage, &sbuf->key);
      if (!sbuf->handle)
         return PIPE_ERROR_OUT_OF_MEMORY;

@ -203,8 +217,8 @@ svga_buffer_upload_gb_command(struct svga_context *svga,
 			      struct svga_buffer *sbuf)
 {
   struct svga_winsys_context *swc = svga->swc;
-   SVGA3dCmdUpdateGBImage *cmd;
-   struct svga_3d_update_gb_image *ccmd = NULL;
+   SVGA3dCmdUpdateGBImage *update_cmd;
+   struct svga_3d_update_gb_image *whole_update_cmd = NULL;
   uint32 numBoxes = sbuf->map.num_ranges;
   struct pipe_resource *dummy;
   unsigned int i;
@ -214,68 +228,78 @@ svga_buffer_upload_gb_command(struct svga_context *svga,

   if (sbuf->dma.flags.discard) {
      struct svga_3d_invalidate_gb_image *cicmd = NULL;
-      SVGA3dCmdInvalidateGBImage *icmd;
+      SVGA3dCmdInvalidateGBImage *invalidate_cmd;
+      const unsigned total_commands_size =
+         sizeof(*invalidate_cmd) + numBoxes * sizeof(*whole_update_cmd);

      /* Allocate FIFO space for one INVALIDATE_GB_IMAGE command followed by
       * 'numBoxes' UPDATE_GB_IMAGE commands.  Allocate all at once rather
       * than with separate commands because we need to properly deal with
       * filling the command buffer.
       */
-      icmd = SVGA3D_FIFOReserve(swc,
-				SVGA_3D_CMD_INVALIDATE_GB_IMAGE,
-				sizeof *icmd + numBoxes * sizeof *ccmd,
-				2);
-      if (!icmd)
+      invalidate_cmd = SVGA3D_FIFOReserve(swc,
+                                          SVGA_3D_CMD_INVALIDATE_GB_IMAGE,
+                                          total_commands_size, 1 + numBoxes);
+      if (!invalidate_cmd)
 	 return PIPE_ERROR_OUT_OF_MEMORY;

-      cicmd = container_of(icmd, cicmd, body);
-      cicmd->header.size = sizeof *icmd;
-      swc->surface_relocation(swc, &icmd->image.sid, NULL, sbuf->handle,
+      cicmd = container_of(invalidate_cmd, cicmd, body);
+      cicmd->header.size = sizeof(*invalidate_cmd);
+      swc->surface_relocation(swc, &invalidate_cmd->image.sid, NULL, sbuf->handle,
                              (SVGA_RELOC_WRITE |
                               SVGA_RELOC_INTERNAL |
                               SVGA_RELOC_DMA));
-      icmd->image.face = 0;
-      icmd->image.mipmap = 0;
+      invalidate_cmd->image.face = 0;
+      invalidate_cmd->image.mipmap = 0;

+      /* The whole_update_cmd is a SVGA3dCmdHeader plus the
+       * SVGA3dCmdUpdateGBImage command.
+       */
+      whole_update_cmd = (struct svga_3d_update_gb_image *) &invalidate_cmd[1];
      /* initialize the first UPDATE_GB_IMAGE command */
-      ccmd = (struct svga_3d_update_gb_image *) &icmd[1];
-      ccmd->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE;
-      cmd = &ccmd->body;
+      whole_update_cmd->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE;
+      update_cmd = &whole_update_cmd->body;

   } else {
      /* Allocate FIFO space for 'numBoxes' UPDATE_GB_IMAGE commands */
-      cmd = SVGA3D_FIFOReserve(swc,
-			       SVGA_3D_CMD_UPDATE_GB_IMAGE,
-			       sizeof *cmd + (numBoxes - 1) * sizeof *ccmd,
-			       1);
-      if (!cmd)
+      const unsigned total_commands_size =
+         sizeof(*update_cmd) + (numBoxes - 1) * sizeof(*whole_update_cmd);
+
+      update_cmd = SVGA3D_FIFOReserve(swc,
+                                      SVGA_3D_CMD_UPDATE_GB_IMAGE,
+                                      total_commands_size, numBoxes);
+      if (!update_cmd)
 	 return PIPE_ERROR_OUT_OF_MEMORY;

-      ccmd = container_of(cmd, ccmd, body);
+      /* The whole_update_cmd is a SVGA3dCmdHeader plus the
+       * SVGA3dCmdUpdateGBImage command.
+       */
+      whole_update_cmd = container_of(update_cmd, whole_update_cmd, body);
   }

   /* Init the first UPDATE_GB_IMAGE command */
-   ccmd->header.size = sizeof *cmd;
-   swc->surface_relocation(swc, &cmd->image.sid, NULL, sbuf->handle,
+   whole_update_cmd->header.size = sizeof(*update_cmd);
+   swc->surface_relocation(swc, &update_cmd->image.sid, NULL, sbuf->handle,
 			   SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL);
-   cmd->image.face = 0;
-   cmd->image.mipmap = 0;
+   update_cmd->image.face = 0;
+   update_cmd->image.mipmap = 0;

   /* Save pointer to the first UPDATE_GB_IMAGE command so that we can
    * fill in the box info below.
    */
-   sbuf->dma.updates = ccmd;
+   sbuf->dma.updates = whole_update_cmd;

   /*
-    * Copy the relocation info, face and mipmap to all
-    * subsequent commands. NOTE: For winsyses that actually
-    * patch the image.sid member at flush time, this will fail
-    * miserably. For those we need to add as many relocations
-    * as there are copy boxes.
+    * Copy the face, mipmap, etc. info to all subsequent commands.
+    * Also do the surface relocation for each subsequent command.
    */
-
   for (i = 1; i < numBoxes; ++i) {
-      memcpy(++ccmd, sbuf->dma.updates, sizeof *ccmd);
+      whole_update_cmd++;
+      memcpy(whole_update_cmd, sbuf->dma.updates, sizeof(*whole_update_cmd));
+
+      swc->surface_relocation(swc, &whole_update_cmd->body.image.sid, NULL,
+                              sbuf->handle,
+                              SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL);
   }

   /* Increment reference count */
--- a/src/gallium/drivers/svga/svga_resource_texture.c
+++ b/src/gallium/drivers/svga/svga_resource_texture.c
@ -46,12 +46,6 @@
 #include "svga_debug.h"


-/* XXX: This isn't a real hardware flag, but just a hack for kernel to
- * know about primary surfaces. Find a better way to accomplish this.
- */
-#define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9)
-
-
 static void
 svga_transfer_dma_band(struct svga_context *svga,
                       struct svga_transfer *st,
@ -59,10 +53,10 @@ svga_transfer_dma_band(struct svga_context *svga,
                       unsigned y, unsigned h, unsigned srcy,
                       SVGA3dSurfaceDMAFlags flags)
 {
-   struct svga_texture *texture = svga_texture(st->base.resource); 
+   struct svga_texture *texture = svga_texture(st->base.resource);
   SVGA3dCopyBox box;
   enum pipe_error ret;
- 
+
   assert(!st->use_direct_map);

   box.x = st->base.box.x;
@ -75,28 +69,23 @@ svga_transfer_dma_band(struct svga_context *svga,
   box.srcy = srcy;
   box.srcz = 0;

-   if (st->base.resource->target == PIPE_TEXTURE_CUBE) {
-      st->face = st->base.box.z;
-      box.z = 0;
-   }
-   else
-      st->face = 0;
-
-   SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - (%u, %u, %u), %ubpp\n",
-                transfer == SVGA3D_WRITE_HOST_VRAM ? "to" : "from", 
-                texture->handle,
-                st->face,
-                st->base.box.x,
-                y,
-                box.z,
-                st->base.box.x + st->base.box.width,
-                y + h,
-                box.z + 1,
-                util_format_get_blocksize(texture->b.b.format) * 8 /
-                (util_format_get_blockwidth(texture->b.b.format)*util_format_get_blockheight(texture->b.b.format)));
+   SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - "
+            "(%u, %u, %u), %ubpp\n",
+            transfer == SVGA3D_WRITE_HOST_VRAM ? "to" : "from",
+            texture->handle,
+            st->slice,
+            st->base.box.x,
+            y,
+            box.z,
+            st->base.box.x + st->base.box.width,
+            y + h,
+            box.z + 1,
+            util_format_get_blocksize(texture->b.b.format) * 8 /
+            (util_format_get_blockwidth(texture->b.b.format)
+             * util_format_get_blockheight(texture->b.b.format)));

   ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags);
-   if(ret != PIPE_OK) {
+   if (ret != PIPE_OK) {
      svga_context_flush(svga, NULL);
      ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags);
      assert(ret == PIPE_OK);
@ -110,7 +99,7 @@ svga_transfer_dma(struct svga_context *svga,
                  SVGA3dTransferType transfer,
                  SVGA3dSurfaceDMAFlags flags)
 {
-   struct svga_texture *texture = svga_texture(st->base.resource); 
+   struct svga_texture *texture = svga_texture(st->base.resource);
   struct svga_screen *screen = svga_screen(texture->b.b.screen);
   struct svga_winsys_screen *sws = screen->sws;
   struct pipe_fence_handle *fence = NULL;
@ -126,14 +115,13 @@ svga_transfer_dma(struct svga_context *svga,
    */
   svga_surfaces_flush( svga );

-   if(!st->swbuf) {
+   if (!st->swbuf) {
      /* Do the DMA transfer in a single go */
-
      svga_transfer_dma_band(svga, st, transfer,
                             st->base.box.y, st->base.box.height, 0,
                             flags);

-      if(transfer == SVGA3D_READ_HOST_VRAM) {
+      if (transfer == SVGA3D_READ_HOST_VRAM) {
         svga_context_flush(svga, &fence);
         sws->fence_finish(sws, fence, 0);
         sws->fence_reference(sws, &fence, NULL);
@ -141,10 +129,13 @@ svga_transfer_dma(struct svga_context *svga,
   }
   else {
      int y, h, srcy;
-      unsigned blockheight = util_format_get_blockheight(st->base.resource->format);
+      unsigned blockheight =
+         util_format_get_blockheight(st->base.resource->format);
+
      h = st->hw_nblocksy * blockheight;
      srcy = 0;
-      for(y = 0; y < st->base.box.height; y += h) {
+
+      for (y = 0; y < st->base.box.height; y += h) {
         unsigned offset, length;
         void *hw, *sw;

@ -158,7 +149,7 @@ svga_transfer_dma(struct svga_context *svga,
         offset = y * st->base.stride / blockheight;
         length = h * st->base.stride / blockheight;

-         sw = (uint8_t *)st->swbuf + offset;
+         sw = (uint8_t *) st->swbuf + offset;

         if (transfer == SVGA3D_WRITE_HOST_VRAM) {
            unsigned usage = PIPE_TRANSFER_WRITE;
@ -184,16 +175,15 @@ svga_transfer_dma(struct svga_context *svga,
          * Prevent the texture contents to be discarded on the next band
          * upload.
          */
-
         flags.discard = FALSE;

-         if(transfer == SVGA3D_READ_HOST_VRAM) {
+         if (transfer == SVGA3D_READ_HOST_VRAM) {
            svga_context_flush(svga, &fence);
            sws->fence_finish(sws, fence, 0);

            hw = sws->buffer_map(sws, st->hwbuf, PIPE_TRANSFER_READ);
            assert(hw);
-            if(hw) {
+            if (hw) {
               memcpy(sw, hw, length);
               sws->buffer_unmap(sws, st->hwbuf);
            }
@ -203,19 +193,22 @@ svga_transfer_dma(struct svga_context *svga,
 }


-static boolean 
+static boolean
 svga_texture_get_handle(struct pipe_screen *screen,
-                               struct pipe_resource *texture,
-                               struct winsys_handle *whandle)
+                        struct pipe_resource *texture,
+                        struct winsys_handle *whandle)
 {
   struct svga_winsys_screen *sws = svga_winsys_screen(texture->screen);
   unsigned stride;

   assert(svga_texture(texture)->key.cachable == 0);
   svga_texture(texture)->key.cachable = 0;
+
   stride = util_format_get_nblocksx(texture->format, texture->width0) *
            util_format_get_blocksize(texture->format);
-   return sws->surface_get_handle(sws, svga_texture(texture)->handle, stride, whandle);
+
+   return sws->surface_get_handle(sws, svga_texture(texture)->handle,
+                                  stride, whandle);
 }


@ -238,6 +231,7 @@ svga_texture_destroy(struct pipe_screen *screen,

   ss->total_resource_bytes -= tex->size;

+   FREE(tex->defined);
   FREE(tex->rendered_to);
   FREE(tex);
 }
@ -274,10 +268,43 @@ need_tex_readback(struct pipe_transfer *transfer)
 }


+static enum pipe_error
+readback_image_vgpu9(struct svga_context *svga,
+                   struct svga_winsys_surface *surf,
+                   unsigned slice,
+                   unsigned level)
+{
+   enum pipe_error ret;
+
+   ret = SVGA3D_ReadbackGBImage(svga->swc, surf, slice, level);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_ReadbackGBImage(svga->swc, surf, slice, level);
+   }
+   return ret;
+}
+
+
+static enum pipe_error
+readback_image_vgpu10(struct svga_context *svga,
+                    struct svga_winsys_surface *surf,
+                    unsigned slice,
+                    unsigned level,
+                    unsigned numMipLevels)
+{
+   enum pipe_error ret;
+   unsigned subResource;
+
+   subResource = slice * numMipLevels + level;
+   ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, surf, subResource);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, surf, subResource);
+   }
+   return ret;
+}
+

-/* XXX: Still implementing this as if it was a screen function, but
- * can now modify it to queue transfers on the context.
- */
 static void *
 svga_texture_transfer_map(struct pipe_context *pipe,
                          struct pipe_resource *texture,
@ -289,6 +316,7 @@ svga_texture_transfer_map(struct pipe_context *pipe,
   struct svga_context *svga = svga_context(pipe);
   struct svga_screen *ss = svga_screen(pipe->screen);
   struct svga_winsys_screen *sws = ss->sws;
+   struct svga_texture *tex = svga_texture(texture);
   struct svga_transfer *st;
   unsigned nblocksx, nblocksy;
   boolean use_direct_map = svga_have_gb_objects(svga) &&
@ -326,25 +354,34 @@ svga_texture_transfer_map(struct pipe_context *pipe,
   }

   pipe_resource_reference(&st->base.resource, texture);
+
   st->base.level = level;
   st->base.usage = usage;
   st->base.box = *box;
   st->base.stride = nblocksx*util_format_get_blocksize(texture->format);
   st->base.layer_stride = st->base.stride * nblocksy;

+   switch (tex->b.b.target) {
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_1D_ARRAY:
+      st->slice = st->base.box.z;
+      st->base.box.z = 0;   /* so we don't apply double offsets below */
+      break;
+   default:
+      st->slice = 0;
+      break;
+   }
+
   if (!use_direct_map) {
      /* Use a DMA buffer */
      st->hw_nblocksy = nblocksy;

-      st->hwbuf = svga_winsys_buffer_create(svga,
-                                            1, 
-                                            0,
-                                            st->hw_nblocksy * st->base.stride * d);
+      st->hwbuf = svga_winsys_buffer_create(svga, 1, 0,
+                                   st->hw_nblocksy * st->base.stride * d);
      while(!st->hwbuf && (st->hw_nblocksy /= 2)) {
-         st->hwbuf = svga_winsys_buffer_create(svga,
-                                               1, 
-                                               0,
-                                               st->hw_nblocksy * st->base.stride * d);
+         st->hwbuf = svga_winsys_buffer_create(svga, 1, 0,
+                                   st->hw_nblocksy * st->base.stride * d);
      }

      if (!st->hwbuf) {
@ -352,8 +389,8 @@ svga_texture_transfer_map(struct pipe_context *pipe,
         return NULL;
      }

-      if(st->hw_nblocksy < nblocksy) {
-         /* We couldn't allocate a hardware buffer big enough for the transfer, 
+      if (st->hw_nblocksy < nblocksy) {
+         /* We couldn't allocate a hardware buffer big enough for the transfer,
          * so allocate regular malloc memory instead */
         if (0) {
            debug_printf("%s: failed to allocate %u KB of DMA, "
@ -379,45 +416,27 @@ svga_texture_transfer_map(struct pipe_context *pipe,
      }
   } else {
      struct pipe_transfer *transfer = &st->base;
-      struct svga_texture *tex = svga_texture(transfer->resource);
      struct svga_winsys_surface *surf = tex->handle;
-      unsigned face;

-      assert(surf);
-
-      if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
-	 face = transfer->box.z;
-      } else {
-	 face = 0;
+      if (!surf) {
+         FREE(st);
+         return NULL;
      }

      if (need_tex_readback(transfer)) {
-	 SVGA3dBox box;
 	 enum pipe_error ret;

-	 box.x = transfer->box.x;
-	 box.y = transfer->box.y;
-	 box.w = transfer->box.width;
-	 box.h = transfer->box.height;
-	 box.d = transfer->box.depth;
-	 if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
-	    box.z = 0;
-	 }
-	 else {
-	    box.z = transfer->box.z;
-	 }
-
-         (void) box;  /* not used at this time */
-
         svga_surfaces_flush(svga);

-	 ret = SVGA3D_ReadbackGBImage(svga->swc, surf, face, transfer->level);
+         if (svga_have_vgpu10(svga)) {
+            ret = readback_image_vgpu10(svga, surf, st->slice, transfer->level,
+                                        tex->b.b.last_level + 1);
+         } else {
+            ret = readback_image_vgpu9(svga, surf, st->slice, transfer->level);
+         }

-	 if (ret != PIPE_OK) {
-	    svga_context_flush(svga, NULL);
-	    ret = SVGA3D_ReadbackGBImage(svga->swc, surf, face, transfer->level);
-	    assert(ret == PIPE_OK);
-	 }
+         assert(ret == PIPE_OK);
+         (void) ret;

 	 svga_context_flush(svga, NULL);

@ -425,7 +444,7 @@ svga_texture_transfer_map(struct pipe_context *pipe,
          * Note: if PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE were specified
          * we could potentially clear the flag for all faces/layers/mips.
          */
-         svga_clear_texture_rendered_to(tex, face, transfer->level);
+         svga_clear_texture_rendered_to(tex, st->slice, transfer->level);
      }
      else {
 	 assert(transfer->usage & PIPE_TRANSFER_WRITE);
@ -451,17 +470,15 @@ svga_texture_transfer_map(struct pipe_context *pipe,
      return sws->buffer_map(sws, st->hwbuf, usage);
   }
   else {
-      struct svga_screen *screen = svga_screen(svga->pipe.screen);
-      SVGA3dSurfaceFormat format;
      SVGA3dSize baseLevelSize;
      struct svga_texture *tex = svga_texture(texture);
      struct svga_winsys_surface *surf = tex->handle;
      uint8_t *map;
      boolean retry;
-      unsigned face, offset, mip_width, mip_height;
-      unsigned xoffset = box->x;
-      unsigned yoffset = box->y;
-      unsigned zoffset = box->z;
+      unsigned offset, mip_width, mip_height;
+      unsigned xoffset = st->base.box.x;
+      unsigned yoffset = st->base.box.y;
+      unsigned zoffset = st->base.box.z;

      map = svga->swc->surface_map(svga->swc, surf, usage, &retry);
      if (map == NULL && retry) {
@ -484,21 +501,13 @@ svga_texture_transfer_map(struct pipe_context *pipe,
      /**
       * Compute the offset to the specific texture slice in the buffer.
       */
-      if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
-         face = zoffset;
-         zoffset = 0;
-      } else {
-         face = 0;
-      }
-
-      format = svga_translate_format(screen, tex->b.b.format, 0);
      baseLevelSize.width = tex->b.b.width0;
      baseLevelSize.height = tex->b.b.height0;
      baseLevelSize.depth = tex->b.b.depth0;

-      offset = svga3dsurface_get_image_offset(format, baseLevelSize,
+      offset = svga3dsurface_get_image_offset(tex->key.format, baseLevelSize,
                                              tex->b.b.last_level + 1, /* numMips */
-                                              face, level);
+                                              st->slice, level);
      if (level > 0) {
         assert(offset > 0);
      }
@ -506,7 +515,8 @@ svga_texture_transfer_map(struct pipe_context *pipe,
      mip_width = u_minify(tex->b.b.width0, level);
      mip_height = u_minify(tex->b.b.height0, level);

-      offset += svga3dsurface_get_pixel_offset(format, mip_width, mip_height,
+      offset += svga3dsurface_get_pixel_offset(tex->key.format,
+                                               mip_width, mip_height,
                                               xoffset, yoffset, zoffset);

      return (void *) (map + offset);
@ -541,9 +551,45 @@ svga_texture_surface_unmap(struct svga_context *svga,
 }


-/* XXX: Still implementing this as if it was a screen function, but
- * can now modify it to queue transfers on the context.
- */
+static enum pipe_error
+update_image_vgpu9(struct svga_context *svga,
+                   struct svga_winsys_surface *surf,
+                   const SVGA3dBox *box,
+                   unsigned slice,
+                   unsigned level)
+{
+   enum pipe_error ret;
+
+   ret = SVGA3D_UpdateGBImage(svga->swc, surf, box, slice, level);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_UpdateGBImage(svga->swc, surf, box, slice, level);
+   }
+   return ret;
+}
+
+
+static enum pipe_error
+update_image_vgpu10(struct svga_context *svga,
+                    struct svga_winsys_surface *surf,
+                    const SVGA3dBox *box,
+                    unsigned slice,
+                    unsigned level,
+                    unsigned numMipLevels)
+{
+   enum pipe_error ret;
+   unsigned subResource;
+
+   subResource = slice * numMipLevels + level;
+   ret = SVGA3D_vgpu10_UpdateSubResource(svga->swc, surf, box, subResource);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_UpdateSubResource(svga->swc, surf, box, subResource);
+   }
+   return ret;
+}
+
+
 static void
 svga_texture_transfer_unmap(struct pipe_context *pipe,
 			    struct pipe_transfer *transfer)
@ -579,26 +625,25 @@ svga_texture_transfer_unmap(struct pipe_context *pipe,
   } else if (transfer->usage & PIPE_TRANSFER_WRITE) {
      struct svga_winsys_surface *surf =
 	 svga_texture(transfer->resource)->handle;
-      unsigned face;
      SVGA3dBox box;
      enum pipe_error ret;

      assert(svga_have_gb_objects(svga));

      /* update the effected region */
-      if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
-	 face = transfer->box.z;
-      } else {
-	 face = 0;
-      }
-
      box.x = transfer->box.x;
      box.y = transfer->box.y;
-      if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
+      switch (tex->b.b.target) {
+      case PIPE_TEXTURE_CUBE:
+      case PIPE_TEXTURE_2D_ARRAY:
         box.z = 0;
-      }
-      else {
+         break;
+      case PIPE_TEXTURE_1D_ARRAY:
+         box.y = box.z = 0;
+         break;
+      default:
         box.z = transfer->box.z;
+         break;
      }
      box.w = transfer->box.width;
      box.h = transfer->box.height;
@ -610,18 +655,21 @@ svga_texture_transfer_unmap(struct pipe_context *pipe,
                      box.x, box.y, box.z,
                      box.w, box.h, box.d);

-      ret = SVGA3D_UpdateGBImage(svga->swc, surf, &box, face, transfer->level);
-      if (ret != PIPE_OK) {
-         svga_context_flush(svga, NULL);
-         ret = SVGA3D_UpdateGBImage(svga->swc, surf, &box, face, transfer->level);
-         assert(ret == PIPE_OK);
+      if (svga_have_vgpu10(svga)) {
+         ret = update_image_vgpu10(svga, surf, &box, st->slice, transfer->level,
+                                   tex->b.b.last_level + 1);
+      } else {
+         ret = update_image_vgpu9(svga, surf, &box, st->slice, transfer->level);
      }
+
+      assert(ret == PIPE_OK);
+      (void) ret;
   }

   ss->texture_timestamp++;
   svga_age_texture_view(tex, transfer->level);
   if (transfer->resource->target == PIPE_TEXTURE_CUBE)
-      svga_define_texture_level(tex, transfer->box.z, transfer->level);
+      svga_define_texture_level(tex, st->slice, transfer->level);
   else
      svga_define_texture_level(tex, 0, transfer->level);

@ -635,7 +683,18 @@ svga_texture_transfer_unmap(struct pipe_context *pipe,
 }


-struct u_resource_vtbl svga_texture_vtbl = 
+/**
+ * Does format store depth values?
+ */
+static inline boolean
+format_has_depth(enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+   return util_format_has_depth(desc);
+}
+
+
+struct u_resource_vtbl svga_texture_vtbl =
 {
   svga_texture_get_handle,	      /* get_handle */
   svga_texture_destroy,	      /* resource_destroy */
@ -651,57 +710,119 @@ svga_texture_create(struct pipe_screen *screen,
                    const struct pipe_resource *template)
 {
   struct svga_screen *svgascreen = svga_screen(screen);
-   struct svga_texture *tex = CALLOC_STRUCT(svga_texture);
+   struct svga_texture *tex;
+   unsigned bindings = template->bind;

-   if (!tex)
-      goto error1;
+   assert(template->last_level < SVGA_MAX_TEXTURE_LEVELS);
+   if (template->last_level >= SVGA_MAX_TEXTURE_LEVELS) {
+      return NULL;
+   }
+
+   tex = CALLOC_STRUCT(svga_texture);
+   if (!tex) {
+      return NULL;
+   }
+
+   tex->defined = CALLOC(template->depth0 * template->array_size,
+                         sizeof(tex->defined[0]));
+   if (!tex->defined) {
+      FREE(tex);
+      return NULL;
+   }
+
+   tex->rendered_to = CALLOC(template->depth0 * template->array_size,
+                             sizeof(tex->rendered_to[0]));
+   if (!tex->rendered_to) {
+      FREE(tex->defined);
+      FREE(tex);
+      return NULL;
+   }

   tex->b.b = *template;
   tex->b.vtbl = &svga_texture_vtbl;
   pipe_reference_init(&tex->b.b.reference, 1);
   tex->b.b.screen = screen;

-   assert(template->last_level < SVGA_MAX_TEXTURE_LEVELS);
-   if(template->last_level >= SVGA_MAX_TEXTURE_LEVELS)
-      goto error2;
-   
   tex->key.flags = 0;
   tex->key.size.width = template->width0;
   tex->key.size.height = template->height0;
   tex->key.size.depth = template->depth0;
+   tex->key.arraySize = 1;
+   tex->key.numFaces = 1;
+   tex->key.sampleCount = template->nr_samples;

-   if(template->target == PIPE_TEXTURE_CUBE) {
-      tex->key.flags |= SVGA3D_SURFACE_CUBEMAP;
-      tex->key.numFaces = 6;
+   if (template->nr_samples > 1) {
+      tex->key.flags |= SVGA3D_SURFACE_MASKABLE_ANTIALIAS;
+   }
+
+   if (svgascreen->sws->have_vgpu10) {
+      switch (template->target) {
+      case PIPE_TEXTURE_1D:
+         tex->key.flags |= SVGA3D_SURFACE_1D;
+         break;
+      case PIPE_TEXTURE_1D_ARRAY:
+         tex->key.flags |= SVGA3D_SURFACE_1D;
+         /* fall-through */
+      case PIPE_TEXTURE_2D_ARRAY:
+         tex->key.flags |= SVGA3D_SURFACE_ARRAY;
+         tex->key.arraySize = template->array_size;
+         break;
+      case PIPE_TEXTURE_3D:
+         tex->key.flags |= SVGA3D_SURFACE_VOLUME;
+         break;
+      case PIPE_TEXTURE_CUBE:
+         tex->key.flags |= (SVGA3D_SURFACE_CUBEMAP | SVGA3D_SURFACE_ARRAY);
+         tex->key.numFaces = 6;
+         break;
+      default:
+         break;
+      }
   }
   else {
-      tex->key.numFaces = 1;
-   }
-
-   if (template->target == PIPE_TEXTURE_3D) {
-      tex->key.flags |= SVGA3D_SURFACE_VOLUME;
+      switch (template->target) {
+      case PIPE_TEXTURE_3D:
+         tex->key.flags |= SVGA3D_SURFACE_VOLUME;
+         break;
+      case PIPE_TEXTURE_CUBE:
+         tex->key.flags |= SVGA3D_SURFACE_CUBEMAP;
+         tex->key.numFaces = 6;
+         break;
+      default:
+         break;
+      }
   }

   tex->key.cachable = 1;

-   if (template->bind & PIPE_BIND_SAMPLER_VIEW)
+   if (bindings & PIPE_BIND_SAMPLER_VIEW) {
      tex->key.flags |= SVGA3D_SURFACE_HINT_TEXTURE;
+      tex->key.flags |= SVGA3D_SURFACE_BIND_SHADER_RESOURCE;

-   if (template->bind & PIPE_BIND_DISPLAY_TARGET) {
+      if (!(bindings & PIPE_BIND_RENDER_TARGET)) {
+         /* Also check if the format is renderable */
+         if (screen->is_format_supported(screen, template->format,
+                                         template->target,
+                                         template->nr_samples,
+                                         PIPE_BIND_RENDER_TARGET)) {
+            bindings |= PIPE_BIND_RENDER_TARGET;
+         }
+      }
+   }
+
+   if (bindings & PIPE_BIND_DISPLAY_TARGET) {
      tex->key.cachable = 0;
   }

-   if (template->bind & PIPE_BIND_SHARED) {
+   if (bindings & PIPE_BIND_SHARED) {
      tex->key.cachable = 0;
   }

-   if (template->bind & (PIPE_BIND_SCANOUT |
-                         PIPE_BIND_CURSOR)) {
-      tex->key.flags |= SVGA3D_SURFACE_HINT_SCANOUT;
+   if (bindings & (PIPE_BIND_SCANOUT | PIPE_BIND_CURSOR)) {
+      tex->key.scanout = 1;
      tex->key.cachable = 0;
   }

-   /* 
+   /*
    * Note: Previously we never passed the
    * SVGA3D_SURFACE_HINT_RENDERTARGET hint. Mesa cannot
    * know beforehand whether a texture will be used as a rendertarget or not
@ -712,23 +833,55 @@ svga_texture_create(struct pipe_screen *screen,
    * (XA for example) uses it accurately and certain device versions
    * relies on it in certain situations to render correctly.
    */
-   if((template->bind & PIPE_BIND_RENDER_TARGET) &&
-      !util_format_is_s3tc(template->format))
+   if ((bindings & PIPE_BIND_RENDER_TARGET) &&
+       !util_format_is_s3tc(template->format)) {
      tex->key.flags |= SVGA3D_SURFACE_HINT_RENDERTARGET;
-   
-   if(template->bind & PIPE_BIND_DEPTH_STENCIL)
+      tex->key.flags |= SVGA3D_SURFACE_BIND_RENDER_TARGET;
+   }
+
+   if (bindings & PIPE_BIND_DEPTH_STENCIL) {
      tex->key.flags |= SVGA3D_SURFACE_HINT_DEPTHSTENCIL;
-   
+      tex->key.flags |= SVGA3D_SURFACE_BIND_DEPTH_STENCIL;
+   }
+
   tex->key.numMipLevels = template->last_level + 1;
-   
-   tex->key.format = svga_translate_format(svgascreen, template->format, template->bind);
-   if(tex->key.format == SVGA3D_FORMAT_INVALID)
-      goto error2;
+
+   tex->key.format = svga_translate_format(svgascreen, template->format,
+                                           bindings);
+   if (tex->key.format == SVGA3D_FORMAT_INVALID) {
+      FREE(tex->defined);
+      FREE(tex->rendered_to);
+      FREE(tex);
+      return NULL;
+   }
+
+   /* Use typeless formats for sRGB and depth resources.  Typeless
+    * formats can be reinterpreted as other formats.  For example,
+    * SVGA3D_R8G8B8A8_UNORM_TYPELESS can be interpreted as
+    * SVGA3D_R8G8B8A8_UNORM_SRGB or SVGA3D_R8G8B8A8_UNORM.
+    */
+   if (svgascreen->sws->have_vgpu10 &&
+       (util_format_is_srgb(template->format) ||
+        format_has_depth(template->format))) {
+      SVGA3dSurfaceFormat typeless = svga_typeless_format(tex->key.format);
+      if (0) {
+         debug_printf("Convert resource type %s -> %s (bind 0x%x)\n",
+                      svga_format_name(tex->key.format),
+                      svga_format_name(typeless),
+                      bindings);
+      }
+      tex->key.format = typeless;
+   }

   SVGA_DBG(DEBUG_DMA, "surface_create for texture\n", tex->handle);
-   tex->handle = svga_screen_surface_create(svgascreen, &tex->key);
-   if (!tex->handle)
-       goto error2;
+   tex->handle = svga_screen_surface_create(svgascreen, bindings,
+                                            tex->b.b.usage, &tex->key);
+   if (!tex->handle) {
+      FREE(tex->defined);
+      FREE(tex->rendered_to);
+      FREE(tex);
+      return NULL;
+   }

   SVGA_DBG(DEBUG_DMA, "  --> got sid %p (texture)\n", tex->handle);

@ -738,18 +891,7 @@ svga_texture_create(struct pipe_screen *screen,
   tex->size = util_resource_size(template);
   svgascreen->total_resource_bytes += tex->size;

-   tex->rendered_to = CALLOC(template->depth0 * template->array_size,
-                             sizeof(tex->rendered_to[0]));
-   if (!tex->rendered_to)
-      goto error2;
-
   return &tex->b.b;
-
-error2:
-   FREE(tex->rendered_to);
-   FREE(tex);
-error1:
-   return NULL;
 }


@ -777,16 +919,28 @@ svga_texture_from_handle(struct pipe_screen *screen,
   if (!srf)
      return NULL;

-   if (svga_translate_format(svga_screen(screen), template->format, template->bind) != format) {
-      unsigned f1 = svga_translate_format(svga_screen(screen), template->format, template->bind);
+   if (svga_translate_format(svga_screen(screen), template->format,
+                             template->bind) != format) {
+      unsigned f1 = svga_translate_format(svga_screen(screen),
+                                          template->format, template->bind);
      unsigned f2 = format;

-      /* It's okay for XRGB and ARGB or depth with/out stencil to get mixed up */
-      if ( !( (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_A8R8G8B8) ||
+      /* It's okay for XRGB and ARGB or depth with/out stencil to get mixed up.
+       */
+      if (f1 == SVGA3D_B8G8R8A8_UNORM)
+         f1 = SVGA3D_A8R8G8B8;
+      if (f1 == SVGA3D_B8G8R8X8_UNORM)
+         f1 = SVGA3D_X8R8G8B8;
+
+      if ( !( (f1 == f2) ||
+              (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_A8R8G8B8) ||
+              (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_B8G8R8X8_UNORM) ||
              (f1 == SVGA3D_A8R8G8B8 && f2 == SVGA3D_X8R8G8B8) ||
+              (f1 == SVGA3D_A8R8G8B8 && f2 == SVGA3D_B8G8R8A8_UNORM) ||
              (f1 == SVGA3D_Z_D24X8 && f2 == SVGA3D_Z_D24S8) ||
              (f1 == SVGA3D_Z_DF24 && f2 == SVGA3D_Z_D24S8_INT) ) ) {
-         debug_printf("%s wrong format %u != %u\n", __FUNCTION__, f1, f2);
+         debug_printf("%s wrong format %s != %s\n", __FUNCTION__,
+                      svga_format_name(f1), svga_format_name(f2));
         return NULL;
      }
   }
@ -795,6 +949,13 @@ svga_texture_from_handle(struct pipe_screen *screen,
   if (!tex)
      return NULL;

+   tex->defined = CALLOC(template->depth0 * template->array_size,
+                         sizeof(tex->defined[0]));
+   if (!tex->defined) {
+      FREE(tex);
+      return NULL;
+   }
+
   tex->b.b = *template;
   tex->b.vtbl = &svga_texture_vtbl;
   pipe_reference_init(&tex->b.b.reference, 1);
@ -803,9 +964,11 @@ svga_texture_from_handle(struct pipe_screen *screen,
   SVGA_DBG(DEBUG_DMA, "wrap surface sid %p\n", srf);

   tex->key.cachable = 0;
+   tex->key.format = format;
   tex->handle = srf;

   tex->rendered_to = CALLOC(1, sizeof(tex->rendered_to[0]));
+   if (!tex->rendered_to) {
+      /* Allocation failed: release what was allocated for this texture,
+       * as the tex->defined failure path does, and report failure.
+       */
+      FREE(tex->defined);
+      FREE(tex);
+      return NULL;
+   }
+
+   tex->imported = TRUE;

   return &tex->b.b;
 }
--- a/src/gallium/drivers/svga/svga_resource_texture.h
+++ b/src/gallium/drivers/svga/svga_resource_texture.h
@ -51,7 +51,7 @@ struct svga_texture
 {
   struct u_resource b;

-   boolean defined[6][SVGA_MAX_TEXTURE_LEVELS];
+   ushort *defined;
   
   struct svga_sampler_view *cached_view;

@ -77,6 +77,12 @@ struct svga_texture
    */
   struct svga_winsys_surface *handle;

+   /**
+    * Whether the host side surface is imported and not created by this
+    * driver.
+    */
+   boolean imported;
+
   unsigned size;  /**< Approximate size in bytes */

   /** array indexed by cube face or 3D/array slice, one bit per mipmap level */
@ -91,7 +97,7 @@ struct svga_transfer
 {
   struct pipe_transfer base;

-   unsigned face;
+   unsigned slice;  /**< array slice or cube face */

   struct svga_winsys_buffer *hwbuf;

@ -135,29 +141,6 @@ svga_age_texture_view(struct svga_texture *tex, unsigned level)
 }


-/**
- * Mark the given texture face/level as being defined.
- */
-static inline void
-svga_define_texture_level(struct svga_texture *tex,
-                          unsigned face,unsigned level)
-{
-   assert(face < Elements(tex->defined));
-   assert(level < Elements(tex->defined[0]));
-   tex->defined[face][level] = TRUE;
-}
-
-
-static inline bool
-svga_is_texture_level_defined(const struct svga_texture *tex,
-                              unsigned face, unsigned level)
-{
-   assert(face < Elements(tex->defined));
-   assert(level < Elements(tex->defined[0]));
-   return tex->defined[face][level];
-}
-
-
 /** For debugging, check that face and level are legal */
 static inline void
 check_face_level(const struct svga_texture *tex,
@ -177,6 +160,27 @@ check_face_level(const struct svga_texture *tex,
 }


+/**
+ * Flag the given texture face/level as defined (sets bit 'level' for face).
+ */
+static inline void
+svga_define_texture_level(struct svga_texture *tex,
+                          unsigned face, unsigned level)
+{
+   check_face_level(tex, face, level);
+   tex->defined[face] = tex->defined[face] | (1 << level);
+}
+
+
+static inline bool
+svga_is_texture_level_defined(const struct svga_texture *tex,
+                              unsigned face, unsigned level)
+{
+   check_face_level(tex, face, level);  /* validity check for debugging */
+   return !!(tex->defined[face] & (1 << level));  /* bit 'level' set? */
+}
+
+
 static inline void
 svga_set_texture_rendered_to(struct svga_texture *tex,
                             unsigned face, unsigned level)
--- a/src/gallium/drivers/svga/svga_sampler_view.c
+++ b/src/gallium/drivers/svga/svga_sampler_view.c
@ -67,7 +67,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
   assert(pt);
   assert(min_lod <= max_lod);
   assert(max_lod <= pt->last_level);
-
+   assert(!svga_have_vgpu10(svga));

   /* Is a view needed */
   {
@ -143,10 +143,12 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
            pt->last_level);

   sv->age = tex->age;
-   sv->handle = svga_texture_view_surface(svga, tex, flags, format,
+   sv->handle = svga_texture_view_surface(svga, tex,
+                                          PIPE_BIND_SAMPLER_VIEW,
+                                          flags, format,
                                          min_lod,
                                          max_lod - min_lod + 1,
-                                          -1, -1,
+                                          -1, 1, -1,
                                          &sv->key);

   if (!sv->handle) {
@ -177,6 +179,7 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *
   unsigned k;

   assert(svga);
+   assert(!svga_have_vgpu10(svga));

   if (v->handle == tex->handle)
      return;
--- a/src/gallium/drivers/svga/svga_sampler_view.h
+++ b/src/gallium/drivers/svga/svga_sampler_view.h
@ -36,6 +36,7 @@ struct pipe_context;
 struct pipe_screen;
 struct svga_context;
 struct svga_winsys_surface;
+struct svga_surface;
 enum SVGA3dSurfaceFormat;


@ -97,5 +98,8 @@ svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_
   *ptr = v;
 }

+boolean
+svga_check_sampler_view_resource_collision(struct svga_context *svga,
+                                           struct svga_winsys_surface *res);

 #endif
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@ -34,31 +34,37 @@
 #include "svga_context.h"
 #include "svga_format.h"
 #include "svga_screen.h"
+#include "svga_tgsi.h"
 #include "svga_resource_texture.h"
 #include "svga_resource.h"
 #include "svga_debug.h"

 #include "svga3d_shaderdefs.h"
+#include "VGPU10ShaderTokens.h"

+/* NOTE: this constant may get moved into a svga3d*.h header file */
+#define SVGA3D_DX_MAX_RESOURCE_SIZE (128 * 1024 * 1024)

 #ifdef DEBUG
 int SVGA_DEBUG = 0;

 static const struct debug_named_value svga_debug_flags[] = {
-   { "dma",      DEBUG_DMA, NULL },
-   { "tgsi",     DEBUG_TGSI, NULL },
-   { "pipe",     DEBUG_PIPE, NULL },
-   { "state",    DEBUG_STATE, NULL },
-   { "screen",   DEBUG_SCREEN, NULL },
-   { "tex",      DEBUG_TEX, NULL },
-   { "swtnl",    DEBUG_SWTNL, NULL },
-   { "const",    DEBUG_CONSTS, NULL },
-   { "viewport", DEBUG_VIEWPORT, NULL },
-   { "views",    DEBUG_VIEWS, NULL },
-   { "perf",     DEBUG_PERF, NULL },
-   { "flush",    DEBUG_FLUSH, NULL },
-   { "sync",     DEBUG_SYNC, NULL },
-   { "cache",    DEBUG_CACHE, NULL },
+   { "dma",         DEBUG_DMA, NULL },
+   { "tgsi",        DEBUG_TGSI, NULL },
+   { "pipe",        DEBUG_PIPE, NULL },
+   { "state",       DEBUG_STATE, NULL },
+   { "screen",      DEBUG_SCREEN, NULL },
+   { "tex",         DEBUG_TEX, NULL },
+   { "swtnl",       DEBUG_SWTNL, NULL },
+   { "const",       DEBUG_CONSTS, NULL },
+   { "viewport",    DEBUG_VIEWPORT, NULL },
+   { "views",       DEBUG_VIEWS, NULL },
+   { "perf",        DEBUG_PERF, NULL },
+   { "flush",       DEBUG_FLUSH, NULL },
+   { "sync",        DEBUG_SYNC, NULL },
+   { "cache",       DEBUG_CACHE, NULL },
+   { "streamout",   DEBUG_STREAMOUT, NULL },
+   { "query",       DEBUG_QUERY, NULL },
   DEBUG_NAMED_VALUE_END
 };
 #endif
@ -80,18 +86,52 @@ svga_get_name( struct pipe_screen *pscreen )
    */
   build = "build: DEBUG;";
   mutex = "mutex: " PIPE_ATOMIC ";";
-#ifdef HAVE_LLVM
-   llvm = "LLVM;";
-#endif
 #else
   build = "build: RELEASE;";
 #endif
+#ifdef HAVE_LLVM
+   llvm = "LLVM;";
+#endif

   util_snprintf(name, sizeof(name), "SVGA3D; %s %s %s", build, mutex, llvm);
   return name;
 }


+/** Query a float-valued device cap; yields defaultVal if the query fails */
+static float
+get_float_cap(struct svga_winsys_screen *sws, unsigned cap, float defaultVal)
+{
+   SVGA3dDevCapResult devcap;
+
+   if (!sws->get_cap(sws, cap, &devcap))
+      return defaultVal;
+   return devcap.f;
+}
+
+
+/** Query a uint-valued device cap; yields defaultVal if the query fails */
+static unsigned
+get_uint_cap(struct svga_winsys_screen *sws, unsigned cap, unsigned defaultVal)
+{
+   SVGA3dDevCapResult devcap;
+
+   if (!sws->get_cap(sws, cap, &devcap))
+      return defaultVal;
+   return devcap.u;
+}
+
+
+/** Query a boolean-valued device cap; yields defaultVal if the query fails */
+static boolean
+get_bool_cap(struct svga_winsys_screen *sws, unsigned cap, boolean defaultVal)
+{
+   SVGA3dDevCapResult devcap;
+
+   if (!sws->get_cap(sws, cap, &devcap))
+      return defaultVal;
+   return devcap.b;
+}


 static float
@ -99,7 +139,6 @@ svga_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
 {
   struct svga_screen *svgascreen = svga_screen(screen);
   struct svga_winsys_screen *sws = svgascreen->sws;
-   SVGA3dDevCapResult result;

   switch (param) {
   case PIPE_CAPF_MAX_LINE_WIDTH:
@ -113,12 +152,11 @@ svga_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
      return svgascreen->maxPointSize;

   case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
-      if(!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_TEXTURE_ANISOTROPY, &result))
-         return 4.0f;
-      return (float) result.u;
+      return (float) get_uint_cap(sws, SVGA3D_DEVCAP_MAX_TEXTURE_ANISOTROPY, 4);

   case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
      return 15.0;
+
   case PIPE_CAPF_GUARD_BAND_LEFT:
   case PIPE_CAPF_GUARD_BAND_TOP:
   case PIPE_CAPF_GUARD_BAND_RIGHT:
@ -145,7 +183,12 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
   case PIPE_CAP_TWO_SIDED_STENCIL:
      return 1;
   case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
-      return 0;
+      /*
+       * "In virtually every OpenGL implementation and hardware,
+       * GL_MAX_DUAL_SOURCE_DRAW_BUFFERS is 1"
+       * http://www.opengl.org/wiki/Blending
+       */
+      return sws->have_vgpu10 ? 1 : 0;
   case PIPE_CAP_ANISOTROPIC_FILTER:
      return 1;
   case PIPE_CAP_POINT_SPRITE:
@ -158,6 +201,8 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
      return 1;
   case PIPE_CAP_QUERY_TIME_ELAPSED:
      return 0;
+   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+      return sws->have_vgpu10;
   case PIPE_CAP_TEXTURE_SHADOW_MAP:
      return 1;
   case PIPE_CAP_TEXTURE_SWIZZLE:
@ -170,7 +215,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
   case PIPE_CAP_USER_CONSTANT_BUFFERS:
      return 1;
   case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
-      return 16;
+      return 256;

   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
      {
@ -199,17 +244,20 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
      return MIN2(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS),
                  12 /* 2048x2048 */);

+   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+      return sws->have_vgpu10 ? SVGA3D_MAX_SURFACE_ARRAYSIZE : 0;
+
   case PIPE_CAP_BLEND_EQUATION_SEPARATE: /* req. for GL 1.5 */
      return 1;

   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
      return 1;
   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
-      return 0;
+      return sws->have_vgpu10;
   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
      return 0;
   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
-      return 1;
+      return !sws->have_vgpu10;

   case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
      return 1; /* The color outputs of vertex shaders are not clamped */
@ -222,7 +270,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
      return 1; /* expected for GL_ARB_framebuffer_object */

   case PIPE_CAP_GLSL_FEATURE_LEVEL:
-      return 120;
+      return sws->have_vgpu10 ? 330 : 120;

   case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
      return 0;
@ -230,49 +278,65 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
   case PIPE_CAP_SM3:
      return 1;

+   case PIPE_CAP_DEPTH_CLIP_DISABLE:
+   case PIPE_CAP_INDEP_BLEND_ENABLE:
+   case PIPE_CAP_CONDITIONAL_RENDER:
+   case PIPE_CAP_QUERY_TIMESTAMP:
+   case PIPE_CAP_TGSI_INSTANCEID:
+   case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+   case PIPE_CAP_SEAMLESS_CUBE_MAP:
+   case PIPE_CAP_FAKE_SW_MSAA:
+      return sws->have_vgpu10;
+
+   case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+      return sws->have_vgpu10 ? SVGA3D_DX_MAX_SOTARGETS : 0;
+   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+      return sws->have_vgpu10 ? 4 : 0;
+   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+      return sws->have_vgpu10 ? SVGA3D_MAX_STREAMOUT_DECLS : 0;
+   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+      return 0;
+   case PIPE_CAP_TEXTURE_MULTISAMPLE:
+      return svgascreen->ms_samples ? 1 : 0;
+
+   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+      return SVGA3D_DX_MAX_RESOURCE_SIZE;
+
+   case PIPE_CAP_MIN_TEXEL_OFFSET:
+      return sws->have_vgpu10 ? VGPU10_MIN_TEXEL_FETCH_OFFSET : 0;
+   case PIPE_CAP_MAX_TEXEL_OFFSET:
+      return sws->have_vgpu10 ? VGPU10_MAX_TEXEL_FETCH_OFFSET : 0;
+
+   case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
+   case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
+      return 0;
+
+   case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+      return sws->have_vgpu10 ? 256 : 0;
+   case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+      return sws->have_vgpu10 ? 1024 : 0;
+
+   case PIPE_CAP_PRIMITIVE_RESTART:
+      return 1; /* may be a sw fallback, depending on restart index */
+
   /* Unsupported features */
   case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
   case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
   case PIPE_CAP_SHADER_STENCIL_EXPORT:
-   case PIPE_CAP_DEPTH_CLIP_DISABLE:
-   case PIPE_CAP_SEAMLESS_CUBE_MAP:
   case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
-   case PIPE_CAP_INDEP_BLEND_ENABLE:
   case PIPE_CAP_INDEP_BLEND_FUNC:
-   case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
-   case PIPE_CAP_PRIMITIVE_RESTART:
-   case PIPE_CAP_TGSI_INSTANCEID:
-   case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
-   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
-   case PIPE_CAP_MIN_TEXEL_OFFSET:
-   case PIPE_CAP_MAX_TEXEL_OFFSET:
-   case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
-   case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
-   case PIPE_CAP_CONDITIONAL_RENDER:
   case PIPE_CAP_TEXTURE_BARRIER:
-   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
-   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
-   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
-   case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
-   case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
   case PIPE_CAP_MAX_VERTEX_STREAMS:
   case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
-   case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
-   case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
   case PIPE_CAP_COMPUTE:
   case PIPE_CAP_START_INSTANCE:
-   case PIPE_CAP_QUERY_TIMESTAMP:
-   case PIPE_CAP_TEXTURE_MULTISAMPLE:
   case PIPE_CAP_CUBE_MAP_ARRAY:
-   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
   case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
   case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
-   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
   case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
   case PIPE_CAP_TEXTURE_GATHER_SM5:
   case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
-   case PIPE_CAP_FAKE_SW_MSAA:
   case PIPE_CAP_TEXTURE_QUERY_LOD:
   case PIPE_CAP_SAMPLE_SHADING:
   case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
@ -288,8 +352,10 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
      return 0;
   case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
      return 64;
+   case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+   case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
   case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
-      return 1;
+      return 1;  /* need 4-byte alignment for all offsets and strides */
   case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
      return 2048;
   case PIPE_CAP_MAX_VIEWPORTS:
@ -320,11 +386,16 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
   return 0;
 }

-static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param)
+
+static int
+vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader,
+                       enum pipe_shader_cap param)
 {
   struct svga_screen *svgascreen = svga_screen(screen);
   struct svga_winsys_screen *sws = svgascreen->sws;
-   SVGA3dDevCapResult result;
+   unsigned val;
+
+   assert(!sws->have_vgpu10);

   switch (shader)
   {
@ -347,9 +418,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
      case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
         return 1;
      case PIPE_SHADER_CAP_MAX_TEMPS:
-         if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS, &result))
-            return 32;
-         return MIN2(result.u, SVGA3D_TEMPREG_MAX);
+         val = get_uint_cap(sws, SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS, 32);
+         return MIN2(val, SVGA3D_TEMPREG_MAX);
      case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
 	 /* 
 	  * Although PS 3.0 has some addressing abilities it can only represent
@ -392,9 +462,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
      {
      case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
      case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
-         if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_INSTRUCTIONS, &result))
-            return 512;
-         return result.u;
+         return get_uint_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_INSTRUCTIONS,
+                             512);
      case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
      case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
         /* XXX: until we have vertex texture support */
@ -410,9 +479,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
      case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
         return 1;
      case PIPE_SHADER_CAP_MAX_TEMPS:
-         if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEMPS, &result))
-            return 32;
-         return MIN2(result.u, SVGA3D_TEMPREG_MAX);
+         val = get_uint_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEMPS, 32);
+         return MIN2(val, SVGA3D_TEMPREG_MAX);
      case PIPE_SHADER_CAP_MAX_PREDS:
         return 1;
      case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
@ -459,8 +527,102 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
 }


+/**
+ * Return the value of a shader cap for a VGPU10 device.
+ * \param shader  one of PIPE_SHADER_x; anything but VS/GS/FS yields 0
+ * \param param   the PIPE_SHADER_CAP_x being queried
+ * \return the cap value, or 0 for unsupported/unrecognized caps
+ */
+static int
+vgpu10_get_shader_param(struct pipe_screen *screen, unsigned shader,
+                        enum pipe_shader_cap param)
+{
+   struct svga_screen *svgascreen = svga_screen(screen);
+   const boolean is_vs = (shader == PIPE_SHADER_VERTEX);
+   const boolean is_gs = (shader == PIPE_SHADER_GEOMETRY);
+   const boolean is_fs = (shader == PIPE_SHADER_FRAGMENT);
+
+   assert(svgascreen->sws->have_vgpu10);
+
+   /* Only VS, GS, FS supported */
+   if (!is_vs && !is_gs && !is_fs)
+      return 0;
+
+   /* NOTE: we do not query the device for any caps/limits at this time */
+
+   /* Generally the same limits for vertex, geometry and fragment shaders */
+   switch (param) {
+   case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+   case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+   case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+   case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+      return 64 * 1024;
+   case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+      return 64;
+   /* Input/output register limits are the per-stage exceptions */
+   case PIPE_SHADER_CAP_MAX_INPUTS:
+      return is_fs ? VGPU10_MAX_FS_INPUTS :
+             is_gs ? VGPU10_MAX_GS_INPUTS : VGPU10_MAX_VS_INPUTS;
+   case PIPE_SHADER_CAP_MAX_OUTPUTS:
+      return is_fs ? VGPU10_MAX_FS_OUTPUTS :
+             is_gs ? VGPU10_MAX_GS_OUTPUTS : VGPU10_MAX_VS_OUTPUTS;
+   case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+      return VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT * sizeof(float[4]);
+   case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+      /* value stored in the screen rather than a fixed constant */
+      return svgascreen->max_const_buffers;
+   case PIPE_SHADER_CAP_MAX_TEMPS:
+      return VGPU10_MAX_TEMPS;
+   case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+   case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+   case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+   case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+      return TRUE; /* XXX verify */
+   case PIPE_SHADER_CAP_MAX_PREDS:
+      return 0;
+   case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+   case PIPE_SHADER_CAP_SUBROUTINES:
+   case PIPE_SHADER_CAP_INTEGERS:
+      return TRUE;
+   case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+   case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
+      return SVGA3D_DX_MAX_SAMPLERS;
+   case PIPE_SHADER_CAP_PREFERRED_IR:
+      return PIPE_SHADER_IR_TGSI;
+   case PIPE_SHADER_CAP_DOUBLES:
+   case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+      return 0;
+   default:
+      debug_printf("Unexpected vgpu10 shader query %u\n", param);
+      break;
+   }
+
+   /* reached only for caps not handled above */
+   return 0;
+}
+
+
+/**
+ * Implement pipe_screen::get_shader_param(): forward the query to the
+ * VGPU10 or VGPU9 variant depending on the winsys' have_vgpu10 flag.
+ */
+static int
+svga_get_shader_param(struct pipe_screen *screen, unsigned shader,
+                      enum pipe_shader_cap param)
+{
+   const struct svga_winsys_screen *sws = svga_screen(screen)->sws;
+
+   return sws->have_vgpu10 ? vgpu10_get_shader_param(screen, shader, param)
+                           : vgpu9_get_shader_param(screen, shader, param);
+}
+
+
 /**
- * Implemnt pipe_screen::is_format_supported().
+ * Implement pipe_screen::is_format_supported().
 * \param bindings  bitmask of PIPE_BIND_x flags
 */
 static boolean
@ -478,7 +640,12 @@ svga_is_format_supported( struct pipe_screen *screen,
   assert(bindings);

   if (sample_count > 1) {
-      return FALSE;
+      /* In ms_samples, if bit N is set it means that we support
+       * multisample with N+1 samples per pixel.
+       */
+      if ((ss->ms_samples & (1 << (sample_count - 1))) == 0) {
+         return FALSE;
+      }
   }

   svga_format = svga_translate_format(ss, format, bindings);
@ -486,6 +653,22 @@ svga_is_format_supported( struct pipe_screen *screen,
      return FALSE;
   }

+   /* we don't support sRGB rendering into display targets */
+   if (util_format_is_srgb(format) && (bindings & PIPE_BIND_DISPLAY_TARGET)) {
+      return FALSE;
+   }
+
+   /*
+    * For VGPU10 vertex formats, skip querying host capabilities
+    */
+
+   if (ss->sws->have_vgpu10 && (bindings & PIPE_BIND_VERTEX_BUFFER)) {
+      SVGA3dSurfaceFormat svga_format;
+      unsigned flags;
+      svga_translate_vertex_format_vgpu10(format, &svga_format, &flags);
+      return svga_format != SVGA3D_FORMAT_INVALID;
+   }
+
   /*
    * Override host capabilities, so that we end up with the same
    * visuals for all virtual hardware implementations.
@ -498,6 +681,12 @@ svga_is_format_supported( struct pipe_screen *screen,
      case SVGA3D_R5G6B5:
         break;

+      /* VGPU10 formats */
+      case SVGA3D_B8G8R8A8_UNORM:
+      case SVGA3D_B8G8R8X8_UNORM:
+      case SVGA3D_B5G6R5_UNORM:
+         break;
+
      /* Often unsupported/problematic. This means we end up with the same
       * visuals for all virtual hardware implementations.
       */
@ -516,22 +705,32 @@ svga_is_format_supported( struct pipe_screen *screen,

   svga_get_format_cap(ss, svga_format, &caps);

+   if (bindings & PIPE_BIND_RENDER_TARGET) {
+      /* Check that the color surface is blendable, unless it's an
+       * integer format.
+       */
+      if (!svga_format_is_integer(svga_format) &&
+          (caps.value & SVGA3DFORMAT_OP_NOALPHABLEND)) {
+         return FALSE;
+      }
+   }
+
   mask.value = 0;
   if (bindings & PIPE_BIND_RENDER_TARGET) {
-      mask.offscreenRenderTarget = 1;
+      mask.value |= SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET;
   }
   if (bindings & PIPE_BIND_DEPTH_STENCIL) {
-      mask.zStencil = 1;
+      mask.value |= SVGA3DFORMAT_OP_ZSTENCIL;
   }
   if (bindings & PIPE_BIND_SAMPLER_VIEW) {
-      mask.texture = 1;
+      mask.value |= SVGA3DFORMAT_OP_TEXTURE;
   }

   if (target == PIPE_TEXTURE_CUBE) {
-      mask.cubeTexture = 1;
+      mask.value |= SVGA3DFORMAT_OP_CUBETEXTURE;
   }
-   if (target == PIPE_TEXTURE_3D) {
-      mask.volumeTexture = 1;
+   else if (target == PIPE_TEXTURE_3D) {
+      mask.value |= SVGA3DFORMAT_OP_VOLUMETEXTURE;
   }

   return (caps.value & mask.value) == mask.value;
@ -611,8 +810,6 @@ svga_screen_create(struct svga_winsys_screen *sws)
 {
   struct svga_screen *svgascreen;
   struct pipe_screen *screen;
-   SVGA3dDevCapResult result;
-   boolean use_vs30, use_ps30;

 #ifdef DEBUG
   SVGA_DEBUG = debug_get_flags_option("SVGA_DEBUG", svga_debug_flags, 0 );
@ -642,6 +839,7 @@ svga_screen_create(struct svga_winsys_screen *sws)
   screen->get_param = svga_get_param;
   screen->get_shader_param = svga_get_shader_param;
   screen->get_paramf = svga_get_paramf;
+   screen->get_timestamp = NULL;
   screen->is_format_supported = svga_is_format_supported;
   screen->context_create = svga_context_create;
   screen->fence_reference = svga_fence_reference;
@ -657,18 +855,6 @@ svga_screen_create(struct svga_winsys_screen *sws)
      svgascreen->hw_version = SVGA3D_HWVERSION_WS65_B1;
   }

-   use_ps30 =
-      sws->get_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION, &result) &&
-      result.u >= SVGA3DPSVERSION_30 ? TRUE : FALSE;
-
-   use_vs30 =
-      sws->get_cap(sws, SVGA3D_DEVCAP_VERTEX_SHADER_VERSION, &result) &&
-      result.u >= SVGA3DVSVERSION_30 ? TRUE : FALSE;
-
-   /* we require Shader model 3.0 or later */
-   if (!use_ps30 || !use_vs30)
-      goto error2;
-
   /*
    * The D16, D24X8, and D24S8 formats always do an implicit shadow compare
    * when sampled from, where as the DF16, DF24, and D24S8_INT do not.  So
@ -716,46 +902,77 @@ svga_screen_create(struct svga_winsys_screen *sws)

   /* Query device caps
    */
-   if (!sws->get_cap(sws, SVGA3D_DEVCAP_LINE_STIPPLE, &result))
-      svgascreen->haveLineStipple = FALSE;
-   else
-      svgascreen->haveLineStipple = result.u;
+   if (sws->have_vgpu10) {
+      svgascreen->haveProvokingVertex
+         = get_bool_cap(sws, SVGA3D_DEVCAP_DX_PROVOKING_VERTEX, FALSE);
+      svgascreen->haveLineSmooth = TRUE;
+      svgascreen->maxPointSize = 80.0F;
+      svgascreen->max_color_buffers = SVGA3D_DX_MAX_RENDER_TARGETS;

-   if (!sws->get_cap(sws, SVGA3D_DEVCAP_LINE_AA, &result))
-      svgascreen->haveLineSmooth = FALSE;
-   else
-      svgascreen->haveLineSmooth = result.u;
+      /* Multisample samples per pixel */
+      svgascreen->ms_samples =
+         get_uint_cap(sws, SVGA3D_DEVCAP_MULTISAMPLE_MASKABLESAMPLES, 0);

-   if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_LINE_WIDTH, &result))
-      svgascreen->maxLineWidth = 1.0F;
-   else
-      svgascreen->maxLineWidth = result.f;
+      /* Maximum number of constant buffers.  Clamp the host-reported value
+       * to the driver's SVGA_MAX_CONST_BUFS limit; an assert alone would
+       * not protect release builds.
+       */
+      svgascreen->max_const_buffers =
+         get_uint_cap(sws, SVGA3D_DEVCAP_DX_MAX_CONSTANT_BUFFERS, 1);
+      svgascreen->max_const_buffers =
+         MIN2(svgascreen->max_const_buffers, SVGA_MAX_CONST_BUFS);
+   }
+   else {
+      /* VGPU9 */
+      unsigned vs_ver = get_uint_cap(sws, SVGA3D_DEVCAP_VERTEX_SHADER_VERSION,
+                                     SVGA3DVSVERSION_NONE);
+      unsigned fs_ver = get_uint_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION,
+                                     SVGA3DPSVERSION_NONE);

-   if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_AA_LINE_WIDTH, &result))
-      svgascreen->maxLineWidthAA = 1.0F;
-   else
-      svgascreen->maxLineWidthAA = result.f;
+      /* we require Shader model 3.0 or later */
+      if (fs_ver < SVGA3DPSVERSION_30 || vs_ver < SVGA3DVSVERSION_30) {
+         goto error2;
+      }

-   if (0)
+      svgascreen->haveProvokingVertex = FALSE;
+
+      svgascreen->haveLineSmooth =
+         get_bool_cap(sws, SVGA3D_DEVCAP_LINE_AA, FALSE);
+
+      svgascreen->maxPointSize =
+         get_float_cap(sws, SVGA3D_DEVCAP_MAX_POINT_SIZE, 1.0f);
+      /* Keep this to a reasonable size to avoid failures in conform/pntaa.c */
+      svgascreen->maxPointSize = MIN2(svgascreen->maxPointSize, 80.0f);
+
+      /* The SVGA3D device always supports 4 targets at this time, regardless
+       * of what querying SVGA3D_DEVCAP_MAX_RENDER_TARGETS might return.
+       */
+      svgascreen->max_color_buffers = 4;
+
+      /* Only support one constant buffer
+       */
+      svgascreen->max_const_buffers = 1;
+
+      /* No multisampling */
+      svgascreen->ms_samples = 0;
+   }
+
+   /* common VGPU9 / VGPU10 caps */
+   svgascreen->haveLineStipple =
+      get_bool_cap(sws, SVGA3D_DEVCAP_LINE_STIPPLE, FALSE);
+
+   svgascreen->maxLineWidth =
+      get_float_cap(sws, SVGA3D_DEVCAP_MAX_LINE_WIDTH, 1.0f);
+
+   svgascreen->maxLineWidthAA =
+      get_float_cap(sws, SVGA3D_DEVCAP_MAX_AA_LINE_WIDTH, 1.0f);
+
+   if (0) {
+      debug_printf("svga: haveProvokingVertex %u\n",
+                   svgascreen->haveProvokingVertex);
      debug_printf("svga: haveLineStip %u  "
                   "haveLineSmooth %u  maxLineWidth %f\n",
                   svgascreen->haveLineStipple, svgascreen->haveLineSmooth,
                   svgascreen->maxLineWidth);
-
-   if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_POINT_SIZE, &result)) {
-      svgascreen->maxPointSize = 1.0F;
-   } else {
-      /* Keep this to a reasonable size to avoid failures in
-       * conform/pntaa.c:
-       */
-      svgascreen->maxPointSize = MIN2(result.f, 80.0f);
+      debug_printf("svga: maxPointSize %g\n", svgascreen->maxPointSize);
   }

-   /* The SVGA3D device always supports 4 targets at this time, regardless
-    * of what querying SVGA3D_DEVCAP_MAX_RENDER_TARGETS might return.
-    */
-   svgascreen->max_color_buffers = 4;
-
   pipe_mutex_init(svgascreen->tex_mutex);
   pipe_mutex_init(svgascreen->swc_mutex);

--- a/src/gallium/drivers/svga/svga_screen.h
+++ b/src/gallium/drivers/svga/svga_screen.h
@ -1,4 +1,4 @@
-/**********************************************************
+ /**********************************************************
 * Copyright 2008-2009 VMware, Inc.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
@ -48,10 +48,13 @@ struct svga_screen
   SVGA3dHardwareVersion hw_version;

   /** Device caps */
+   boolean haveProvokingVertex;
   boolean haveLineStipple, haveLineSmooth;
   float maxLineWidth, maxLineWidthAA;
   float maxPointSize;
   unsigned max_color_buffers;
+   unsigned max_const_buffers;
+   unsigned ms_samples;

   struct {
      boolean force_level_surface_view;
@ -69,6 +72,7 @@ struct svga_screen
   /* which formats to translate depth formats into */
   struct {
     enum SVGA3dSurfaceFormat z16;
+
     /* note gallium order */
     enum SVGA3dSurfaceFormat x8z24;
     enum SVGA3dSurfaceFormat s8z24;
--- a/src/gallium/drivers/svga/svga_screen_cache.c
+++ b/src/gallium/drivers/svga/svga_screen_cache.c
@ -115,8 +115,14 @@ svga_screen_cache_lookup(struct svga_screen *svgascreen,

      assert(entry->handle);

+      /* If the key matches and the fence is signalled (the surface is no
+       * longer needed) the lookup was successful.  We found a surface that
+       * can be reused.
+       * We unlink the surface from the cache entry and we add the entry to
+       * the 'empty' list.
+       */
      if (memcmp(&entry->key, key, sizeof *key) == 0 &&
-         sws->fence_signalled(sws, entry->fence, 0) == 0) {
+          sws->fence_signalled(sws, entry->fence, 0) == 0) {
         unsigned surf_size;

         assert(sws->surface_is_flushed(sws, entry->handle));
@ -124,10 +130,13 @@ svga_screen_cache_lookup(struct svga_screen *svgascreen,
         handle = entry->handle; /* Reference is transfered here. */
         entry->handle = NULL;

+         /* Remove from hash table */
         LIST_DEL(&entry->bucket_head);

+         /* remove from LRU list */
         LIST_DEL(&entry->head);

+         /* Add the cache entry (but not the surface!) to the empty list */
         LIST_ADD(&entry->head, &cache->empty);

         /* update the cache size */
@ -195,7 +204,8 @@ svga_screen_cache_shrink(struct svga_screen *svgascreen,


 /**
- * Transfers a handle reference.
+ * Add a surface to the cache.  This is done when the driver deletes
+ * the surface.  Note: transfers a handle reference.
 */
 static void
 svga_screen_cache_add(struct svga_screen *svgascreen,
@ -207,17 +217,17 @@ svga_screen_cache_add(struct svga_screen *svgascreen,
   struct svga_host_surface_cache_entry *entry = NULL;
   struct svga_winsys_surface *handle = *p_handle;
   unsigned surf_size;
-   
+
   assert(key->cachable);

   if (!handle)
      return;
-   
+
   surf_size = surface_size(key);

   *p_handle = NULL;
   pipe_mutex_lock(cache->mutex);
-   
+
   if (surf_size >= SVGA_HOST_SURFACE_CACHE_BYTES) {
      /* this surface is too large to cache, just free it */
      sws->surface_reference(sws, &handle, NULL);
@ -245,10 +255,13 @@ svga_screen_cache_add(struct svga_screen *svgascreen,
   }

   if (!LIST_IS_EMPTY(&cache->empty)) {
-      /* use the first empty entry */
+      /* An empty entry has no surface associated with it.
+       * Use the first empty entry.
+       */
      entry = LIST_ENTRY(struct svga_host_surface_cache_entry,
                         cache->empty.next, head);

+      /* Remove from the empty list */
      LIST_DEL(&entry->head);
   }
   else if (!LIST_IS_EMPTY(&cache->unused)) {
@ -262,12 +275,15 @@ svga_screen_cache_add(struct svga_screen *svgascreen,

      sws->surface_reference(sws, &entry->handle, NULL);

+      /* Remove from hash table */
      LIST_DEL(&entry->bucket_head);

+      /* Remove from LRU list */
      LIST_DEL(&entry->head);
   }

   if (entry) {
+      assert(entry->handle == NULL);
      entry->handle = handle;
      memcpy(&entry->key, key, sizeof entry->key);

@ -304,6 +320,7 @@ svga_screen_cache_flush(struct svga_screen *svgascreen,

   pipe_mutex_lock(cache->mutex);

+   /* Loop over entries in the validated list */
   curr = cache->validated.next;
   next = curr->next;
   while (curr != &cache->validated) {
@ -312,12 +329,15 @@ svga_screen_cache_flush(struct svga_screen *svgascreen,
      assert(entry->handle);

      if (sws->surface_is_flushed(sws, entry->handle)) {
+         /* remove entry from LRU list */
         LIST_DEL(&entry->head);

         svgascreen->sws->fence_reference(svgascreen->sws, &entry->fence, fence);

+         /* Add entry to the unused list */
         LIST_ADD(&entry->head, &cache->unused);

+         /* Add entry to the hash table bucket */
         bucket = svga_screen_cache_bucket(&entry->key);
         LIST_ADD(&entry->bucket_head, &cache->bucket[bucket]);
      }
@ -388,9 +408,12 @@ svga_screen_cache_init(struct svga_screen *svgascreen)
 * Allocate a new host-side surface.  If the surface is marked as cachable,
 * first try re-using a surface in the cache of freed surfaces.  Otherwise,
 * allocate a new surface.
+ * \param bind_flags  bitmask of PIPE_BIND_x flags
+ * \param usage  one of PIPE_USAGE_x values
 */
 struct svga_winsys_surface *
 svga_screen_surface_create(struct svga_screen *svgascreen,
+                           unsigned bind_flags, unsigned usage,
                           struct svga_host_surface_cache_key *key)
 {
   struct svga_winsys_screen *sws = svgascreen->sws;
@ -398,17 +421,20 @@ svga_screen_surface_create(struct svga_screen *svgascreen,
   boolean cachable = SVGA_SURFACE_CACHE_ENABLED && key->cachable;

   SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
-            "%s sz %dx%dx%d mips %d faces %d cachable %d\n",
+            "%s sz %dx%dx%d mips %d faces %d arraySize %d cachable %d\n",
            __FUNCTION__,
            key->size.width,
            key->size.height,
            key->size.depth,
            key->numMipLevels,
            key->numFaces,
+            key->arraySize,
            key->cachable);

   if (cachable) {
      if (key->format == SVGA3D_BUFFER) {
+         SVGA3dSurfaceFlags hint_flag;
+
         /* For buffers, round the buffer size up to the nearest power
          * of two to increase the probability of cache hits.  Keep
          * texture surface dimensions unchanged.
@ -417,15 +443,33 @@ svga_screen_surface_create(struct svga_screen *svgascreen,
         while (size < key->size.width)
            size <<= 1;
         key->size.width = size;
-	 /* Since we're reusing buffers we're effectively transforming all
-	  * of them into dynamic buffers.
-	  *
-	  * It would be nice to not cache long lived static buffers. But there
-	  * is no way to detect the long lived from short lived ones yet. A
-	  * good heuristic would be buffer size.
-	  */
-	 key->flags &= ~SVGA3D_SURFACE_HINT_STATIC;
-	 key->flags |= SVGA3D_SURFACE_HINT_DYNAMIC;
+
+         /* Determine whether the buffer is static or dynamic.
+          * This is a bit of a heuristic which can be tuned as needed.
+          */
+         if (usage == PIPE_USAGE_DEFAULT ||
+             usage == PIPE_USAGE_IMMUTABLE) {
+            hint_flag = SVGA3D_SURFACE_HINT_STATIC;
+         }
+         else if (bind_flags & PIPE_BIND_INDEX_BUFFER) {
+            /* Index buffers don't change too often.  Mark them as static.
+             */
+            hint_flag = SVGA3D_SURFACE_HINT_STATIC;
+         }
+         else {
+            /* Since we're reusing buffers we're effectively transforming all
+             * of them into dynamic buffers.
+             *
+             * It would be nice to not cache long lived static buffers. But there
+             * is no way to detect the long lived from short lived ones yet. A
+             * good heuristic would be buffer size.
+             */
+            hint_flag = SVGA3D_SURFACE_HINT_DYNAMIC;
+         }
+
+         key->flags &= ~(SVGA3D_SURFACE_HINT_STATIC |
+                         SVGA3D_SURFACE_HINT_DYNAMIC);
+         key->flags |= hint_flag;
      }

      handle = svga_screen_cache_lookup(svgascreen, key);
@ -436,25 +480,32 @@ svga_screen_surface_create(struct svga_screen *svgascreen,
                     key->size.width);
         else
            SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
-                     "reuse sid %p sz %dx%dx%d mips %d faces %d\n", handle,
+                     "reuse sid %p sz %dx%dx%d mips %d faces %d arraySize %d\n", handle,
                     key->size.width,
                     key->size.height,
                     key->size.depth,
                     key->numMipLevels,
-                     key->numFaces);
+                     key->numFaces,
+                     key->arraySize);
      }
   }

   if (!handle) {
+      unsigned usage = 0;
+
+      if (!key->cachable)
+         usage |= SVGA_SURFACE_USAGE_SHARED;
+      if (key->scanout)
+         usage |= SVGA_SURFACE_USAGE_SCANOUT;
+
      handle = sws->surface_create(sws,
                                   key->flags,
                                   key->format,
-                                   key->cachable ?
-                                   0 : SVGA_SURFACE_USAGE_SHARED,
+                                   usage,
                                   key->size,
-                                   key->numFaces,
+                                   key->numFaces * key->arraySize,
                                   key->numMipLevels,
-                                   0);
+                                   key->sampleCount);
      if (handle)
         SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
                  "  CREATE sid %p sz %dx%dx%d\n",
--- a/src/gallium/drivers/svga/svga_screen_cache.h
+++ b/src/gallium/drivers/svga/svga_screen_cache.h
@ -62,9 +62,12 @@ struct svga_host_surface_cache_key
   SVGA3dSurfaceFlags flags;
   SVGA3dSurfaceFormat format;
   SVGA3dSize size;
-   uint32_t numFaces:24;
-   uint32_t numMipLevels:7;
+   uint32_t numFaces:3;
+   uint32_t arraySize:16;
+   uint32_t numMipLevels:6;
   uint32_t cachable:1;         /* False if this is a shared surface */
+   uint32_t sampleCount:5;
+   uint32_t scanout:1;
 };


@ -137,6 +140,7 @@ svga_screen_cache_init(struct svga_screen *svgascreen);

 struct svga_winsys_surface *
 svga_screen_surface_create(struct svga_screen *svgascreen,
+                           unsigned bind_flags, unsigned usage,
                           struct svga_host_surface_cache_key *key);

 void
--- a/src/gallium/drivers/svga/svga_shader.c
+++ b/src/gallium/drivers/svga/svga_shader.c
@ -27,14 +27,318 @@
 #include "util/u_memory.h"
 #include "svga_context.h"
 #include "svga_cmd.h"
+#include "svga_format.h"
 #include "svga_shader.h"


+/**
+ * This bit isn't really used anywhere.  It only serves to help
+ * generate a unique "signature" for the vertex shader output bitmask.
+ * Shader input/output signatures are used to resolve shader linking
+ * issues.
+ */
+#define FOG_GENERIC_BIT (((uint64_t) 1) << 63)
+
+
+/**
+ * Use the shader info to generate a bitmask indicating which generic
+ * inputs are used by the shader.  A set bit indicates that GENERIC[i]
+ * is used.
+ *
+ * \param info  shader info whose input semantic names/indexes are scanned
+ * \return 64-bit mask; bit j is set for each input whose semantic name is
+ *         TGSI_SEMANTIC_GENERIC and whose semantic index is j.  Inputs
+ *         with any other semantic name are ignored.  The semantic index
+ *         must be less than 64; this is only checked by an assert.
+ */
+uint64_t
+svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
+{
+   unsigned i;
+   uint64_t mask = 0x0;
+
+   for (i = 0; i < info->num_inputs; i++) {
+      if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
+         unsigned j = info->input_semantic_index[i];
+         assert(j < sizeof(mask) * 8);
+         mask |= ((uint64_t) 1) << j;
+      }
+   }
+
+   return mask;
+}
+
+
+/**
+ * Scan shader info to return a bitmask of the shader's generic outputs.
+ *
+ * \param info  shader info whose output semantic names/indexes are scanned
+ * \return 64-bit mask; bit j is set for each TGSI_SEMANTIC_GENERIC output
+ *         with semantic index j, and FOG_GENERIC_BIT is set if there is a
+ *         TGSI_SEMANTIC_FOG output.  All other outputs are ignored.
+ *         Note that FOG_GENERIC_BIT is bit 63, so a GENERIC[63] output
+ *         would set the same bit as a fog output.
+ */
+uint64_t
+svga_get_generic_outputs_mask(const struct tgsi_shader_info *info)
+{
+   unsigned i;
+   uint64_t mask = 0x0;
+
+   for (i = 0; i < info->num_outputs; i++) {
+      switch (info->output_semantic_name[i]) {
+      case TGSI_SEMANTIC_GENERIC:
+         {
+            unsigned j = info->output_semantic_index[i];
+            assert(j < sizeof(mask) * 8);
+            mask |= ((uint64_t) 1) << j;
+         }
+         break;
+      case TGSI_SEMANTIC_FOG:
+         mask |= FOG_GENERIC_BIT;
+         break;
+      }
+   }
+
+   return mask;
+}
+
+
+
+/**
+ * Given a mask of used generic variables (as returned by the above functions)
+ * fill in a table which maps those indexes to small integers.
+ * The table is consumed by svga_remap_generic_index().
+ * Example: if generics_mask = binary(1010) it means that GENERIC[1] and
+ * GENERIC[3] are used.  The remap_table will contain:
+ *   table[1] = 1;
+ *   table[3] = 2;
+ * (numbering starts at 1 because texcoord[0] is reserved).
+ * All remaining table entries are set to -1, i.e. "no value assigned";
+ * svga_remap_generic_index() allocates a value for those on demand.
+ */
+void
+svga_remap_generics(uint64_t generics_mask,
+                    int8_t remap_table[MAX_GENERIC_VARYING])
+{
+   /* Note texcoord[0] is reserved so start at 1 */
+   unsigned count = 1, i;
+
+   for (i = 0; i < MAX_GENERIC_VARYING; i++) {
+      remap_table[i] = -1;
+   }
+
+   /* for each bit set in generics_mask, lowest bit first */
+   while (generics_mask) {
+      unsigned index = ffsll(generics_mask) - 1;
+      remap_table[index] = count++;
+      generics_mask &= ~((uint64_t) 1 << index);
+   }
+}
+
+
+/**
+ * Use the generic remap table to map a TGSI generic varying variable
+ * index to a small integer.  If the remapping table doesn't have a
+ * valid value for the given index (the table entry is -1) it means
+ * the fragment shader doesn't use that VS output.  Just allocate
+ * the next free value in that case (one more than the largest value
+ * found in the table, or 1 if no entry is positive) and store it in
+ * the table.  Alternately, we could cull VS instructions that write
+ * to that register, or replace the register with a dummy temp register.
+ * XXX TODO: we should do one of the latter as it would save precious
+ * texcoord registers.
+ *
+ * \param remap_table    generic remap table (see svga_remap_generics());
+ *                       may be modified as described above
+ * \param generic_index  TGSI GENERIC[] index.  Values >=
+ *                       MAX_GENERIC_VARYING trip an assert and are
+ *                       clamped to MAX_GENERIC_VARYING - 1.
+ * \return the remapped index for generic_index
+ */
+int
+svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
+                         int generic_index)
+{
+   assert(generic_index < MAX_GENERIC_VARYING);
+
+   if (generic_index >= MAX_GENERIC_VARYING) {
+      /* just don't return a random/garbage value */
+      generic_index = MAX_GENERIC_VARYING - 1;
+   }
+
+   if (remap_table[generic_index] == -1) {
+      /* This is a VS output that has no matching PS input.  Find a
+       * free index.
+       */
+      int i, max = 0;
+      for (i = 0; i < MAX_GENERIC_VARYING; i++) {
+         max = MAX2(max, remap_table[i]);
+      }
+      remap_table[generic_index] = max + 1;
+   }
+
+   return remap_table[generic_index];
+}
+
+
+/**
+ * Initialize the shader-neutral fields of svga_compile_key from context
+ * state.  This is basically the texture-related state.
+ *
+ * For every bound sampler view of the given shader stage this fills in
+ * key->tex[i] (texture target, MSAA flag, swizzles, return type).  For
+ * units whose sampler uses unnormalized coordinates it also assigns a
+ * width_height_idx slot, sets the unnormalized flag and increments
+ * key->num_unnormalized_coords.  key->num_textures is set to the number
+ * of sampler views of that stage.
+ *
+ * A sampler view may be bound without a matching sampler state.  Such a
+ * unit is treated as using normalized coordinates rather than
+ * dereferencing a NULL sampler pointer.
+ *
+ * NOTE(review): entries for unbound views are left untouched and
+ * num_unnormalized_coords is only incremented, so the caller presumably
+ * zeroes the key beforehand -- confirm against callers.
+ *
+ * \param svga    context whose current sampler/sampler-view state is read
+ * \param shader  shader stage index into svga->curr.sampler_views[]
+ * \param key     the compile key to fill in
+ */
+void
+svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
+                            struct svga_compile_key *key)
+{
+   unsigned i, idx = 0;
+
+   assert(shader < Elements(svga->curr.num_sampler_views));
+
+   for (i = 0; i < svga->curr.num_sampler_views[shader]; i++) {
+      struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
+      if (view) {
+         assert(view->texture);
+         assert(view->texture->target < (1 << 4)); /* texture_target:4 */
+
+         key->tex[i].texture_target = view->texture->target;
+
+         /* 1D/2D array textures with one slice are treated as non-arrays
+          * by the SVGA3D device.  Convert the texture type here so that
+          * we emit the right TEX/SAMPLE instruction in the shader.
+          */
+         if (view->texture->array_size == 1) {
+            if (view->texture->target == PIPE_TEXTURE_1D_ARRAY) {
+               key->tex[i].texture_target = PIPE_TEXTURE_1D;
+            }
+            else if (view->texture->target == PIPE_TEXTURE_2D_ARRAY) {
+               key->tex[i].texture_target = PIPE_TEXTURE_2D;
+            }
+         }
+
+         key->tex[i].texture_msaa = view->texture->nr_samples > 1;
+
+         /* The sampler state may legitimately be missing for this unit;
+          * only look at normalized_coords when a sampler is bound.
+          */
+         if (svga->curr.sampler[shader][i] &&
+             !svga->curr.sampler[shader][i]->normalized_coords) {
+            assert(idx < (1 << 5));  /* width_height_idx:5 bitfield */
+            key->tex[i].width_height_idx = idx++;
+            key->tex[i].unnormalized = TRUE;
+            ++key->num_unnormalized_coords;
+         }
+
+         key->tex[i].swizzle_r = view->swizzle_r;
+         key->tex[i].swizzle_g = view->swizzle_g;
+         key->tex[i].swizzle_b = view->swizzle_b;
+         key->tex[i].swizzle_a = view->swizzle_a;
+
+         key->tex[i].return_type = svga_get_texture_datatype(view->format);
+      }
+   }
+   key->num_textures = svga->curr.num_sampler_views[shader];
+}
+
+
+/** Search for a compiled shader variant with the same compile key.
+ *
+ * Walks the shader's linked list of variants and compares each variant's
+ * key against \p key using svga_compile_keys_equal().
+ *
+ * \param shader  the shader whose variants list is searched
+ * \param key     the compile key to look for (must not be NULL)
+ * \return the first matching variant, or NULL if none matches
+ */
+struct svga_shader_variant *
+svga_search_shader_key(const struct svga_shader *shader,
+                       const struct svga_compile_key *key)
+{
+   struct svga_shader_variant *variant = shader->variants;
+
+   assert(key);
+
+   for ( ; variant; variant = variant->next) {
+      if (svga_compile_keys_equal(key, &variant->key))
+         return variant;
+   }
+   return NULL;
+}
+
+/** Search for a shader with the same token key.
+ *
+ * Starting at \p pshader, follows the 'next' links and compares each
+ * shader's token_key against \p key with memcmp() over the whole
+ * struct svga_token_key.
+ *
+ * \param pshader  first shader of the list to search (may be NULL)
+ * \param key      the token key to look for (must not be NULL)
+ * \return the first shader whose token key matches, or NULL if none does
+ */
+struct svga_shader *
+svga_search_shader_token_key(struct svga_shader *pshader,
+                             const struct svga_token_key *key)
+{
+   struct svga_shader *shader = pshader;
+
+   assert(key);
+
+   for ( ; shader; shader = shader->next) {
+      if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0)
+         return shader;
+   }
+   return NULL;
+}
+
+/**
+ * Helper function to define a gb shader for non-vgpu10 device.
+ *
+ * Creates the winsys gb shader object from the variant's tokens, stores
+ * it in variant->gb_shader and emits a BindGBShader command for it.
+ *
+ * \param svga     the context
+ * \param type     SVGA3D shader type
+ * \param variant  the shader variant; its gb_shader field is set here
+ * \param codeLen  size of variant->tokens in bytes
+ * \return PIPE_ERROR_OUT_OF_MEMORY if the gb shader could not be created,
+ *         otherwise the result of SVGA3D_BindGBShader().  If the bind
+ *         fails, variant->gb_shader is left set; it is not destroyed here.
+ */
+static enum pipe_error
+define_gb_shader_vgpu9(struct svga_context *svga,
+                       SVGA3dShaderType type,
+                       struct svga_shader_variant *variant,
+                       unsigned codeLen)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   enum pipe_error ret;
+
+   /**
+    * Create gb memory for the shader and upload the shader code.
+    * Kernel module will allocate an id for the shader and issue
+    * the DefineGBShader command.
+    */
+   variant->gb_shader = sws->shader_create(sws, type,
+                                           variant->tokens, codeLen);
+
+   if (!variant->gb_shader)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);
+
+   return ret;
+}
+
+/**
+ * Helper function to define a gb shader for vgpu10 device.
+ *
+ * Allocates a shader ID from svga->shader_id_bm, creates the winsys
+ * shader object and emits the define and bind commands with a single
+ * SVGA3D_vgpu10_DefineAndBindShader() call.
+ *
+ * \param svga     the context
+ * \param type     SVGA3D shader type
+ * \param variant  the shader variant; its id and gb_shader fields are
+ *                 set here
+ * \param codeLen  size of variant->tokens in bytes
+ * \return PIPE_OK on success.  Every failure path returns
+ *         PIPE_ERROR_OUT_OF_MEMORY (the error code returned by
+ *         SVGA3D_vgpu10_DefineAndBindShader() is not propagated).  On
+ *         failure nothing allocated by this function is left behind and
+ *         variant->id is UTIL_BITMASK_INVALID_INDEX.
+ */
+static enum pipe_error
+define_gb_shader_vgpu10(struct svga_context *svga,
+                        SVGA3dShaderType type,
+                        struct svga_shader_variant *variant,
+                        unsigned codeLen)
+{
+   struct svga_winsys_context *swc = svga->swc;
+   enum pipe_error ret;
+
+   /**
+    * Shaders in VGPU10 enabled device reside in the device COTable.
+    * SVGA driver will allocate an integer ID for the shader and
+    * issue DXDefineShader and DXBindShader commands.
+    */
+   variant->id = util_bitmask_add(svga->shader_id_bm);
+   if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   /* Create gb memory for the shader and upload the shader code */
+   variant->gb_shader = swc->shader_create(swc,
+                                           variant->id, type,
+                                           variant->tokens, codeLen);
+
+   if (!variant->gb_shader) {
+      /* Free the shader ID (nothing else has been allocated yet) */
+      assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
+      goto fail_no_allocation;
+   }
+
+   /**
+    * Since we don't want to do any flush within state emission to avoid
+    * partial state in a command buffer, it's important to make sure that
+    * there is enough room to send both the DXDefineShader & DXBindShader
+    * commands in the same command buffer. So let's send both
+    * commands in one command reservation. If it fails, we'll undo
+    * the shader creation and return an error.
+    */
+   ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
+                                           variant->id, type, codeLen);
+
+   if (ret != PIPE_OK)
+      goto fail;
+
+   return PIPE_OK;
+
+fail:
+   swc->shader_destroy(swc, variant->gb_shader);
+   variant->gb_shader = NULL;
+
+fail_no_allocation:
+   util_bitmask_clear(svga->shader_id_bm, variant->id);
+   variant->id = UTIL_BITMASK_INVALID_INDEX;
+
+   return PIPE_ERROR_OUT_OF_MEMORY;
+}

 /**
 * Issue the SVGA3D commands to define a new shader.
- * \param result  contains the shader tokens, etc.  The result->id field will
- *                be set here.
+ * \param variant  contains the shader tokens, etc.  The variant->id field will
+ *                 be set here.
 */
 enum pipe_error
 svga_define_shader(struct svga_context *svga,
@ -42,27 +346,17 @@ svga_define_shader(struct svga_context *svga,
                   struct svga_shader_variant *variant)
 {
   unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
+   enum pipe_error ret;
+
+   variant->id = UTIL_BITMASK_INVALID_INDEX;

   if (svga_have_gb_objects(svga)) {
-      struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
-      enum pipe_error ret;
-
-      variant->gb_shader = sws->shader_create(sws, type,
-                                              variant->tokens, codeLen);
-      if (!variant->gb_shader)
-         return PIPE_ERROR_OUT_OF_MEMORY;
-
-      ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);
-      if (ret != PIPE_OK) {
-         sws->shader_destroy(sws, variant->gb_shader);
-         variant->gb_shader = NULL;
-      }
-
-      return ret;
+      if (svga_have_vgpu10(svga))
+         return define_gb_shader_vgpu10(svga, type, variant, codeLen);
+      else
+         return define_gb_shader_vgpu9(svga, type, variant, codeLen);
   }
   else {
-      enum pipe_error ret;
-
      /* Allocate an integer ID for the shader */
      variant->id = util_bitmask_add(svga->shader_id_bm);
      if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
@ -80,14 +374,45 @@ svga_define_shader(struct svga_context *svga,
         assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
         util_bitmask_clear(svga->shader_id_bm, variant->id);
         variant->id = UTIL_BITMASK_INVALID_INDEX;
-         return ret;
      }
   }

-   return PIPE_OK;
+   return ret;
 }


+/**
+ * Issue the SVGA3D commands to set/bind a shader.
+ * \param svga     the context
+ * \param type     one of SVGA3D_SHADERTYPE_VS, _GS or _PS
+ * \param variant  the shader variant to bind.  May be NULL, in which case
+ *                 SVGA3D_INVALID_ID / a NULL gb shader is passed to the
+ *                 set-shader command.
+ * \return the result of the SetShader command that was issued
+ */
+enum pipe_error
+svga_set_shader(struct svga_context *svga,
+                SVGA3dShaderType type,
+                struct svga_shader_variant *variant)
+{
+   enum pipe_error ret;
+   unsigned id = variant ? variant->id : SVGA3D_INVALID_ID;
+
+   assert(type == SVGA3D_SHADERTYPE_VS ||
+          type == SVGA3D_SHADERTYPE_GS ||
+          type == SVGA3D_SHADERTYPE_PS);
+
+   if (svga_have_gb_objects(svga)) {
+      struct svga_winsys_gb_shader *gbshader =
+         variant ? variant->gb_shader : NULL;
+
+      if (svga_have_vgpu10(svga))
+         ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id);
+      else
+         ret = SVGA3D_SetGBShader(svga->swc, type, gbshader);
+   }
+   else {
+      ret = SVGA3D_SetShader(svga->swc, type, id);
+   }
+
+   return ret;
+}
+

 enum pipe_error
 svga_destroy_shader_variant(struct svga_context *svga,
@ -96,32 +421,92 @@ svga_destroy_shader_variant(struct svga_context *svga,
 {
   enum pipe_error ret = PIPE_OK;

-   if (svga_have_gb_objects(svga)) {
-      struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
-
-      sws->shader_destroy(sws, variant->gb_shader);
-      variant->gb_shader = NULL;
-      goto end;
-   }
-
-   /* first try */
-   if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
-      ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
-
-      if (ret != PIPE_OK) {
-         /* flush and try again */
-         svga_context_flush(svga, NULL);
-
-         ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
-         assert(ret == PIPE_OK);
+   if (svga_have_gb_objects(svga) && variant->gb_shader) {
+      if (svga_have_vgpu10(svga)) {
+         struct svga_winsys_context *swc = svga->swc;
+         swc->shader_destroy(swc, variant->gb_shader);
+         ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
+         if (ret != PIPE_OK) {
+            /* flush and try again */
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
+         }
+         util_bitmask_clear(svga->shader_id_bm, variant->id);
+      }
+      else {
+         struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+         sws->shader_destroy(sws, variant->gb_shader);
+      }
+      variant->gb_shader = NULL;
+   }
+   else {
+      if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
+         ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
+         if (ret != PIPE_OK) {
+            /* flush and try again */
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
+            assert(ret == PIPE_OK);
+         }
+         util_bitmask_clear(svga->shader_id_bm, variant->id);
      }
-
-      util_bitmask_clear(svga->shader_id_bm, variant->id);
   }

-end:
   FREE((unsigned *)variant->tokens);
   FREE(variant);

   return ret;
 }
+
+/*
+ * Rebind shaders.
+ * Called at the beginning of every new command buffer to ensure that
+ * shaders are properly paged-in. Instead of sending the SetShader
+ * command, this function sends a private allocation command to
+ * page in a shader. This avoids emitting redundant state to the device
+ * just to page in a resource.
+ *
+ * The VS, GS and FS stages are handled in that order.  A stage is rebound
+ * only if its rebind flag is set and the hw draw state holds a variant
+ * with a gb shader for it; the stage's flag is then cleared.
+ *
+ * Returns PIPE_OK, or the first error returned by swc->resource_rebind().
+ * On error the rebind flag of the failing stage and of the stages after
+ * it are left as they were.
+ */
+enum pipe_error
+svga_rebind_shaders(struct svga_context *svga)
+{
+   struct svga_winsys_context *swc = svga->swc;
+   struct svga_hw_draw_state *hw = &svga->state.hw_draw;
+   enum pipe_error ret;
+
+   assert(svga_have_vgpu10(svga));
+
+   /**
+    * If the underlying winsys layer does not need resource rebinding,
+    * just clear the rebind flags and return.
+    */
+   if (swc->resource_rebind == NULL) {
+      svga->rebind.flags.vs = 0;
+      svga->rebind.flags.gs = 0;
+      svga->rebind.flags.fs = 0;
+
+      return PIPE_OK;
+   }
+
+   if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) {
+      ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+   svga->rebind.flags.vs = 0;
+
+   if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) {
+      ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+   svga->rebind.flags.gs = 0;
+
+   if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) {
+      ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+   svga->rebind.flags.fs = 0;
+
+   return PIPE_OK;
+}
--- a/src/gallium/drivers/svga/svga_shader.h
+++ b/src/gallium/drivers/svga/svga_shader.h
@ -27,19 +27,259 @@
 #define SVGA_SHADER_H

 #include "svga3d_reg.h"
+#include "svga_context.h"
+#include "svga_streamout.h"

-struct svga_shader_variant;
+
+/**
+ * We use a 64-bit mask to keep track of the generic indexes.
+ * This is the maximum semantic index for a TGSI GENERIC[i] register.
+ */
+#define MAX_GENERIC_VARYING 64
+
+
+struct svga_context;
+
+
+struct svga_compile_key
+{
+   /* vertex shader only */
+   struct {
+      uint64_t fs_generic_inputs;
+      unsigned passthrough:1;
+      unsigned need_prescale:1;
+      unsigned undo_viewport:1;
+      unsigned allow_psiz:1;
+      /** The following are all 32-bit bitmasks (per VS input) */
+      unsigned adjust_attrib_range;
+      unsigned attrib_is_pure_int;
+      unsigned adjust_attrib_w_1;
+      unsigned adjust_attrib_itof;
+      unsigned adjust_attrib_utof;
+      unsigned attrib_is_bgra;
+      unsigned attrib_puint_to_snorm;
+      unsigned attrib_puint_to_uscaled;
+      unsigned attrib_puint_to_sscaled;
+   } vs;
+
+   /* geometry shader only */
+   struct {
+      uint64_t vs_generic_outputs;
+      unsigned need_prescale:1;
+      unsigned writes_psize:1;
+      unsigned wide_point:1;
+   } gs;
+
+   /* fragment shader only */
+   struct {
+      uint64_t vs_generic_outputs;
+      uint64_t gs_generic_outputs;
+      unsigned light_twoside:1;
+      unsigned front_ccw:1;
+      unsigned white_fragments:1;
+      unsigned flatshade:1;
+      unsigned pstipple:1;
+      unsigned alpha_func:4;  /**< SVGA3D_CMP_x */
+      unsigned write_color0_to_n_cbufs:4;
+      unsigned aa_point:1;
+      int aa_point_coord_index;
+      float alpha_ref;
+   } fs;
+
+   /* any shader type */
+   int8_t generic_remap_table[MAX_GENERIC_VARYING];
+   unsigned num_textures:8;
+   unsigned num_unnormalized_coords:8;
+   unsigned clip_plane_enable:PIPE_MAX_CLIP_PLANES;
+   unsigned sprite_origin_lower_left:1;
+   unsigned sprite_coord_enable;
+   struct {
+      unsigned compare_mode:1;
+      unsigned compare_func:3;
+      unsigned unnormalized:1;
+      unsigned width_height_idx:5; /**< texture unit */
+      unsigned texture_target:4;   /**< PIPE_TEXTURE_x */
+      unsigned texture_msaa:1;    /**< A multisample texture? */
+      unsigned sprite_texgen:1;
+      unsigned swizzle_r:3;
+      unsigned swizzle_g:3;
+      unsigned swizzle_b:3;
+      unsigned swizzle_a:3;
+      unsigned return_type:3;  /**< TGSI_RETURN_TYPE_x */
+   } tex[PIPE_MAX_SAMPLERS];
+   /* Note: svga_compile_keys_equal() depends on the variable-size
+    * tex[] array being at the end of this structure.
+    */
+};
+
+/* Key identifying one variant of a shader's token string.
+ * svga_search_shader_token_key() compares whole keys with memcmp(), so
+ * presumably keys must be zero-initialized before their fields are set
+ * (any padding bits take part in the comparison) -- TODO confirm.
+ */
+struct svga_token_key {
+   struct {
+      unsigned sprite_coord_enable:24;
+      unsigned sprite_origin_upper_left:1;
+      unsigned point_pos_stream_out:1;
+      unsigned writes_psize:1;
+      unsigned aa_point:1;
+   } gs;
+};
+
+/**
+ * A single TGSI shader may be compiled into different variants of
+ * SVGA3D shaders depending on the compile key.  Each user shader
+ * will have a linked list of these variants.
+ */
+struct svga_shader_variant
+{
+   const struct svga_shader *shader;
+
+   /** Parameters used to generate this variant */
+   struct svga_compile_key key;
+
+   /* Compiled shader tokens:
+    */
+   const unsigned *tokens;
+   unsigned nr_tokens;
+
+   /** Per-context shader identifier used with SVGA_3D_CMD_SHADER_DEFINE,
+    * SVGA_3D_CMD_SET_SHADER and SVGA_3D_CMD_SHADER_DESTROY.
+    */
+   unsigned id;
+
+   /** Start of extra constants (number of float[4] constants) */
+   unsigned extra_const_start;
+
+   /* GB object buffer containing the bytecode */
+   struct svga_winsys_gb_shader *gb_shader;
+
+   boolean uses_flat_interp;   /** TRUE if flat interpolation qualifier is
+                                *  applied to any of the varyings.
+                                */
+
+   /** For FS-based polygon stipple */
+   unsigned pstipple_sampler_unit;
+
+   /** Next variant */
+   struct svga_shader_variant *next;
+};
+
+
+struct svga_shader
+{
+   const struct tgsi_token *tokens;
+   struct svga_token_key token_key;     /* token key for the token string */
+   struct tgsi_shader_info info;
+
+   /* List of shaders with tokens derived from the same token string */
+   struct svga_shader *next;
+   struct svga_shader *parent;   /* shader with the original token string */
+
+   struct svga_stream_output *stream_output;
+
+   /** Head of linked list of compiled variants */
+   struct svga_shader_variant *variants;
+
+   unsigned id;  /**< for debugging only */
+};
+
+
+struct svga_fragment_shader
+{
+   struct svga_shader base;
+
+   struct draw_fragment_shader *draw_shader;
+
+   /** Mask of which generic varying variables are read by this shader */
+   uint64_t generic_inputs;
+
+   /** Table mapping original TGSI generic indexes to low integers */
+   int8_t generic_remap_table[MAX_GENERIC_VARYING];
+};
+
+
+struct svga_vertex_shader
+{
+   struct svga_shader base;
+
+   struct draw_vertex_shader *draw_shader;
+
+   /** Mask of which generic varying variables are written by this shader */
+   uint64_t generic_outputs;
+
+   /** Generated geometry shader that goes with this vertex shader */
+   struct svga_geometry_shader *gs;
+};
+
+
+struct svga_geometry_shader
+{
+   struct svga_shader base;
+
+   struct draw_geometry_shader *draw_shader;
+
+   /** Table mapping original TGSI generic indexes to low integers */
+   int8_t generic_remap_table[MAX_GENERIC_VARYING];
+   uint64_t generic_outputs;
+
+   unsigned aa_point_coord_index; /* generic index for aa point coord */
+
+   unsigned wide_point:1;      /* set if the shader emulates wide point */
+};
+
+
+static inline boolean
+svga_compile_keys_equal(const struct svga_compile_key *a,
+                        const struct svga_compile_key *b)
+{
+   unsigned key_size =
+      (const char *) &a->tex[a->num_textures] - (const char *) a;
+
+   return memcmp(a, b, key_size) == 0;
+}
+
+
+uint64_t
+svga_get_generic_inputs_mask(const struct tgsi_shader_info *info);
+
+uint64_t
+svga_get_generic_outputs_mask(const struct tgsi_shader_info *info);
+
+void
+svga_remap_generics(uint64_t generics_mask,
+                    int8_t remap_table[MAX_GENERIC_VARYING]);
+
+int
+svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
+                         int generic_index);
+
+void
+svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
+                            struct svga_compile_key *key);
+
+struct svga_shader_variant *
+svga_search_shader_key(const struct svga_shader *shader,
+                       const struct svga_compile_key *key);
+
+struct svga_shader *
+svga_search_shader_token_key(struct svga_shader *shader,
+                             const struct svga_token_key *key);

 enum pipe_error
 svga_define_shader(struct svga_context *svga,
                   SVGA3dShaderType type,
                   struct svga_shader_variant *variant);

+enum pipe_error
+svga_set_shader(struct svga_context *svga,
+                SVGA3dShaderType type,
+                struct svga_shader_variant *variant);
+
 enum pipe_error
 svga_destroy_shader_variant(struct svga_context *svga,
                            SVGA3dShaderType type,
                            struct svga_shader_variant *variant);

+enum pipe_error
+svga_rebind_shaders(struct svga_context *svga);

 /**
 * Check if a shader's bytecode exceeds the device limits.
@ -62,4 +302,40 @@ svga_shader_too_large(const struct svga_context *svga,
 }


+/**
+ * Map a PIPE_SHADER_* stage to the matching SVGA3D_SHADERTYPE_* value.
+ * Any other stage trips an assertion and falls back to the VS type.
+ */
+static inline SVGA3dShaderType
+svga_shader_type(unsigned shader)
+{
+   if (shader == PIPE_SHADER_VERTEX)
+      return SVGA3D_SHADERTYPE_VS;
+
+   if (shader == PIPE_SHADER_GEOMETRY)
+      return SVGA3D_SHADERTYPE_GS;
+
+   if (shader == PIPE_SHADER_FRAGMENT)
+      return SVGA3D_SHADERTYPE_PS;
+
+   assert(!"Invalid shader type");
+   return SVGA3D_SHADERTYPE_VS;
+}
+
+
+/**
+ * Report whether a vertex shader is currently bound and that shader
+ * has a non-NULL stream_output.
+ */
+static inline boolean
+svga_have_vs_streamout(const struct svga_context *svga)
+{
+   return !(svga->curr.vs == NULL ||
+            svga->curr.vs->base.stream_output == NULL);
+}
+
+
+/**
+ * Report whether a geometry shader is currently bound and that shader
+ * has a non-NULL stream_output.
+ */
+static inline boolean
+svga_have_gs_streamout(const struct svga_context *svga)
+{
+   return !(svga->curr.gs == NULL ||
+            svga->curr.gs->base.stream_output == NULL);
+}
+
+
 #endif /* SVGA_SHADER_H */
--- a/src/gallium/drivers/svga/svga_state.c
+++ b/src/gallium/drivers/svga/svga_state.c
@ -23,6 +23,7 @@
 *
 **********************************************************/

+#include "util/u_bitmask.h"
 #include "util/u_debug.h"
 #include "pipe/p_defines.h"
 #include "util/u_memory.h"
@ -63,14 +64,19 @@ static const struct svga_tracked_state *hw_clear_state[] =
 */
 static const struct svga_tracked_state *hw_draw_state[] =
 {
+   &svga_need_tgsi_transform,
   &svga_hw_fs,
+   &svga_hw_gs,
   &svga_hw_vs,
   &svga_hw_rss,
-   &svga_hw_tss,
-   &svga_hw_tss_binding,
+   &svga_hw_sampler,           /* VGPU10 */
+   &svga_hw_sampler_bindings,  /* VGPU10 */
+   &svga_hw_tss,               /* pre-VGPU10 */
+   &svga_hw_tss_binding,       /* pre-VGPU10 */
   &svga_hw_clip_planes,
   &svga_hw_vdecl,
   &svga_hw_fs_constants,
+   &svga_hw_gs_constants,
   &svga_hw_vs_constants,
   NULL
 };
@ -255,23 +261,55 @@ do {                                            \
 */
 enum pipe_error svga_emit_initial_state( struct svga_context *svga )
 {
-   SVGA3dRenderState *rs;
-   unsigned count = 0;
-   const unsigned COUNT = 2;
-   enum pipe_error ret;
+   if (svga_have_vgpu10(svga)) {
+      /* VGPU10: define a default rasterizer state object and bind it. */
+      SVGA3dRasterizerStateId id = util_bitmask_add(svga->rast_object_id_bm);
+      enum pipe_error ret;

-   ret = SVGA3D_BeginSetRenderState( svga->swc, &rs, COUNT );
-   if (ret != PIPE_OK)
+      /* XXX preliminary code */
+      ret = SVGA3D_vgpu10_DefineRasterizerState(svga->swc,
+                                             id,
+                                             SVGA3D_FILLMODE_FILL,
+                                             SVGA3D_CULL_NONE,
+                                             1, /* frontCounterClockwise */
+                                             0, /* depthBias */
+                                             0.0f, /* depthBiasClamp */
+                                             0.0f, /* slopeScaledDepthBiasClamp */
+                                             0, /* depthClampEnable */
+                                             0, /* scissorEnable */
+                                             0, /* multisampleEnable */
+                                             0, /* aalineEnable */
+                                             1.0f, /* lineWidth */
+                                             0, /* lineStippleEnable */
+                                             0, /* lineStippleFactor */
+                                             0, /* lineStipplePattern */
+                                             0); /* provokingVertexLast */
+
+      /* Report the failure to the caller instead of only asserting:
+       * with asserts compiled out we would otherwise go on to bind a
+       * rasterizer state id that was never defined.
+       */
+      if (ret != PIPE_OK)
+         return ret;
+
+      ret = SVGA3D_vgpu10_SetRasterizerState(svga->swc, id);
       return ret;
+   }
+   else {
+      /* Pre-VGPU10: emit the two fixed render states. */
+      SVGA3dRenderState *rs;
+      unsigned count = 0;
+      const unsigned COUNT = 2;
+      enum pipe_error ret;

-   /* Always use D3D style coordinate space as this is the only one
-    * which is implemented on all backends.
-    */
-   EMIT_RS(rs, count, SVGA3D_RS_COORDINATETYPE, SVGA3D_COORDINATE_LEFTHANDED );
-   EMIT_RS(rs, count, SVGA3D_RS_FRONTWINDING, SVGA3D_FRONTWINDING_CW );
-   
-   assert( COUNT == count );
-   SVGA_FIFOCommitAll( svga->swc );
+      ret = SVGA3D_BeginSetRenderState( svga->swc, &rs, COUNT );
+      if (ret != PIPE_OK)
+         return ret;

-   return PIPE_OK;
+      /* Always use D3D style coordinate space as this is the only one
+       * which is implemented on all backends.
+       */
+      EMIT_RS(rs, count, SVGA3D_RS_COORDINATETYPE,
+              SVGA3D_COORDINATE_LEFTHANDED );
+      EMIT_RS(rs, count, SVGA3D_RS_FRONTWINDING, SVGA3D_FRONTWINDING_CW );
+
+      assert( COUNT == count );
+      SVGA_FIFOCommitAll( svga->swc );
+
+      return PIPE_OK;
+   }
 }
--- a/src/gallium/drivers/svga/svga_state.h
+++ b/src/gallium/drivers/svga/svga_state.h
@ -57,14 +57,20 @@ extern struct svga_tracked_state svga_hw_framebuffer;

 /* HW_DRAW
 */
+extern struct svga_tracked_state svga_need_tgsi_transform;
 extern struct svga_tracked_state svga_hw_vs;
 extern struct svga_tracked_state svga_hw_fs;
+extern struct svga_tracked_state svga_hw_gs;
 extern struct svga_tracked_state svga_hw_rss;
+extern struct svga_tracked_state svga_hw_pstipple;
+extern struct svga_tracked_state svga_hw_sampler;
+extern struct svga_tracked_state svga_hw_sampler_bindings;
 extern struct svga_tracked_state svga_hw_tss;
 extern struct svga_tracked_state svga_hw_tss_binding;
 extern struct svga_tracked_state svga_hw_clip_planes;
 extern struct svga_tracked_state svga_hw_vdecl;
 extern struct svga_tracked_state svga_hw_fs_constants;
+extern struct svga_tracked_state svga_hw_gs_constants;
 extern struct svga_tracked_state svga_hw_vs_constants;

 /* SWTNL_DRAW
@ -93,10 +99,14 @@ enum pipe_error svga_emit_initial_state( struct svga_context *svga );

 enum pipe_error svga_reemit_framebuffer_bindings( struct svga_context *svga );

+enum pipe_error svga_rebind_framebuffer_bindings( struct svga_context *svga );
+
 enum pipe_error svga_reemit_tss_bindings( struct svga_context *svga );

 enum pipe_error svga_reemit_vs_bindings(struct svga_context *svga);

 enum pipe_error svga_reemit_fs_bindings(struct svga_context *svga);

+enum pipe_error svga_reemit_gs_bindings(struct svga_context *svga);
+
 #endif
--- a/src/gallium/drivers/svga/svga_state_constants.c
+++ b/src/gallium/drivers/svga/svga_state_constants.c
@ -1,3 +1,4 @@
+
 /**********************************************************
 * Copyright 2008-2009 VMware, Inc.  All rights reserved.
 *
@ -23,9 +24,11 @@
 *
 **********************************************************/

+#include "util/u_format.h"
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
 #include "pipe/p_defines.h"
+#include "util/u_upload_mgr.h"

 #include "svga_screen.h"
 #include "svga_context.h"
@ -34,6 +37,7 @@
 #include "svga_tgsi.h"
 #include "svga_debug.h"
 #include "svga_resource_buffer.h"
+#include "svga_shader.h"

 #include "svga_hw_reg.h"

@ -52,49 +56,31 @@
 /** Guest-backed surface constant buffers must be this size */
 #define GB_CONSTBUF_SIZE (SVGA3D_CONSTREG_MAX)

-/**
- * Convert from PIPE_SHADER_* to SVGA3D_SHADERTYPE_*
- */
-static unsigned
-svga_shader_type(unsigned shader)
-{
-   switch (shader) {
-   case PIPE_SHADER_VERTEX:
-      return SVGA3D_SHADERTYPE_VS;
-   case PIPE_SHADER_FRAGMENT:
-      return SVGA3D_SHADERTYPE_PS;
-   default:
-      assert(!"Unexpected shader type");
-      return SVGA3D_SHADERTYPE_VS;
-   }
-}
-

 /**
- * Emit any extra fragment shader constants into the buffer pointed
- * to by 'dest'.
- * In particular, these would be the scaling factors needed for handling
- * unnormalized texture coordinates for texture rectangles.
- * \return number of float[4] constants put into the dest buffer
+ * Emit any extra shader-type-independent shader constants into the buffer
+ * pointed to by 'dest'.
+ * \return number of float[4] constants put into the 'dest' buffer
 */
 static unsigned
-svga_get_extra_fs_constants(struct svga_context *svga, float *dest)
+svga_get_extra_constants_common(struct svga_context *svga,
+                                const struct svga_shader_variant *variant,
+                                unsigned shader, float *dest)
 {
-   const struct svga_shader_variant *variant = svga->state.hw_draw.fs;
-   const struct svga_fs_compile_key *key = &variant->key.fkey;
+   uint32_t *dest_u = (uint32_t *) dest;  // uint version of dest
+   unsigned i;
   unsigned count = 0;

-   /* SVGA_NEW_VS_VARIANT
-    */
-   if (key->num_unnormalized_coords) {
-      unsigned i;
-
-      for (i = 0; i < key->num_textures; i++) {
-         if (key->tex[i].unnormalized) {
-            struct pipe_resource *tex = svga->curr.sampler_views[i]->texture;
-
+   for (i = 0; i < variant->key.num_textures; i++) {
+      struct pipe_sampler_view *sv = svga->curr.sampler_views[shader][i];
+      if (sv) {
+         struct pipe_resource *tex = sv->texture;
+         /* Scaling factors needed for handling unnormalized texture coordinates
+          * for texture rectangles.
+          */
+         if (variant->key.tex[i].unnormalized) {
            /* debug/sanity check */
-            assert(key->tex[i].width_height_idx == count);
+            assert(variant->key.tex[i].width_height_idx == count);

            *dest++ = 1.0 / (float)tex->width0;
            *dest++ = 1.0 / (float)tex->height0;
@ -103,14 +89,102 @@ svga_get_extra_fs_constants(struct svga_context *svga, float *dest)

            count++;
         }
+
+         /* Store the sizes for texture buffers.
+         */
+         if (tex->target == PIPE_BUFFER) {
+            unsigned bytes_per_element = util_format_get_blocksize(sv->format);
+            *dest_u++ = tex->width0 / bytes_per_element;
+            *dest_u++ = 1;
+            *dest_u++ = 1;
+            *dest_u++ = 1;
+
+            count++;
+         }
      }
   }

+   return count;
+}
+
+
+/**
+ * Emit any extra fragment shader constants into the buffer pointed
+ * to by 'dest'.
+ * The constants are produced by svga_get_extra_constants_common() for the
+ * current hardware FS variant (svga->state.hw_draw.fs) using the
+ * PIPE_SHADER_FRAGMENT stage; the total is asserted not to exceed
+ * MAX_EXTRA_CONSTS.
+ * \return number of float[4] constants put into the dest buffer
+ */
+static unsigned
+svga_get_extra_fs_constants(struct svga_context *svga, float *dest)
+{
+   const struct svga_shader_variant *variant = svga->state.hw_draw.fs;
+   unsigned count = 0;
+
+   count += svga_get_extra_constants_common(svga, variant,
+                                            PIPE_SHADER_FRAGMENT, dest);
+
   assert(count <= MAX_EXTRA_CONSTS);

   return count;
 }

+/**
+ * Write the prescale constants into the buffer pointed to by '*dest':
+ * first the scale vector, then the translate vector (one float[4] each).
+ * On return '*dest' points just past the data that was written.
+ * \return number of float[4] constants written (always 2)
+ */
+static unsigned
+svga_get_prescale_constants(struct svga_context *svga, float **dest)
+{
+   const unsigned vec4_bytes = 4 * sizeof(float);
+   float *out = *dest;
+
+   memcpy(out, svga->state.hw_clear.prescale.scale, vec4_bytes);
+   memcpy(out + 4, svga->state.hw_clear.prescale.translate, vec4_bytes);
+
+   *dest = out + 8;
+   return 2;
+}
+
+/**
+ * Write the single float[4] constant used for point sprite emulation
+ * at '*dest' and advance '*dest' past it.
+ * \return number of float[4] constants written (always 1)
+ */
+static unsigned
+svga_get_pt_sprite_constants(struct svga_context *svga, float **dest)
+{
+   float *out = *dest;
+   struct svga_screen *ss = svga_screen(svga->pipe.screen);
+
+   /* x, y: reciprocal of twice the viewport scale */
+   *out++ = 1.0 / (svga->curr.viewport.scale[0] * 2);
+   *out++ = 1.0 / (svga->curr.viewport.scale[1] * 2);
+   /* z: rasterizer point size, w: the screen's maximum point size */
+   *out++ = svga->curr.rast->pointsize;
+   *out++ = ss->maxPointSize;
+
+   *dest = out;
+   return 1;
+}
+
+/**
+ * Append the coefficients of every user clip plane enabled in the
+ * variant's key to the buffer at '*dest', one float[4] per plane, and
+ * advance '*dest' accordingly.  Nothing is written without VGPU10.
+ * \return number of float[4] constants written
+ */
+static unsigned
+svga_get_clip_plane_constants(struct svga_context *svga,
+                              const struct svga_shader_variant *variant,
+                              float **dest)
+{
+   unsigned planes_left;
+   unsigned num_emitted = 0;
+
+   /* SVGA_NEW_CLIP */
+   if (!svga_have_vgpu10(svga))
+      return 0;
+
+   /* walk the enable mask one set bit at a time */
+   for (planes_left = variant->key.clip_plane_enable;
+        planes_left != 0;
+        num_emitted++) {
+      int plane = u_bit_scan(&planes_left);
+      COPY_4V(*dest, svga->curr.clip.ucp[plane]);
+      *dest += 4;
+   }
+
+   return num_emitted;
+}

 /**
 * Emit any extra vertex shader constants into the buffer pointed
@ -124,26 +198,71 @@ static unsigned
 svga_get_extra_vs_constants(struct svga_context *svga, float *dest)
 {
   const struct svga_shader_variant *variant = svga->state.hw_draw.vs;
-   const struct svga_vs_compile_key *key = &variant->key.vkey;
   unsigned count = 0;

   /* SVGA_NEW_VS_VARIANT
    */
-   if (key->need_prescale) {
-      memcpy(dest, svga->state.hw_clear.prescale.scale, 4 * sizeof(float));
-      dest += 4;
-
-      memcpy(dest, svga->state.hw_clear.prescale.translate, 4 * sizeof(float));
-      dest += 4;
-
-      count = 2;
+   if (variant->key.vs.need_prescale) {
+      count += svga_get_prescale_constants(svga, &dest);
   }

+   if (variant->key.vs.undo_viewport) {
+      /* Used to convert window coords back to NDC coords */
+      dest[0] = 1.0f / svga->curr.viewport.scale[0];
+      dest[1] = 1.0f / svga->curr.viewport.scale[1];
+      dest[2] = -svga->curr.viewport.translate[0];
+      dest[3] = -svga->curr.viewport.translate[1];
+      dest += 4;
+      count += 1;
+   }
+
+   /* SVGA_NEW_CLIP */
+   count += svga_get_clip_plane_constants(svga, variant, &dest);
+
+   /* common constants */
+   count += svga_get_extra_constants_common(svga, variant,
+                                            PIPE_SHADER_VERTEX, dest);
+
   assert(count <= MAX_EXTRA_CONSTS);

   return count;
 }

+/**
+ * Fill 'dest' with the extra constants of the current hardware geometry
+ * shader variant (svga->state.hw_draw.gs).
+ * Emission order: point sprite constants (if key.gs.wide_point), prescale
+ * constants (if key.gs.need_prescale), user clip planes, common constants.
+ * \return number of float[4] constants written
+ */
+static unsigned
+svga_get_extra_gs_constants(struct svga_context *svga, float *dest)
+{
+   const struct svga_shader_variant *gs_variant = svga->state.hw_draw.gs;
+   unsigned num_consts = 0;
+
+   /* SVGA_NEW_GS_VARIANT
+    */
+
+   /* The point sprite constants are consumed by the transformed gs that
+    * emulates point sprites; they have to precede the prescale constants.
+    */
+   if (gs_variant->key.gs.wide_point)
+      num_consts += svga_get_pt_sprite_constants(svga, &dest);
+
+   if (gs_variant->key.gs.need_prescale)
+      num_consts += svga_get_prescale_constants(svga, &dest);
+
+   /* SVGA_NEW_CLIP */
+   num_consts += svga_get_clip_plane_constants(svga, gs_variant, &dest);
+
+   /* constants shared by all shader stages */
+   num_consts += svga_get_extra_constants_common(svga, gs_variant,
+                                                 PIPE_SHADER_GEOMETRY, dest);
+
+   assert(num_consts <= MAX_EXTRA_CONSTS);
+   return num_consts;
+}

 /**
 * Check and emit one shader constant register.
@ -159,6 +278,7 @@ emit_const(struct svga_context *svga, unsigned shader, unsigned i,

   assert(shader < PIPE_SHADER_TYPES);
   assert(i < SVGA3D_CONSTREG_MAX);
+   assert(!svga_have_vgpu10(svga));

   if (memcmp(svga->state.hw_draw.cb[shader][i], value,
              4 * sizeof(float)) != 0) {
@ -202,6 +322,10 @@ emit_const_range(struct svga_context *svga,
   unsigned i, j;
   enum pipe_error ret;

+   assert(shader == PIPE_SHADER_VERTEX ||
+          shader == PIPE_SHADER_FRAGMENT);
+   assert(!svga_have_vgpu10(svga));
+
 #ifdef DEBUG
   if (offset + count > SVGA3D_CONSTREG_MAX) {
      debug_printf("svga: too many constants (offset %u + count %u = %u (max = %u))\n",
@ -307,10 +431,12 @@ emit_const_range(struct svga_context *svga,

 /**
 * Emit all the constants in a constant buffer for a shader stage.
+ * On VGPU10, emit_consts_vgpu10 is used instead.
 */
 static enum pipe_error
-emit_consts(struct svga_context *svga, unsigned shader)
+emit_consts_vgpu9(struct svga_context *svga, unsigned shader)
 {
+   const struct pipe_constant_buffer *cbuf;
   struct svga_screen *ss = svga_screen(svga->pipe.screen);
   struct pipe_transfer *transfer = NULL;
   unsigned count;
@ -320,53 +446,284 @@ emit_consts(struct svga_context *svga, unsigned shader)
   const unsigned offset = 0;

   assert(shader < PIPE_SHADER_TYPES);
+   assert(!svga_have_vgpu10(svga));
+   /* Only one constant buffer per shader is supported before VGPU10.
+    * This is only an approximate check against that.
+    */
+   assert(svga->curr.constbufs[shader][1].buffer == NULL);

-   if (svga->curr.cbufs[shader].buffer == NULL)
-      goto done;
+   cbuf = &svga->curr.constbufs[shader][0];

-   data = (const float (*)[4])pipe_buffer_map(&svga->pipe,
-                                              svga->curr.cbufs[shader].buffer,
-                                              PIPE_TRANSFER_READ,
-					      &transfer);
-   if (data == NULL) {
-      ret = PIPE_ERROR_OUT_OF_MEMORY;
-      goto done;
+   if (svga->curr.constbufs[shader][0].buffer) {
+      /* emit user-provided constants */
+      data = (const float (*)[4])
+         pipe_buffer_map(&svga->pipe, svga->curr.constbufs[shader][0].buffer,
+                         PIPE_TRANSFER_READ, &transfer);
+      if (data == NULL) {
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+
+      /* sanity check */
+      assert(cbuf->buffer->width0 >=
+             cbuf->buffer_size);
+
+      /* Use/apply the constant buffer size and offsets here */
+      count = cbuf->buffer_size / (4 * sizeof(float));
+      data += cbuf->buffer_offset / (4 * sizeof(float));
+
+      if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) {
+         ret = emit_const_range( svga, shader, offset, count, data );
+      }
+      else {
+         for (i = 0; i < count; i++) {
+            ret = emit_const( svga, shader, offset + i, data[i] );
+            if (ret != PIPE_OK) {
+               break;
+            }
+         }
+      }
+
+      pipe_buffer_unmap(&svga->pipe, transfer);
+
+      if (ret != PIPE_OK) {
+         return ret;
+      }
   }

-   /* sanity check */
-   assert(svga->curr.cbufs[shader].buffer->width0 >=
-          svga->curr.cbufs[shader].buffer_size);
+   /* emit extra shader constants */
+   {
+      const struct svga_shader_variant *variant = NULL;
+      unsigned offset;
+      float extras[MAX_EXTRA_CONSTS][4];
+      unsigned count, i;

-   /* Use/apply the constant buffer size and offsets here */
-   count = svga->curr.cbufs[shader].buffer_size / (4 * sizeof(float));
-   data += svga->curr.cbufs[shader].buffer_offset / (4 * sizeof(float));
-
-   if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) {
-      ret = emit_const_range( svga, shader, offset, count, data );
-      if (ret != PIPE_OK) {
-         goto done;
+      switch (shader) {
+      case PIPE_SHADER_VERTEX:
+         variant = svga->state.hw_draw.vs;
+         count = svga_get_extra_vs_constants(svga, (float *) extras);
+         break;
+      case PIPE_SHADER_FRAGMENT:
+         variant = svga->state.hw_draw.fs;
+         count = svga_get_extra_fs_constants(svga, (float *) extras);
+         break;
+      default:
+         assert(!"Unexpected shader type");
+         count = 0;
      }
-   } else {
-      for (i = 0; i < count; i++) {
-         ret = emit_const( svga, shader, offset + i, data[i] );
-         if (ret != PIPE_OK) {
-            goto done;
+
+      assert(variant);
+      offset = variant->shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
+      assert(count <= Elements(extras));
+
+      if (count > 0) {
+         if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) {
+            ret = emit_const_range(svga, shader, offset, count,
+                                   (const float (*) [4])extras);
+         }
+         else {
+            for (i = 0; i < count; i++) {
+               ret = emit_const(svga, shader, offset + i, extras[i]);
+               if (ret != PIPE_OK)
+                  return ret;
+            }
         }
      }
   }

-done:
-   if (data)
-      pipe_buffer_unmap(&svga->pipe, transfer);
+   return ret;
+}
+
+
+
+/**
+ * Build and bind constant buffer slot 0 for one shader stage (VGPU10).
+ *
+ * The user constants from svga->curr.constbufs[shader][0] and the stage's
+ * extra constants (placed at variant->extra_const_start, counted in
+ * float[4] units) are copied into a buffer allocated from
+ * svga->const0_upload, which is then bound with
+ * SVGA3D_vgpu10_SetSingleConstantBuffer() at index 0.  A reference to the
+ * new buffer is kept in svga->state.hw_draw.constbuf[shader].
+ *
+ * \param shader  PIPE_SHADER_VERTEX, PIPE_SHADER_GEOMETRY or
+ *                PIPE_SHADER_FRAGMENT
+ * \return PIPE_OK on success or when there is nothing to emit,
+ *         PIPE_ERROR_OUT_OF_MEMORY if mapping, allocation or getting the
+ *         buffer handle fails, otherwise the error from the bind command
+ */
+static enum pipe_error
+emit_constbuf_vgpu10(struct svga_context *svga, unsigned shader)
+{
+   const struct pipe_constant_buffer *cbuf;
+   struct pipe_resource *dst_buffer = NULL;
+   enum pipe_error ret = PIPE_OK;
+   struct pipe_transfer *src_transfer;
+   struct svga_winsys_surface *dst_handle;
+   float extras[MAX_EXTRA_CONSTS][4];
+   unsigned extra_count, extra_size, extra_offset;
+   unsigned new_buf_size;
+   void *src_map = NULL, *dst_map;
+   unsigned offset;
+   const struct svga_shader_variant *variant;
+
+   assert(shader == PIPE_SHADER_VERTEX ||
+          shader == PIPE_SHADER_GEOMETRY ||
+          shader == PIPE_SHADER_FRAGMENT);
+
+   cbuf = &svga->curr.constbufs[shader][0];
+
+   /* Pick the stage's current variant and gather its extra constants */
+   switch (shader) {
+   case PIPE_SHADER_VERTEX:
+      variant = svga->state.hw_draw.vs;
+      extra_count = svga_get_extra_vs_constants(svga, (float *) extras);
+      break;
+   case PIPE_SHADER_FRAGMENT:
+      variant = svga->state.hw_draw.fs;
+      extra_count = svga_get_extra_fs_constants(svga, (float *) extras);
+      break;
+   case PIPE_SHADER_GEOMETRY:
+      variant = svga->state.hw_draw.gs;
+      extra_count = svga_get_extra_gs_constants(svga, (float *) extras);
+      break;
+   default:
+      assert(!"Unexpected shader type");
+      /* Don't return an error code since we don't want to keep re-trying
+       * this function and getting stuck in an infinite loop.
+       */
+      return PIPE_OK;
+   }
+
+   assert(variant);
+
+   /* Compute extra constants size and offset in bytes */
+   extra_size = extra_count * 4 * sizeof(float);
+   extra_offset = 4 * sizeof(float) * variant->extra_const_start;
+
+   if (cbuf->buffer_size + extra_size == 0)
+      return PIPE_OK;  /* nothing to do */
+
+   /* Typically, the cbuf->buffer here is a user-space buffer so mapping
+    * it is really cheap.  If we ever get real HW buffers for constants
+    * we should avoid mapping and instead use a ResourceCopy command.
+    */
+   if (cbuf->buffer_size > 0) {
+      src_map = pipe_buffer_map_range(&svga->pipe, cbuf->buffer,
+                                      cbuf->buffer_offset, cbuf->buffer_size,
+                                      PIPE_TRANSFER_READ, &src_transfer);
+      assert(src_map);
+      if (!src_map) {
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+   }
+
+   /* The new/dest buffer's size must be large enough to hold the original,
+    * user-specified constants, plus the extra constants.
+    * The size of the original constant buffer _should_ agree with what the
+    * shader is expecting, but it might not (it's not enforced anywhere by
+    * gallium).
+    */
+   new_buf_size = MAX2(cbuf->buffer_size, extra_offset) + extra_size;
+
+   /* According to the DX10 spec, the constant buffer size must be
+    * in multiples of 16.
+    */
+   new_buf_size = align(new_buf_size, 16);
+
+   ret = u_upload_alloc(svga->const0_upload, 0, new_buf_size, &offset,
+                        &dst_buffer, &dst_map);
+   if (ret != PIPE_OK || !dst_map) {
+      /* release the source mapping before bailing out */
+      if (src_map)
+         pipe_buffer_unmap(&svga->pipe, src_transfer);
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   /* User constants go at the start of the new buffer */
+   if (src_map) {
+      memcpy(dst_map, src_map, cbuf->buffer_size);
+      pipe_buffer_unmap(&svga->pipe, src_transfer);
+   }
+
+   /* Extra constants go at extra_offset */
+   if (extra_size) {
+      assert(extra_offset + extra_size <= new_buf_size);
+      memcpy((char *) dst_map + extra_offset, extras, extra_size);
+   }
+   u_upload_unmap(svga->const0_upload);
+
+   /* Issue the SetSingleConstantBuffer command */
+   dst_handle = svga_buffer_handle(svga, dst_buffer);
+   if (!dst_handle) {
+      pipe_resource_reference(&dst_buffer, NULL);
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   assert(new_buf_size % 16 == 0);
+   ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc,
+                                               0, /* index */
+                                               svga_shader_type(shader),
+                                               dst_handle,
+                                               offset,
+                                               new_buf_size);
+
+   if (ret != PIPE_OK) {
+      pipe_resource_reference(&dst_buffer, NULL);
+      return ret;
+   }
+
+   /* Save this const buffer until it's replaced in the future.
+    * Otherwise, all references to the buffer will go away after the
+    * command buffer is submitted, it'll get recycled and we will have
+    * incorrect constant buffer bindings.
+    */
+   pipe_resource_reference(&svga->state.hw_draw.constbuf[shader], dst_buffer);
+
+   svga->state.hw_draw.default_constbuf_size[shader] = new_buf_size;
+
+   /* Drop the local reference; hw_draw.constbuf[shader] holds its own */
+   pipe_resource_reference(&dst_buffer, NULL);

   return ret;
 }


+/**
+ * Emit all constant buffers of one shader stage on VGPU10.
+ *
+ * Slot 0 (user constants plus the driver's extra constants) is emitted by
+ * emit_constbuf_vgpu10().  Every other slot whose bit is set in
+ * svga->state.dirty_constbufs[shader] is then bound, or unbound when no
+ * buffer is set for it.  On success the stage's enabled mask is stored in
+ * svga->state.hw_draw.enabled_constbufs[shader] and its dirty mask is
+ * cleared; on failure both are left untouched.
+ *
+ * \return PIPE_OK on success, PIPE_ERROR_OUT_OF_MEMORY if a buffer handle
+ *         cannot be obtained, otherwise the error from the failing command
+ */
+static enum pipe_error
+emit_consts_vgpu10(struct svga_context *svga, unsigned shader)
+{
+   enum pipe_error ret;
+   unsigned dirty_constbufs;
+   unsigned enabled_constbufs;
+
+   /* Emit 0th constant buffer (with extra constants) */
+   ret = emit_constbuf_vgpu10(svga, shader);
+   if (ret != PIPE_OK) {
+      return ret;
+   }
+
+   /* Slot 0 is always recorded as enabled */
+   enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] | 1u;
+
+   /* Emit other constant buffers (UBOs); slot 0 was handled above */
+   dirty_constbufs = svga->state.dirty_constbufs[shader] & ~1u;
+
+   while (dirty_constbufs) {
+      unsigned index = u_bit_scan(&dirty_constbufs);
+      unsigned offset = svga->curr.constbufs[shader][index].buffer_offset;
+      unsigned size = svga->curr.constbufs[shader][index].buffer_size;
+      struct svga_buffer *buffer =
+         svga_buffer(svga->curr.constbufs[shader][index].buffer);
+      struct svga_winsys_surface *handle;
+
+      if (buffer) {
+         handle = svga_buffer_handle(svga, &buffer->b.b);
+         /* As in emit_constbuf_vgpu10(): a missing handle is an error.
+          * Passing NULL on would unbind the slot while it is being
+          * recorded as enabled.
+          */
+         if (!handle) {
+            return PIPE_ERROR_OUT_OF_MEMORY;
+         }
+         enabled_constbufs |= 1u << index;
+      }
+      else {
+         /* no buffer in this slot: unbind it */
+         handle = NULL;
+         enabled_constbufs &= ~(1u << index);
+         assert(offset == 0);
+         assert(size == 0);
+      }
+
+      assert(size % 16 == 0);
+      ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc,
+                                                  index,
+                                                  svga_shader_type(shader),
+                                                  handle,
+                                                  offset,
+                                                  size);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+
+   svga->state.hw_draw.enabled_constbufs[shader] = enabled_constbufs;
+   svga->state.dirty_constbufs[shader] = 0;
+
+   return ret;
+}
+
 static enum pipe_error
 emit_fs_consts(struct svga_context *svga, unsigned dirty)
 {
-   struct svga_screen *ss = svga_screen(svga->pipe.screen);
   const struct svga_shader_variant *variant = svga->state.hw_draw.fs;
   enum pipe_error ret = PIPE_OK;

@ -377,28 +734,11 @@ emit_fs_consts(struct svga_context *svga, unsigned dirty)

   /* SVGA_NEW_FS_CONST_BUFFER
    */
-   ret = emit_consts( svga, PIPE_SHADER_FRAGMENT );
-   if (ret != PIPE_OK)
-      return ret;
-
-   /* emit extra shader constants */
-   {
-      unsigned offset = variant->shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
-      float extras[MAX_EXTRA_CONSTS][4];
-      unsigned count, i;
-
-      count = svga_get_extra_fs_constants(svga, (float *) extras);
-
-      if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) {
-         ret = emit_const_range(svga, PIPE_SHADER_FRAGMENT, offset, count,
-                                (const float (*) [4])extras);
-      } else {
-         for (i = 0; i < count; i++) {
-            ret = emit_const(svga, PIPE_SHADER_FRAGMENT, offset + i, extras[i]);
-            if (ret != PIPE_OK)
-               return ret;
-         }
-      }
+   if (svga_have_vgpu10(svga)) {
+      ret = emit_consts_vgpu10(svga, PIPE_SHADER_FRAGMENT);
+   }
+   else {
+      ret = emit_consts_vgpu9(svga, PIPE_SHADER_FRAGMENT);
   }

   return ret;
@ -419,7 +759,6 @@ struct svga_tracked_state svga_hw_fs_constants =
 static enum pipe_error
 emit_vs_consts(struct svga_context *svga, unsigned dirty)
 {
-   struct svga_screen *ss = svga_screen(svga->pipe.screen);
   const struct svga_shader_variant *variant = svga->state.hw_draw.vs;
   enum pipe_error ret = PIPE_OK;

@ -430,29 +769,11 @@ emit_vs_consts(struct svga_context *svga, unsigned dirty)

   /* SVGA_NEW_VS_CONST_BUFFER
    */
-   ret = emit_consts( svga, PIPE_SHADER_VERTEX );
-   if (ret != PIPE_OK)
-      return ret;
-
-   /* emit extra shader constants */
-   {
-      unsigned offset = variant->shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
-      float extras[MAX_EXTRA_CONSTS][4];
-      unsigned count, i;
-
-      count = svga_get_extra_vs_constants(svga, (float *) extras);
-      assert(count <= Elements(extras));
-
-      if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) {
-         ret = emit_const_range(svga, PIPE_SHADER_VERTEX, offset, count,
-                                (const float (*) [4]) extras);
-      } else {
-         for (i = 0; i < count; i++) {
-            ret = emit_const(svga, PIPE_SHADER_VERTEX, offset + i, extras[i]);
-            if (ret != PIPE_OK)
-               return ret;
-         }
-      }
+   if (svga_have_vgpu10(svga)) {
+      ret = emit_consts_vgpu10(svga, PIPE_SHADER_VERTEX);
+   }
+   else {
+      ret = emit_consts_vgpu9(svga, PIPE_SHADER_VERTEX);
   }

   return ret;
@ -467,3 +788,42 @@ struct svga_tracked_state svga_hw_vs_constants =
    SVGA_NEW_VS_VARIANT),
   emit_vs_consts
 };
+
+
+/**
+ * State-update callback that emits the geometry shader constants.
+ * Does nothing when no GS variant is current or when VGPU10 is not
+ * available.
+ * \param dirty  the dirty flags this callback is invoked with
+ */
+static enum pipe_error
+emit_gs_consts(struct svga_context *svga, unsigned dirty)
+{
+   const struct svga_shader_variant *gs_variant = svga->state.hw_draw.gs;
+
+   /* SVGA_NEW_GS_VARIANT
+    */
+   if (!gs_variant)
+      return PIPE_OK;
+
+   /* SVGA_NEW_GS_CONST_BUFFER
+    */
+   if (!svga_have_vgpu10(svga))
+      return PIPE_OK;
+
+   /**
+    * A rasterizer-only change cannot affect the GS constants unless the
+    * current geometry shader emits wide points, so there is nothing to
+    * re-emit in that case.
+    */
+   if (dirty == SVGA_NEW_RAST && !gs_variant->key.gs.wide_point)
+      return PIPE_OK;
+
+   return emit_consts_vgpu10(svga, PIPE_SHADER_GEOMETRY);
+}
+
+
+/*
+ * Tracked-state entry for the geometry shader constants.  It pairs
+ * emit_gs_consts with the SVGA_NEW_* flags listed below.
+ * NOTE(review): presumably these flags are the dirty bits that cause the
+ * callback to run -- confirm against struct svga_tracked_state.
+ */
+struct svga_tracked_state svga_hw_gs_constants =
+{
+   "hw gs params",
+   (SVGA_NEW_GS_CONST_BUFFER |
+    SVGA_NEW_RAST |
+    SVGA_NEW_GS_VARIANT),
+   emit_gs_consts
+};
--- a/src/gallium/drivers/svga/svga_state_framebuffer.c
+++ b/src/gallium/drivers/svga/svga_state_framebuffer.c
@ -26,12 +26,14 @@
 #include "util/u_inlines.h"
 #include "pipe/p_defines.h"
 #include "util/u_math.h"
+#include "util/u_format.h"

 #include "svga_context.h"
 #include "svga_state.h"
 #include "svga_cmd.h"
 #include "svga_debug.h"
 #include "svga_screen.h"
+#include "svga_surface.h"


 /*
@ -46,30 +48,26 @@
 #define MAX_RT_PER_BATCH 8


-/***********************************************************************
- * Hardware state update
- */
-

 static enum pipe_error
-emit_framebuffer( struct svga_context *svga,
-                  unsigned dirty )
+emit_fb_vgpu9(struct svga_context *svga)
 {
   struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
   const struct pipe_framebuffer_state *curr = &svga->curr.framebuffer;
   struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
-   boolean reemit = svga->rebind.rendertargets;
+   boolean reemit = svga->rebind.flags.rendertargets;
   unsigned i;
   enum pipe_error ret;

+   assert(!svga_have_vgpu10(svga));
+
   /*
    * We need to reemit non-null surface bindings, even when they are not
    * dirty, to ensure that the resources are paged in.
    */

   for (i = 0; i < svgascreen->max_color_buffers; i++) {
-      if (curr->cbufs[i] != hw->cbufs[i] ||
-          (reemit && hw->cbufs[i])) {
+      if ((curr->cbufs[i] != hw->cbufs[i]) || (reemit && hw->cbufs[i])) {
         if (svga->curr.nr_fbs++ > MAX_RT_PER_BATCH)
            return PIPE_ERROR_OUT_OF_MEMORY;

@ -82,14 +80,13 @@ emit_framebuffer( struct svga_context *svga,
      }
   }

-   if (curr->zsbuf != hw->zsbuf ||
-       (reemit && hw->zsbuf)) {
+   if ((curr->zsbuf != hw->zsbuf) || (reemit && hw->zsbuf)) {
      ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_DEPTH, curr->zsbuf);
      if (ret != PIPE_OK)
         return ret;

      if (curr->zsbuf &&
-          curr->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) {
+          util_format_is_depth_and_stencil(curr->zsbuf->format)) {
         ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL,
                                      curr->zsbuf);
         if (ret != PIPE_OK)
@ -104,8 +101,6 @@ emit_framebuffer( struct svga_context *svga,
      pipe_surface_reference(&hw->zsbuf, curr->zsbuf);
   }

-   svga->rebind.rendertargets = FALSE;
-
   return PIPE_OK;
 }

@ -118,15 +113,15 @@ emit_framebuffer( struct svga_context *svga,
 * Called at the beginning of every new command buffer to ensure that
 * non-dirty rendertargets are properly paged-in.
 */
-enum pipe_error
-svga_reemit_framebuffer_bindings(struct svga_context *svga)
+static enum pipe_error
+svga_reemit_framebuffer_bindings_vgpu9(struct svga_context *svga)
 {
   struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
   struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
   unsigned i;
   enum pipe_error ret;

-   assert(svga->rebind.rendertargets);
+   assert(!svga_have_vgpu10(svga));

   for (i = 0; i < svgascreen->max_color_buffers; i++) {
      if (hw->cbufs[i]) {
@ -145,7 +140,7 @@ svga_reemit_framebuffer_bindings(struct svga_context *svga)
      }

      if (hw->zsbuf &&
-          hw->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) {
+          util_format_is_depth_and_stencil(hw->zsbuf->format)) {
         ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, hw->zsbuf);
         if (ret != PIPE_OK) {
            return ret;
@ -159,7 +154,161 @@ svga_reemit_framebuffer_bindings(struct svga_context *svga)
      }
   }

-   svga->rebind.rendertargets = FALSE;
+   return PIPE_OK;
+}
+
+
+
+/**
+ * Emit the current framebuffer bindings with one VGPU10 SetRenderTargets
+ * command and bring the hardware framebuffer state in line with them.
+ *
+ * Returns PIPE_ERROR_OUT_OF_MEMORY when a surface view cannot be
+ * validated, the error from SVGA3D_vgpu10_SetRenderTargets() when the
+ * command cannot be emitted, and PIPE_OK otherwise.  The hardware state
+ * is only touched after the command has been emitted successfully.
+ */
+static enum pipe_error
+emit_fb_vgpu10(struct svga_context *svga)
+{
+   const struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
+   struct pipe_framebuffer_state *curr = &svga->curr.framebuffer;
+   struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
+   /* Use the larger of the old and new buffer counts so that slots which
+    * were bound before, but are not part of the new state, get unbound.
+    */
+   const unsigned num_color = MAX2(curr->nr_cbufs, hw->nr_cbufs);
+   struct pipe_surface *rtv[SVGA3D_MAX_RENDER_TARGETS];
+   struct pipe_surface *dsv = NULL;
+   enum pipe_error ret;
+   unsigned i;
+
+   assert(svga_have_vgpu10(svga));
+
+   /* Build the render target view array */
+   for (i = 0; i < num_color; i++) {
+      struct pipe_surface *cbuf = curr->cbufs[i];
+
+      if (!cbuf) {
+         rtv[i] = NULL;
+         continue;
+      }
+
+      rtv[i] = svga_validate_surface_view(svga, svga_surface(cbuf));
+      if (!rtv[i])
+         return PIPE_ERROR_OUT_OF_MEMORY;
+
+      assert(svga_surface(rtv[i])->view_id != SVGA3D_INVALID_ID);
+   }
+
+   /* Build the depth stencil view (stays NULL when nothing is bound) */
+   if (curr->zsbuf) {
+      dsv = svga_validate_surface_view(svga, svga_surface(curr->zsbuf));
+      if (!dsv)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   ret = SVGA3D_vgpu10_SetRenderTargets(svga->swc, num_color, rtv, dsv);
+   if (ret != PIPE_OK)
+      return ret;
+
+   for (i = 0; i < svgascreen->max_color_buffers; i++) {
+      struct pipe_surface *old = hw->cbufs[i];
+
+      if (old == curr->cbufs[i])
+         continue;
+
+      /* propagate the backed view surface before unbinding it */
+      if (old && svga_surface(old)->backed)
+         svga_propagate_surface(svga, &svga_surface(old)->backed->base);
+
+      pipe_surface_reference(&hw->cbufs[i], curr->cbufs[i]);
+   }
+   hw->nr_cbufs = curr->nr_cbufs;
+
+   if (hw->zsbuf != curr->zsbuf) {
+      struct pipe_surface *old = hw->zsbuf;
+
+      /* propagate the backed view surface before unbinding it */
+      if (old && svga_surface(old)->backed)
+         svga_propagate_surface(svga, &svga_surface(old)->backed->base);
+
+      pipe_surface_reference(&hw->zsbuf, curr->zsbuf);
+   }
+
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit the framebuffer bindings with the command set that matches the
+ * device: the VGPU10 path when svga_have_vgpu10() is true, the VGPU9
+ * path otherwise.  The dirty mask is not consulted.
+ */
+static enum pipe_error
+emit_framebuffer(struct svga_context *svga, unsigned dirty)
+{
+   if (!svga_have_vgpu10(svga))
+      return emit_fb_vgpu9(svga);
+
+   return emit_fb_vgpu10(svga);
+}
+
+
+/*
+ * Rebind rendertargets.
+ *
+ * Similar to emit_framebuffer.  On the VGPU9 path this is done without
+ * any state checking/update; the VGPU10 path goes through
+ * emit_fb_vgpu10().
+ *
+ * Called at the beginning of every new command buffer to ensure that
+ * non-dirty rendertargets are properly paged-in.
+ *
+ * Must only be called while rebind.flags.rendertargets is set (asserted).
+ * The flag is cleared only when the bindings were emitted successfully,
+ * so a failed attempt leaves the rebind pending.  This matches
+ * svga_rebind_framebuffer_bindings() and svga_reemit_fs_bindings().
+ *
+ * Returns PIPE_OK on success, otherwise the error from the emit function.
+ */
+enum pipe_error
+svga_reemit_framebuffer_bindings(struct svga_context *svga)
+{
+   enum pipe_error ret;
+
+   assert(svga->rebind.flags.rendertargets);
+
+   if (svga_have_vgpu10(svga)) {
+      ret = emit_fb_vgpu10(svga);
+   }
+   else {
+      ret = svga_reemit_framebuffer_bindings_vgpu9(svga);
+   }
+
+   /* Keep the flag set on failure so the rebind is not silently dropped */
+   if (ret != PIPE_OK)
+      return ret;
+
+   svga->rebind.flags.rendertargets = FALSE;
+
+   return PIPE_OK;
+}
+
+
+/*
+ * Send a private allocation command to page in rendertargets resource.
+ *
+ * VGPU10 only (asserted).  Does nothing and returns PIPE_OK when the
+ * rendertargets rebind flag is not set.  Otherwise the winsys
+ * resource_rebind() hook is called with SVGA_RELOC_WRITE for every
+ * non-NULL color buffer in the hardware framebuffer state (up to the
+ * screen's max_color_buffers) and for the depth/stencil buffer, if any.
+ *
+ * Returns the first resource_rebind() error, in which case the rebind
+ * flag is left set.  On success the flag is cleared and PIPE_OK is
+ * returned.
+ */
+enum pipe_error
+svga_rebind_framebuffer_bindings(struct svga_context *svga)
+{
+   const struct svga_screen *ss = svga_screen(svga->pipe.screen);
+   struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
+   unsigned i;
+   enum pipe_error ret;
+
+   assert(svga_have_vgpu10(svga));
+
+   if (!svga->rebind.flags.rendertargets)
+      return PIPE_OK;
+
+   for (i = 0; i < ss->max_color_buffers; i++) {
+      if (hw->cbufs[i]) {
+         ret = svga->swc->resource_rebind(svga->swc,
+                                          svga_surface(hw->cbufs[i])->handle,
+                                          NULL,
+                                          SVGA_RELOC_WRITE);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+   }
+
+   if (hw->zsbuf) {
+      ret = svga->swc->resource_rebind(svga->swc,
+                                       svga_surface(hw->zsbuf)->handle,
+                                       NULL,
+                                       SVGA_RELOC_WRITE);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+
+   svga->rebind.flags.rendertargets = 0;

   return PIPE_OK;
 }
@ -202,6 +351,7 @@ emit_viewport( struct svga_context *svga,
   float fy = flip * viewport->scale[1] * -1.0f + viewport->translate[1];
   float fw =        viewport->scale[0] * 2.0f;
   float fh = flip * viewport->scale[1] * 2.0f;
+   boolean emit_vgpu10_viewport = FALSE;

   memset( &prescale, 0, sizeof(prescale) );

@ -225,7 +375,16 @@ emit_viewport( struct svga_context *svga,
   prescale.translate[1] = 0;
   prescale.translate[2] = 0;
   prescale.translate[3] = 0;
-   prescale.enabled = TRUE;
+
+   /* Enable prescale to adjust vertex positions to match
+      VGPU10 convention only if rasterization is enabled.
+    */
+   if (svga->curr.rast->templ.rasterizer_discard) {
+      degenerate = TRUE;
+      goto out;
+   } else {
+      prescale.enabled = TRUE;
+   }

   if (fw < 0) {
      prescale.scale[0] *= -1.0f;
@ -235,7 +394,14 @@ emit_viewport( struct svga_context *svga,
   }

   if (fh < 0.0) {
-      prescale.translate[1] = fh - 1.0f + fy * 2.0f;
+      if (svga_have_vgpu10(svga)) {
+         /* floating point viewport params below */
+         prescale.translate[1] = fh + fy * 2.0f;
+      }
+      else {
+         /* integer viewport params below */
+         prescale.translate[1] = fh - 1.0f + fy * 2.0f;
+      }
      fh = -fh;
      fy -= fh;
      prescale.scale[1] = -1.0f;
@ -321,19 +487,31 @@ emit_viewport( struct svga_context *svga,
      float adjust_x = 0.0;
      float adjust_y = 0.0;

-      switch (svga->curr.reduced_prim) {
-      case PIPE_PRIM_POINTS:
-         adjust_x = -0.375;
-         adjust_y = -0.75;
-         break;
-      case PIPE_PRIM_LINES:
-         adjust_x = -0.5;
-         adjust_y = 0;
-         break;
-      case PIPE_PRIM_TRIANGLES:
-         adjust_x = -0.5;
-         adjust_y = -0.5;
-         break;
+      if (svga_have_vgpu10(svga)) {
+         /* Normally, we don't have to do any sub-pixel coordinate
+          * adjustments for VGPU10.  But when we draw wide points with
+          * a GS we need an X adjustment in order to be conformant.
+          */
+         if (svga->curr.reduced_prim == PIPE_PRIM_POINTS &&
+             svga->curr.rast->pointsize > 1.0f) {
+            adjust_x = 0.5;
+         }
+      }
+      else {
+         switch (svga->curr.reduced_prim) {
+         case PIPE_PRIM_POINTS:
+            adjust_x = -0.375;
+            adjust_y = -0.75;
+            break;
+         case PIPE_PRIM_LINES:
+            adjust_x = -0.5;
+            adjust_y = 0;
+            break;
+         case PIPE_PRIM_TRIANGLES:
+            adjust_x = -0.5;
+            adjust_y = -0.5;
+            break;
+         }
      }

      if (invertY)
@ -360,6 +538,17 @@ emit_viewport( struct svga_context *svga,
      prescale.scale[2] = -prescale.scale[2];
   }

+   /* If zmin is less than 0, clamp zmin to 0 and adjust the prescale.
+    * zmin can be set to -1 when viewport->scale[2] is set to 1 and
+    * viewport->translate[2] is set to 0 in the blit code.
+    */
+   if (range_min < 0.0f) {
+      range_min = -0.5f * viewport->scale[2] + 0.5f + viewport->translate[2];
+      range_max = 0.5f * viewport->scale[2] + 0.5f + viewport->translate[2];
+      prescale.scale[2] *= 2.0f;
+      prescale.translate[2] -= 0.5f;
+   }
+
   if (prescale.enabled) {
      float H[2];
      float J[2];
@ -428,21 +617,49 @@ out:
      prescale.enabled = FALSE;
   }

-   if (memcmp(&rect, &svga->state.hw_clear.viewport, sizeof(rect)) != 0) {
-      ret = SVGA3D_SetViewport(svga->swc, &rect);
-      if(ret != PIPE_OK)
-         return ret;
+   if (!svga_rects_equal(&rect, &svga->state.hw_clear.viewport)) {
+      if (svga_have_vgpu10(svga)) {
+         emit_vgpu10_viewport = TRUE;
+      }
+      else {
+         ret = SVGA3D_SetViewport(svga->swc, &rect);
+         if (ret != PIPE_OK)
+            return ret;

-      memcpy(&svga->state.hw_clear.viewport, &rect, sizeof(rect));
-      assert(sizeof(rect) == sizeof(svga->state.hw_clear.viewport));
+         svga->state.hw_clear.viewport = rect;
+      }
   }

   if (svga->state.hw_clear.depthrange.zmin != range_min ||
-       svga->state.hw_clear.depthrange.zmax != range_max) {
-      ret = SVGA3D_SetZRange(svga->swc, range_min, range_max );
-      if(ret != PIPE_OK)
+       svga->state.hw_clear.depthrange.zmax != range_max)
+   {
+      if (svga_have_vgpu10(svga)) {
+         emit_vgpu10_viewport = TRUE;
+      }
+      else {
+         ret = SVGA3D_SetZRange(svga->swc, range_min, range_max );
+         if (ret != PIPE_OK)
+            return ret;
+
+         svga->state.hw_clear.depthrange.zmin = range_min;
+         svga->state.hw_clear.depthrange.zmax = range_max;
+      }
+   }
+
+   if (emit_vgpu10_viewport) {
+      SVGA3dViewport vp;
+      vp.x = (float) rect.x;
+      vp.y = (float) rect.y;
+      vp.width = (float) rect.w;
+      vp.height = (float) rect.h;
+      vp.minDepth = range_min;
+      vp.maxDepth = range_max;
+      ret = SVGA3D_vgpu10_SetViewports(svga->swc, 1, &vp);
+      if (ret != PIPE_OK)
         return ret;

+      svga->state.hw_clear.viewport = rect;
+
      svga->state.hw_clear.depthrange.zmin = range_min;
      svga->state.hw_clear.depthrange.zmax = range_max;
   }
@ -475,14 +692,27 @@ emit_scissor_rect( struct svga_context *svga,
                   unsigned dirty )
 {
   const struct pipe_scissor_state *scissor = &svga->curr.scissor;
-   SVGA3dRect rect;

-   rect.x = scissor->minx;
-   rect.y = scissor->miny;
-   rect.w = scissor->maxx - scissor->minx; /* + 1 ?? */
-   rect.h = scissor->maxy - scissor->miny; /* + 1 ?? */
+   if (svga_have_vgpu10(svga)) {
+      SVGASignedRect rect;

-   return SVGA3D_SetScissorRect(svga->swc, &rect);
+      rect.left = scissor->minx;
+      rect.top = scissor->miny;
+      rect.right = scissor->maxx;
+      rect.bottom = scissor->maxy;
+
+      return SVGA3D_vgpu10_SetScissorRects(svga->swc, 1, &rect);
+   }
+   else {
+      SVGA3dRect rect;
+
+      rect.x = scissor->minx;
+      rect.y = scissor->miny;
+      rect.w = scissor->maxx - scissor->minx; /* + 1 ?? */
+      rect.h = scissor->maxy - scissor->miny; /* + 1 ?? */
+
+      return SVGA3D_SetScissorRect(svga->swc, &rect);
+   }
 }


@ -527,9 +757,15 @@ emit_clip_planes( struct svga_context *svga,
      plane[2] = 2.0f * c;
      plane[3] = d - c;

-      ret = SVGA3D_SetClipPlane(svga->swc, i, plane);
-      if(ret != PIPE_OK)
-         return ret;
+      if (svga_have_vgpu10(svga)) {
+         //debug_printf("XXX emit DX10 clip plane\n");
+         ret = PIPE_OK;
+      }
+      else {
+         ret = SVGA3D_SetClipPlane(svga->swc, i, plane);
+         if (ret != PIPE_OK)
+            return ret;
+      }
   }

   return PIPE_OK;
--- a/src/gallium/drivers/svga/svga_state_fs.c
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@ -36,43 +36,12 @@
 #include "svga_shader.h"
 #include "svga_resource_texture.h"
 #include "svga_tgsi.h"
+#include "svga_format.h"

 #include "svga_hw_reg.h"



-static inline int
-compare_fs_keys(const struct svga_fs_compile_key *a,
-                const struct svga_fs_compile_key *b)
-{
-   unsigned keysize_a = svga_fs_key_size( a );
-   unsigned keysize_b = svga_fs_key_size( b );
-
-   if (keysize_a != keysize_b) {
-      return (int)(keysize_a - keysize_b);
-   }
-   return memcmp( a, b, keysize_a );
-}
-
-
-/** Search for a fragment shader variant */
-static struct svga_shader_variant *
-search_fs_key(const struct svga_fragment_shader *fs,
-              const struct svga_fs_compile_key *key)
-{
-   struct svga_shader_variant *variant = fs->base.variants;
-
-   assert(key);
-
-   for ( ; variant; variant = variant->next) {
-      if (compare_fs_keys( key, &variant->key.fkey ) == 0)
-         return variant;
-   }
-   
-   return NULL;
-}
-
-
 /**
 * If we fail to compile a fragment shader (because it uses too many
 * registers, for example) we'll use a dummy/fallback shader that
@ -111,13 +80,29 @@ get_dummy_fragment_shader(void)
 }


+/**
+ * Translate a fragment shader for the given compile key, picking the
+ * VGPU10 translator when the device supports it and the VGPU9 translator
+ * otherwise.  The translator's result is returned unchanged; callers in
+ * this file treat NULL as a translation failure.
+ */
+static struct svga_shader_variant *
+translate_fragment_program(struct svga_context *svga,
+                           const struct svga_fragment_shader *fs,
+                           const struct svga_compile_key *key)
+{
+   if (!svga_have_vgpu10(svga))
+      return svga_tgsi_vgpu9_translate(&fs->base, key, PIPE_SHADER_FRAGMENT);
+
+   return svga_tgsi_vgpu10_translate(svga, &fs->base, key,
+                                     PIPE_SHADER_FRAGMENT);
+}
+
+
 /**
 * Replace the given shader's instruction with a simple constant-color
 * shader.  We use this when normal shader translation fails.
 */
 static struct svga_shader_variant *
-get_compiled_dummy_shader(struct svga_fragment_shader *fs,
-                          const struct svga_fs_compile_key *key)
+get_compiled_dummy_shader(struct svga_context *svga,
+                          struct svga_fragment_shader *fs,
+                          const struct svga_compile_key *key)
 {
   const struct tgsi_token *dummy = get_dummy_fragment_shader();
   struct svga_shader_variant *variant;
@ -129,7 +114,7 @@ get_compiled_dummy_shader(struct svga_fragment_shader *fs,
   FREE((void *) fs->base.tokens);
   fs->base.tokens = dummy;

-   variant = svga_translate_fragment_program(fs, key);
+   variant = translate_fragment_program(svga, fs, key);
   return variant;
 }

@ -140,17 +125,17 @@ get_compiled_dummy_shader(struct svga_fragment_shader *fs,
 static enum pipe_error
 compile_fs(struct svga_context *svga,
           struct svga_fragment_shader *fs,
-           const struct svga_fs_compile_key *key,
+           const struct svga_compile_key *key,
           struct svga_shader_variant **out_variant)
 {
   struct svga_shader_variant *variant;
   enum pipe_error ret = PIPE_ERROR;

-   variant = svga_translate_fragment_program( fs, key );
+   variant = translate_fragment_program(svga, fs, key);
   if (variant == NULL) {
      debug_printf("Failed to compile fragment shader,"
                   " using dummy shader instead.\n");
-      variant = get_compiled_dummy_shader(fs, key);
+      variant = get_compiled_dummy_shader(svga, fs, key);
      if (!variant) {
         ret = PIPE_ERROR;
         goto fail;
@ -159,10 +144,11 @@ compile_fs(struct svga_context *svga,

   if (svga_shader_too_large(svga, variant)) {
      /* too big, use dummy shader */
-      debug_printf("Shader too large (%lu bytes),"
+      debug_printf("Shader too large (%u bytes),"
                   " using dummy shader instead.\n",
-                   (unsigned long ) variant->nr_tokens * sizeof(variant->tokens[0]));
-      variant = get_compiled_dummy_shader(fs, key);
+                   (unsigned) (variant->nr_tokens
+                               * sizeof(variant->tokens[0])));
+      variant = get_compiled_dummy_shader(svga, fs, key);
      if (!variant) {
         ret = PIPE_ERROR;
         goto fail;
@ -197,23 +183,45 @@ fail:
 static enum pipe_error
 make_fs_key(const struct svga_context *svga,
            struct svga_fragment_shader *fs,
-            struct svga_fs_compile_key *key)
+            struct svga_compile_key *key)
 {
+   const unsigned shader = PIPE_SHADER_FRAGMENT;
   unsigned i;
-   int idx = 0;

   memset(key, 0, sizeof *key);

+   memcpy(key->generic_remap_table, fs->generic_remap_table,
+          sizeof(fs->generic_remap_table));
+
+   /* SVGA_NEW_GS, SVGA_NEW_VS
+    */
+   if (svga->curr.gs) {
+      key->fs.gs_generic_outputs = svga->curr.gs->generic_outputs;
+   } else {
+      key->fs.vs_generic_outputs = svga->curr.vs->generic_outputs;
+   }
+
   /* Only need fragment shader fixup for twoside lighting if doing
    * hwtnl.  Otherwise the draw module does the whole job for us.
    *
    * SVGA_NEW_SWTNL
    */
   if (!svga->state.sw.need_swtnl) {
-      /* SVGA_NEW_RAST
+      /* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE
       */
-      key->light_twoside = svga->curr.rast->templ.light_twoside;
-      key->front_ccw = svga->curr.rast->templ.front_ccw;
+      key->fs.light_twoside = svga->curr.rast->templ.light_twoside;
+      key->fs.front_ccw = svga->curr.rast->templ.front_ccw;
+      key->fs.pstipple = (svga->curr.rast->templ.poly_stipple_enable &&
+                          svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES);
+      key->fs.aa_point = (svga->curr.rast->templ.point_smooth &&
+                          svga->curr.reduced_prim == PIPE_PRIM_POINTS &&
+                          (svga->curr.rast->pointsize > 1.0 ||
+                           svga->curr.vs->base.info.writes_psize));
+      if (key->fs.aa_point) {
+         assert(svga->curr.gs != NULL);
+         assert(svga->curr.gs->aa_point_coord_index != -1);
+         key->fs.aa_point_coord_index = svga->curr.gs->aa_point_coord_index;
+      }
   }

   /* The blend workaround for simulating logicop xor behaviour
@ -231,7 +239,7 @@ make_fs_key(const struct svga_context *svga,
    * SVGA_NEW_BLEND
    */
   if (svga->curr.blend->need_white_fragments) {
-      key->white_fragments = 1;
+      key->fs.white_fragments = 1;
   }

 #ifdef DEBUG
@ -241,22 +249,23 @@ make_fs_key(const struct svga_context *svga,
    */
   {
      static boolean warned = FALSE;
-      unsigned i, n = MAX2(svga->curr.num_sampler_views,
-                           svga->curr.num_samplers);
+      unsigned i, n = MAX2(svga->curr.num_sampler_views[shader],
+                           svga->curr.num_samplers[shader]);
      /* Only warn once to prevent too much debug output */
      if (!warned) {
-         if (svga->curr.num_sampler_views != svga->curr.num_samplers) {
+         if (svga->curr.num_sampler_views[shader] !=
+             svga->curr.num_samplers[shader]) {
            debug_printf("svga: mismatched number of sampler views (%u) "
                         "vs. samplers (%u)\n",
-                         svga->curr.num_sampler_views,
-                         svga->curr.num_samplers);
+                         svga->curr.num_sampler_views[shader],
+                         svga->curr.num_samplers[shader]);
         }
         for (i = 0; i < n; i++) {
-            if ((svga->curr.sampler_views[i] == NULL) !=
-                (svga->curr.sampler[i] == NULL))
+            if ((svga->curr.sampler_views[shader][i] == NULL) !=
+                (svga->curr.sampler[shader][i] == NULL))
               debug_printf("sampler_view[%u] = %p but sampler[%u] = %p\n",
-                            i, svga->curr.sampler_views[i],
-                            i, svga->curr.sampler[i]);
+                            i, svga->curr.sampler_views[shader][i],
+                            i, svga->curr.sampler[shader][i]);
         }
         warned = TRUE;
      }
@ -268,68 +277,62 @@ make_fs_key(const struct svga_context *svga,
    *
    * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER
    */
-   for (i = 0; i < svga->curr.num_sampler_views; i++) {
-      if (svga->curr.sampler_views[i] && svga->curr.sampler[i]) {
-         assert(svga->curr.sampler_views[i]->texture);
-         key->tex[i].texture_target = svga->curr.sampler_views[i]->texture->target;
-         if (!svga->curr.sampler[i]->normalized_coords) {
-            key->tex[i].width_height_idx = idx++;
-            key->tex[i].unnormalized = TRUE;
-            ++key->num_unnormalized_coords;
-         }
+   svga_init_shader_key_common(svga, shader, key);

-         key->tex[i].swizzle_r = svga->curr.sampler_views[i]->swizzle_r;
-         key->tex[i].swizzle_g = svga->curr.sampler_views[i]->swizzle_g;
-         key->tex[i].swizzle_b = svga->curr.sampler_views[i]->swizzle_b;
-         key->tex[i].swizzle_a = svga->curr.sampler_views[i]->swizzle_a;
-      }
-   }
-   key->num_textures = svga->curr.num_sampler_views;
+   for (i = 0; i < svga->curr.num_samplers[shader]; ++i) {
+      struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
+      const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i];
+      if (view) {
+         struct pipe_resource *tex = view->texture;
+         if (tex->target != PIPE_BUFFER) {
+            struct svga_texture *stex = svga_texture(tex);
+            SVGA3dSurfaceFormat format = stex->key.format;

-   idx = 0;
-   for (i = 0; i < svga->curr.num_samplers; ++i) {
-      if (svga->curr.sampler_views[i] && svga->curr.sampler[i]) {
-         struct pipe_resource *tex = svga->curr.sampler_views[i]->texture;
-         struct svga_texture *stex = svga_texture(tex);
-         SVGA3dSurfaceFormat format = stex->key.format;
-
-         if (format == SVGA3D_Z_D16 ||
-             format == SVGA3D_Z_D24X8 ||
-             format == SVGA3D_Z_D24S8) {
-            /* If we're sampling from a SVGA3D_Z_D16, SVGA3D_Z_D24X8,
-             * or SVGA3D_Z_D24S8 surface, we'll automatically get
-             * shadow comparison.  But we only get LEQUAL mode.
-             * Set TEX_COMPARE_NONE here so we don't emit the extra FS
-             * code for shadow comparison.
-             */
-            key->tex[i].compare_mode = PIPE_TEX_COMPARE_NONE;
-            key->tex[i].compare_func = PIPE_FUNC_NEVER;
-            /* These depth formats _only_ support comparison mode and
-             * not ordinary sampling so warn if the later is expected.
-             */
-            if (svga->curr.sampler[i]->compare_mode !=
-                PIPE_TEX_COMPARE_R_TO_TEXTURE) {
-               debug_warn_once("Unsupported shadow compare mode");
-            }                   
-            /* The only supported comparison mode is LEQUAL */
-            if (svga->curr.sampler[i]->compare_func != PIPE_FUNC_LEQUAL) {
-               debug_warn_once("Unsupported shadow compare function");
+            if (!svga_have_vgpu10(svga) &&
+                (format == SVGA3D_Z_D16 ||
+                 format == SVGA3D_Z_D24X8 ||
+                 format == SVGA3D_Z_D24S8)) {
+               /* If we're sampling from a SVGA3D_Z_D16, SVGA3D_Z_D24X8,
+                * or SVGA3D_Z_D24S8 surface, we'll automatically get
+                * shadow comparison.  But we only get LEQUAL mode.
+                * Set TEX_COMPARE_NONE here so we don't emit the extra FS
+                * code for shadow comparison.
+                */
+               key->tex[i].compare_mode = PIPE_TEX_COMPARE_NONE;
+               key->tex[i].compare_func = PIPE_FUNC_NEVER;
+               /* These depth formats _only_ support comparison mode and
+                * not ordinary sampling so warn if the latter is expected.
+                */
+               if (sampler->compare_mode != PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+                  debug_warn_once("Unsupported shadow compare mode");
+               }
+               /* The shader translation code can emit code to
+                * handle ALWAYS and NEVER compare functions
+                */
+               else if (sampler->compare_func == PIPE_FUNC_ALWAYS ||
+                        sampler->compare_func == PIPE_FUNC_NEVER) {
+                  key->tex[i].compare_mode = sampler->compare_mode;
+                  key->tex[i].compare_func = sampler->compare_func;
+               }
+               else if (sampler->compare_func != PIPE_FUNC_LEQUAL) {
+                  debug_warn_once("Unsupported shadow compare function");
+               }
+            }
+            else {
+               /* For other texture formats, just use the compare func/mode
+                * as-is.  Should be no-ops for color textures.  For depth
+                * textures, we do not get automatic depth compare.  We have
+                * to do it ourselves in the shader.  And we don't get PCF.
+                */
+               key->tex[i].compare_mode = sampler->compare_mode;
+               key->tex[i].compare_func = sampler->compare_func;
            }
-         }
-         else {
-            /* For other texture formats, just use the compare func/mode
-             * as-is.  Should be no-ops for color textures.  For depth
-             * textures, we do not get automatic depth compare.  We have
-             * to do it ourselves in the shader.  And we don't get PCF.
-             */
-            key->tex[i].compare_mode = svga->curr.sampler[i]->compare_mode;
-            key->tex[i].compare_func = svga->curr.sampler[i]->compare_func;
         }
      }
   }

   /* sprite coord gen state */
-   for (i = 0; i < svga->curr.num_samplers; ++i) {
+   for (i = 0; i < svga->curr.num_samplers[shader]; ++i) {
      key->tex[i].sprite_texgen =
         svga->curr.rast->templ.sprite_coord_enable & (1 << i);
   }
@ -337,10 +340,25 @@ make_fs_key(const struct svga_context *svga,
   key->sprite_origin_lower_left = (svga->curr.rast->templ.sprite_coord_mode
                                    == PIPE_SPRITE_COORD_LOWER_LEFT);

+   key->fs.flatshade = svga->curr.rast->templ.flatshade;
+
+   /* SVGA_NEW_DEPTH_STENCIL_ALPHA */
+   if (svga_have_vgpu10(svga)) {
+      /* Alpha testing is not supported in integer-valued render targets. */
+      if (svga_has_any_integer_cbufs(svga)) {
+         key->fs.alpha_func = SVGA3D_CMP_ALWAYS;
+         key->fs.alpha_ref = 0;
+      }
+      else {
+         key->fs.alpha_func = svga->curr.depth->alphafunc;
+         key->fs.alpha_ref = svga->curr.depth->alpharef;
+      }
+   }
+
   /* SVGA_NEW_FRAME_BUFFER */
   if (fs->base.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) {
      /* Replicate color0 output to N colorbuffers */
-      key->write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs;
+      key->fs.write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs;
   }

   return PIPE_OK;
@ -355,18 +373,32 @@ svga_reemit_fs_bindings(struct svga_context *svga)
 {
   enum pipe_error ret;

-   assert(svga->rebind.fs);
+   assert(svga->rebind.flags.fs);
   assert(svga_have_gb_objects(svga));

   if (!svga->state.hw_draw.fs)
      return PIPE_OK;

-   ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS,
-                            svga->state.hw_draw.fs->gb_shader);
+   if (!svga_need_to_rebind_resources(svga)) {
+      ret =  svga->swc->resource_rebind(svga->swc, NULL,
+                                        svga->state.hw_draw.fs->gb_shader,
+                                        SVGA_RELOC_READ);
+      goto out;
+   }
+
+   if (svga_have_vgpu10(svga))
+      ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_PS,
+                                    svga->state.hw_draw.fs->gb_shader,
+                                    svga->state.hw_draw.fs->id);
+   else
+      ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS,
+                               svga->state.hw_draw.fs->gb_shader);
+
+ out:
   if (ret != PIPE_OK)
      return ret;

-   svga->rebind.fs = FALSE;
+   svga->rebind.flags.fs = FALSE;
   return PIPE_OK;
 }

@ -378,7 +410,7 @@ emit_hw_fs(struct svga_context *svga, unsigned dirty)
   struct svga_shader_variant *variant = NULL;
   enum pipe_error ret = PIPE_OK;
   struct svga_fragment_shader *fs = svga->curr.fs;
-   struct svga_fs_compile_key key;
+   struct svga_compile_key key;

   /* SVGA_NEW_BLEND
    * SVGA_NEW_TEXTURE_BINDING
@ -386,14 +418,16 @@ emit_hw_fs(struct svga_context *svga, unsigned dirty)
    * SVGA_NEW_NEED_SWTNL
    * SVGA_NEW_SAMPLER
    * SVGA_NEW_FRAME_BUFFER
+    * SVGA_NEW_DEPTH_STENCIL_ALPHA
+    * SVGA_NEW_VS
    */
-   ret = make_fs_key( svga, fs, &key );
+   ret = make_fs_key(svga, fs, &key);
   if (ret != PIPE_OK)
      return ret;

-   variant = search_fs_key( fs, &key );
+   variant = svga_search_shader_key(&fs->base, &key);
   if (!variant) {
-      ret = compile_fs( svga, fs, &key, &variant );
+      ret = compile_fs(svga, fs, &key, &variant);
      if (ret != PIPE_OK)
         return ret;
   }
@ -401,22 +435,14 @@ emit_hw_fs(struct svga_context *svga, unsigned dirty)
   assert(variant);

   if (variant != svga->state.hw_draw.fs) {
-      if (svga_have_gb_objects(svga)) {
-         ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS,
-                                  variant->gb_shader);
-         if (ret != PIPE_OK)
-            return ret;
+      ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, variant);
+      if (ret != PIPE_OK)
+         return ret;

-         svga->rebind.fs = FALSE;
-      }
-      else {
-         ret = SVGA3D_SetShader(svga->swc, SVGA3D_SHADERTYPE_PS, variant->id);
-         if (ret != PIPE_OK)
-            return ret;
-      }
+      svga->rebind.flags.fs = FALSE;

      svga->dirty |= SVGA_NEW_FS_VARIANT;
-      svga->state.hw_draw.fs = variant;      
+      svga->state.hw_draw.fs = variant;
   }

   return PIPE_OK;
@ -426,11 +452,15 @@ struct svga_tracked_state svga_hw_fs =
 {
   "fragment shader (hwtnl)",
   (SVGA_NEW_FS |
+    SVGA_NEW_GS |
+    SVGA_NEW_VS |
    SVGA_NEW_TEXTURE_BINDING |
    SVGA_NEW_NEED_SWTNL |
    SVGA_NEW_RAST |
+    SVGA_NEW_REDUCED_PRIMITIVE |
    SVGA_NEW_SAMPLER |
    SVGA_NEW_FRAME_BUFFER |
+    SVGA_NEW_DEPTH_STENCIL_ALPHA |
    SVGA_NEW_BLEND),
   emit_hw_fs
 };
--- a/src/gallium/drivers/svga/svga_state_need_swtnl.c
+++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c
@ -26,6 +26,7 @@
 #include "util/u_inlines.h"
 #include "pipe/p_state.h"
 #include "svga_context.h"
+#include "svga_shader.h"
 #include "svga_state.h"
 #include "svga_debug.h"
 #include "svga_hw_reg.h"
@ -91,7 +92,7 @@ update_need_pipeline(struct svga_context *svga, unsigned dirty)
      unsigned generic_inputs =
         svga->curr.fs ? svga->curr.fs->generic_inputs : 0;

-      if (sprite_coord_gen &&
+      if (!svga_have_vgpu10(svga) && sprite_coord_gen &&
          (generic_inputs & ~sprite_coord_gen)) {
         /* The fragment shader is using some generic inputs that are
          * not being replaced by auto-generated point/sprite coords (and
--- a/src/gallium/drivers/svga/svga_state_rss.c
+++ b/src/gallium/drivers/svga/svga_state_rss.c
@ -23,16 +23,20 @@
 *
 **********************************************************/

+#include "pipe/p_defines.h"
+#include "util/u_bitmask.h"
 #include "util/u_format.h"
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
-#include "pipe/p_defines.h"
 #include "util/u_math.h"
+#include "util/u_memory.h"

 #include "svga_context.h"
 #include "svga_screen.h"
 #include "svga_state.h"
 #include "svga_cmd.h"
+#include "svga_format.h"
+#include "svga_shader.h"


 struct rs_queue {
@ -77,7 +81,7 @@ svga_queue_rs( struct rs_queue *q,
 * the "to" state.
 */
 static enum pipe_error
-emit_rss(struct svga_context *svga, unsigned dirty)
+emit_rss_vgpu9(struct svga_context *svga, unsigned dirty)
 {
   struct svga_screen *screen = svga_screen(svga->pipe.screen);
   struct rs_queue queue;
@ -85,7 +89,7 @@ emit_rss(struct svga_context *svga, unsigned dirty)

   queue.rs_count = 0;

-   if (dirty & SVGA_NEW_BLEND) {
+   if (dirty & (SVGA_NEW_BLEND | SVGA_NEW_BLEND_COLOR)) {
      const struct svga_blend_state *curr = svga->curr.blend;

      EMIT_RS( svga, curr->rt[0].writemask, COLORWRITEENABLE, fail );
@ -119,7 +123,7 @@ emit_rss(struct svga_context *svga, unsigned dirty)
      EMIT_RS( svga, color, BLENDCOLOR, fail );
   }

-   if (dirty & (SVGA_NEW_DEPTH_STENCIL | SVGA_NEW_RAST)) {
+   if (dirty & (SVGA_NEW_DEPTH_STENCIL_ALPHA | SVGA_NEW_RAST)) {
      const struct svga_depth_stencil_state *curr = svga->curr.depth; 
      const struct svga_rasterizer_state *rast = svga->curr.rast; 

@ -300,6 +304,151 @@ fail:
   return PIPE_ERROR_OUT_OF_MEMORY;
 }

+/** Returns the cached non-culling rasterizer state object used with point
+ *  sprites, creating it on first use (one per point_smooth setting).
+ */
+static struct svga_rasterizer_state *
+get_no_cull_rasterizer_state(struct svga_context *svga)
+{
+   const struct svga_rasterizer_state *cur = svga->curr.rast;
+   const unsigned int slot = cur->templ.point_smooth;
+   struct pipe_rasterizer_state templ;
+
+   if (svga->rasterizer_no_cull[slot])
+      return svga->rasterizer_no_cull[slot];
+
+   memset(&templ, 0, sizeof(templ));
+   templ.flatshade = 1;
+   templ.front_ccw = 1;
+   templ.point_smooth = cur->templ.point_smooth;
+
+   /* All rasterizer states share half_pixel_center, bottom_edge_rule and
+    * clip_halfz since they are constant for a context, so copy them from
+    * the current state. If we ever implement GL_ARB_clip_control, the
+    * clip_halfz field would have to be observed.
+    */
+   templ.half_pixel_center = cur->templ.half_pixel_center;
+   templ.bottom_edge_rule = cur->templ.bottom_edge_rule;
+   templ.clip_halfz = cur->templ.clip_halfz;
+
+   svga->rasterizer_no_cull[slot] =
+      svga->pipe.create_rasterizer_state(&svga->pipe, &templ);
+   return svga->rasterizer_no_cull[slot];
+}
+
+/**
+ * Bind the vgpu10 blend, depth/stencil and rasterizer state objects that
+ * the dirty flags select, skipping bindings already recorded in
+ * svga->state.hw_draw.  Returns PIPE_OK, the error from a failed bind, or
+ * PIPE_ERROR_OUT_OF_MEMORY when no point-sprite rasterizer state exists.
+ */
+static enum pipe_error
+emit_rss_vgpu10(struct svga_context *svga, unsigned dirty)
+{
+   enum pipe_error ret = PIPE_OK;
+
+   svga_hwtnl_flush_retry(svga);
+
+   if (dirty & (SVGA_NEW_BLEND | SVGA_NEW_BLEND_COLOR)) {
+      const struct svga_blend_state *curr;
+      float blend_factor[4];
+
+      if (svga_has_any_integer_cbufs(svga)) {
+         /* Blending is not supported in integer-valued render targets. */
+         curr = svga->noop_blend;
+         blend_factor[0] = blend_factor[1] =
+         blend_factor[2] = blend_factor[3] = 0;
+      }
+      else {
+         const float *color = svga->curr.blend_color.color;
+
+         curr = svga->curr.blend;
+
+         /* With blend_color_alpha set, alpha is used for every factor */
+         blend_factor[0] = curr->blend_color_alpha ? color[3] : color[0];
+         blend_factor[1] = curr->blend_color_alpha ? color[3] : color[1];
+         blend_factor[2] = curr->blend_color_alpha ? color[3] : color[2];
+         blend_factor[3] = color[3];
+      }
+
+      /* Set/bind the blend state object */
+      if (svga->state.hw_draw.blend_id != curr->id ||
+          svga->state.hw_draw.blend_factor[0] != blend_factor[0] ||
+          svga->state.hw_draw.blend_factor[1] != blend_factor[1] ||
+          svga->state.hw_draw.blend_factor[2] != blend_factor[2] ||
+          svga->state.hw_draw.blend_factor[3] != blend_factor[3] ||
+          svga->state.hw_draw.blend_sample_mask != svga->curr.sample_mask) {
+         ret = SVGA3D_vgpu10_SetBlendState(svga->swc, curr->id,
+                                           blend_factor,
+                                           svga->curr.sample_mask);
+         if (ret != PIPE_OK)
+            return ret;
+
+         svga->state.hw_draw.blend_id = curr->id;
+         svga->state.hw_draw.blend_factor[0] = blend_factor[0];
+         svga->state.hw_draw.blend_factor[1] = blend_factor[1];
+         svga->state.hw_draw.blend_factor[2] = blend_factor[2];
+         svga->state.hw_draw.blend_factor[3] = blend_factor[3];
+         svga->state.hw_draw.blend_sample_mask = svga->curr.sample_mask;
+      }
+   }
+
+   if (dirty & (SVGA_NEW_DEPTH_STENCIL_ALPHA | SVGA_NEW_STENCIL_REF)) {
+      const struct svga_depth_stencil_state *curr = svga->curr.depth;
+      unsigned curr_ref = svga->curr.stencil_ref.ref_value[0];
+
+      if (curr->id != svga->state.hw_draw.depth_stencil_id ||
+          curr_ref != svga->state.hw_draw.stencil_ref) {
+         /* Set/bind the depth/stencil state object */
+         ret = SVGA3D_vgpu10_SetDepthStencilState(svga->swc, curr->id,
+                                                  curr_ref);
+         if (ret != PIPE_OK)
+            return ret;
+
+         svga->state.hw_draw.depth_stencil_id = curr->id;
+         svga->state.hw_draw.stencil_ref = curr_ref;
+      }
+   }
+
+   if (dirty & (SVGA_NEW_REDUCED_PRIMITIVE | SVGA_NEW_RAST)) {
+      const struct svga_rasterizer_state *rast;
+
+      if (svga->curr.reduced_prim == PIPE_PRIM_POINTS &&
+          svga->curr.gs && svga->curr.gs->wide_point) {
+         /* Point sprites need a non-culling rasterizer state object.
+          * Its creation can fail, so never dereference a NULL result.
+          */
+         rast = get_no_cull_rasterizer_state(svga);
+         if (!rast)
+            return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+      else {
+         rast = svga->curr.rast;
+      }
+
+      if (svga->state.hw_draw.rasterizer_id != rast->id) {
+         /* Set/bind the rasterizer state object */
+         ret = SVGA3D_vgpu10_SetRasterizerState(svga->swc, rast->id);
+         if (ret != PIPE_OK)
+            return ret;
+         svga->state.hw_draw.rasterizer_id = rast->id;
+      }
+   }
+   return PIPE_OK;
+}
+
+
+/** Emit render state via the vgpu10 path when available, else vgpu9. */
+static enum pipe_error
+emit_rss(struct svga_context *svga, unsigned dirty)
+{
+   /* svga_have_vgpu10() selects which emitter applies to this context */
+   if (!svga_have_vgpu10(svga))
+      return emit_rss_vgpu9(svga, dirty);
+
+   return emit_rss_vgpu10(svga, dirty);
+}
+

 struct svga_tracked_state svga_hw_rss = 
 {
@ -307,11 +456,12 @@ struct svga_tracked_state svga_hw_rss =

   (SVGA_NEW_BLEND |
    SVGA_NEW_BLEND_COLOR |
-    SVGA_NEW_DEPTH_STENCIL |
+    SVGA_NEW_DEPTH_STENCIL_ALPHA |
    SVGA_NEW_STENCIL_REF |
    SVGA_NEW_RAST |
    SVGA_NEW_FRAME_BUFFER |
-    SVGA_NEW_NEED_PIPELINE),
+    SVGA_NEW_NEED_PIPELINE |
+    SVGA_NEW_REDUCED_PRIMITIVE),

   emit_rss
 };
--- a/src/gallium/drivers/svga/svga_state_sampler.c
+++ b/src/gallium/drivers/svga/svga_state_sampler.c
@ -45,7 +45,7 @@


 /** Get resource handle for a texture or buffer */
-static INLINE struct svga_winsys_surface *
+static inline struct svga_winsys_surface *
 svga_resource_handle(struct pipe_resource *res)
 {
   if (res->target == PIPE_BUFFER) {
@ -111,7 +111,7 @@ svga_validate_pipe_sampler_view(struct svga_context *svga,

      if (texture->target == PIPE_BUFFER) {
         viewDesc.buffer.firstElement = sv->base.u.buf.first_element;
-         viewDesc.buffer.numElements = (sv->base.u.buf.last_element - 
+         viewDesc.buffer.numElements = (sv->base.u.buf.last_element -
                                        sv->base.u.buf.first_element + 1);
      }
      else {
@ -122,13 +122,13 @@ svga_validate_pipe_sampler_view(struct svga_context *svga,
      }

      /* arraySize in viewDesc specifies the number of array slices in a
-       * texture array. For 3D texture, last_layer in 
+       * texture array. For 3D texture, last_layer in
       * pipe_sampler_view specifies the last slice of the texture
       * which is different from the last slice in a texture array,
       * hence we need to set arraySize to 1 explicitly.
       */
-      viewDesc.tex.arraySize = 
-         (texture->target == PIPE_TEXTURE_3D || 
+      viewDesc.tex.arraySize =
+         (texture->target == PIPE_TEXTURE_3D ||
          texture->target == PIPE_BUFFER) ? 1 :
            (sv->base.u.tex.last_layer - sv->base.u.tex.first_layer + 1);

--- a/src/gallium/drivers/svga/svga_state_tss.c
+++ b/src/gallium/drivers/svga/svga_state_tss.c
@ -31,24 +31,28 @@
 #include "svga_sampler_view.h"
 #include "svga_winsys.h"
 #include "svga_context.h"
+#include "svga_shader.h"
 #include "svga_state.h"
 #include "svga_cmd.h"


+/**
+ * Context teardown: drop all hw_draw views and fragment sampler views.
+ */
 void svga_cleanup_tss_binding(struct svga_context *svga)
 {
+   const unsigned shader = PIPE_SHADER_FRAGMENT;
    unsigned i;
-   unsigned count = MAX2( svga->curr.num_sampler_views,
-                          svga->state.hw_draw.num_views );

-   for (i = 0; i < count; i++) {
+   for (i = 0; i < Elements(svga->state.hw_draw.views); i++) {
       struct svga_hw_view_state *view = &svga->state.hw_draw.views[i];
-
-      svga_sampler_view_reference(&view->v, NULL);
-      pipe_sampler_view_release(&svga->pipe, &svga->curr.sampler_views[i]);
-      pipe_resource_reference( &view->texture, NULL );
-
-      view->dirty = 1;
+      /* view points into hw_draw.views[], so it can never be NULL */
+      svga_sampler_view_reference(&view->v, NULL);
+      pipe_sampler_view_release(&svga->pipe,
+                                &svga->curr.sampler_views[shader][i]);
+      pipe_resource_reference(&view->texture, NULL);
+
+      view->dirty = TRUE;
    }
 }

@ -63,73 +67,113 @@ struct bind_queue {
 };


+/**
+ * Update the texture binding for one texture unit: recompute the lod
+ * range, refresh the cached hw view if it changed, and queue the unit
+ * for (re)binding when required.
+ */
+static void
+emit_tex_binding_unit(struct svga_context *svga,
+                      unsigned unit,
+                      const struct svga_sampler_state *s,
+                      const struct pipe_sampler_view *sv,
+                      struct svga_hw_view_state *view,
+                      boolean reemit,
+                      struct bind_queue *queue)
+{
+   struct pipe_resource *texture = NULL;
+   unsigned min_lod = 0, max_lod = 0;
+
+   /* get min max lod; both stay 0 unless a sampler and a view are given */
+   if (sv && s) {
+      texture = sv->texture;
+
+      if (s->mipfilter == SVGA3D_TEX_FILTER_NONE) {
+         /* just use the base level image */
+         min_lod = max_lod = sv->u.tex.first_level;
+      }
+      else {
+         const unsigned last_level =
+            MIN2(sv->u.tex.last_level, sv->texture->last_level);
+
+         min_lod = MAX2(0, (s->view_min_lod + sv->u.tex.first_level));
+         min_lod = MIN2(min_lod, last_level);
+         max_lod = MIN2(s->view_max_lod + sv->u.tex.first_level, last_level);
+      }
+   }
+
+   if (view->texture != texture ||
+       view->min_lod != min_lod ||
+       view->max_lod != max_lod) {
+      /* Binding changed: drop the old view, then cache the new values */
+      svga_sampler_view_reference(&view->v, NULL);
+      pipe_resource_reference( &view->texture, texture );
+
+      view->min_lod = min_lod;
+      view->max_lod = max_lod;
+      view->dirty = TRUE;
+
+      if (texture) {
+         view->v = svga_get_tex_sampler_view(&svga->pipe, texture,
+                                             min_lod, max_lod);
+      }
+   }
+
+   /*
+    * We need to reemit non-null texture bindings, even when they are not
+    * dirty, to ensure that the resources are paged in.
+    */
+   if (view->dirty || (reemit && view->v)) {
+      const unsigned slot = queue->bind_count++;
+      queue->bind[slot].unit = unit;
+      queue->bind[slot].view = view;
+   }
+
+   /* A view that is not dirty but still bound gets validated */
+   if (!view->dirty && view->v) {
+      svga_validate_sampler_view(svga, view->v);
+   }
+}
+
+
 static enum pipe_error
 update_tss_binding(struct svga_context *svga, 
                   unsigned dirty )
 {
-   boolean reemit = svga->rebind.texture_samplers;
+   const unsigned shader = PIPE_SHADER_FRAGMENT;
+   boolean reemit = svga->rebind.flags.texture_samplers;
   unsigned i;
-   unsigned count = MAX2( svga->curr.num_sampler_views,
+   unsigned count = MAX2( svga->curr.num_sampler_views[shader],
                          svga->state.hw_draw.num_views );
-   unsigned min_lod;
-   unsigned max_lod;

   struct bind_queue queue;

+   if (svga_have_vgpu10(svga))
+      return PIPE_OK;
+
   queue.bind_count = 0;
   
   for (i = 0; i < count; i++) {
-      const struct svga_sampler_state *s = svga->curr.sampler[i];
-      struct svga_hw_view_state *view = &svga->state.hw_draw.views[i];
-      struct pipe_resource *texture = NULL;
-      struct pipe_sampler_view *sv = svga->curr.sampler_views[i];
-
-      /* get min max lod */
-      if (sv && s) {
-         min_lod = MAX2(0, (s->view_min_lod + sv->u.tex.first_level));
-         max_lod = MIN2(s->view_max_lod + sv->u.tex.first_level,
-                        sv->texture->last_level);
-         texture = sv->texture;
-      } else {
-         min_lod = 0;
-         max_lod = 0;
-      }
-
-      if (view->texture != texture ||
-          view->min_lod != min_lod ||
-          view->max_lod != max_lod) {
-
-         svga_sampler_view_reference(&view->v, NULL);
-         pipe_resource_reference( &view->texture, texture );
-
-         view->dirty = TRUE;
-         view->min_lod = min_lod;
-         view->max_lod = max_lod;
-
-         if (texture)
-            view->v = svga_get_tex_sampler_view(&svga->pipe, 
-                                                texture, 
-                                                min_lod,
-                                                max_lod);
-      }
-
-      /*
-       * We need to reemit non-null texture bindings, even when they are not
-       * dirty, to ensure that the resources are paged in.
-       */
-
-      if (view->dirty ||
-          (reemit && view->v)) {
-         queue.bind[queue.bind_count].unit = i;
-         queue.bind[queue.bind_count].view = view;
-         queue.bind_count++;
-      } 
-      if (!view->dirty && view->v) {
-         svga_validate_sampler_view(svga, view->v);
-      }
+      emit_tex_binding_unit(svga, i,
+                            svga->curr.sampler[shader][i],
+                            svga->curr.sampler_views[shader][i],
+                            &svga->state.hw_draw.views[i],
+                            reemit,
+                            &queue);
   }

-   svga->state.hw_draw.num_views = svga->curr.num_sampler_views;
+   svga->state.hw_draw.num_views = svga->curr.num_sampler_views[shader];
+
+   /* Polygon stipple */
+   if (svga->curr.rast->templ.poly_stipple_enable) {
+      const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+      emit_tex_binding_unit(svga, unit,
+                            svga->polygon_stipple.sampler,
+                            &svga->polygon_stipple.sampler_view->base,
+                            &svga->state.hw_draw.views[unit],
+                            reemit,
+                            &queue);
+   }

   if (queue.bind_count) {
      SVGA3dTextureState *ts;
@ -163,7 +207,7 @@ update_tss_binding(struct svga_context *svga,
      SVGA_FIFOCommitAll( svga->swc );
   }

-   svga->rebind.texture_samplers = FALSE;
+   svga->rebind.flags.texture_samplers = FALSE;

   return PIPE_OK;

@ -187,7 +231,8 @@ svga_reemit_tss_bindings(struct svga_context *svga)
   enum pipe_error ret;
   struct bind_queue queue;

-   assert(svga->rebind.texture_samplers);
+   assert(!svga_have_vgpu10(svga));
+   assert(svga->rebind.flags.texture_samplers);

   queue.bind_count = 0;

@ -201,6 +246,18 @@ svga_reemit_tss_bindings(struct svga_context *svga)
      }
   }

+   /* Polygon stipple */
+   if (svga->curr.rast->templ.poly_stipple_enable) {
+      const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+      struct svga_hw_view_state *view = &svga->state.hw_draw.views[unit];
+
+      if (view->v) {
+         queue.bind[queue.bind_count].unit = unit;
+         queue.bind[queue.bind_count].view = view;
+         queue.bind_count++;
+      }
+   }
+
   if (queue.bind_count) {
      SVGA3dTextureState *ts;

@ -229,7 +286,7 @@ svga_reemit_tss_bindings(struct svga_context *svga)
      SVGA_FIFOCommitAll(svga->swc);
   }

-   svga->rebind.texture_samplers = FALSE;
+   svga->rebind.flags.texture_samplers = FALSE;

   return PIPE_OK;
 }
@ -238,6 +295,7 @@ svga_reemit_tss_bindings(struct svga_context *svga)
 struct svga_tracked_state svga_hw_tss_binding = {
   "texture binding emit",
   SVGA_NEW_TEXTURE_BINDING |
+   SVGA_NEW_STIPPLE |
   SVGA_NEW_SAMPLER,
   update_tss_binding
 };
@ -252,29 +310,7 @@ struct ts_queue {
 };


-#define EMIT_TS(svga, unit, val, token, fail)                           \
-do {                                                                    \
-   assert(unit < Elements(svga->state.hw_draw.ts));                     \
-   assert(SVGA3D_TS_##token < Elements(svga->state.hw_draw.ts[unit]));  \
-   if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) {        \
-      svga_queue_tss( &queue, unit, SVGA3D_TS_##token, val );           \
-      svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val;            \
-   }                                                                    \
-} while (0)
-
-#define EMIT_TS_FLOAT(svga, unit, fvalue, token, fail)                  \
-do {                                                                    \
-   unsigned val = fui(fvalue);                                          \
-   assert(unit < Elements(svga->state.hw_draw.ts));                     \
-   assert(SVGA3D_TS_##token < Elements(svga->state.hw_draw.ts[unit]));  \
-   if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) {        \
-      svga_queue_tss( &queue, unit, SVGA3D_TS_##token, val );           \
-      svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val;            \
-   }                                                                    \
-} while (0)
-
-
-static inline void 
+static inline void
 svga_queue_tss( struct ts_queue *q,
                unsigned unit,
                unsigned tss,
@ -288,42 +324,84 @@ svga_queue_tss( struct ts_queue *q,
 }


+/*
+ * Queue texture-stage state SVGA3D_TS_<token> for `unit` when `val` differs
+ * from the value cached in svga->state.hw_draw.ts, and update the cache.
+ * Expands in a scope that provides `queue`, a struct ts_queue pointer;
+ * `val` is expanded more than once, so pass side-effect-free expressions.
+ */
+#define EMIT_TS(svga, unit, val, token)                                 \
+do {                                                                    \
+   assert(unit < Elements(svga->state.hw_draw.ts));                     \
+   assert(SVGA3D_TS_##token < Elements(svga->state.hw_draw.ts[unit]));  \
+   if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) {        \
+      svga_queue_tss( queue, unit, SVGA3D_TS_##token, val );            \
+      svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val;            \
+   }                                                                    \
+} while (0)
+
+/* Same as EMIT_TS, but for a float: `fvalue` is converted with fui()
+ * before it is compared, queued and cached. */
+#define EMIT_TS_FLOAT(svga, unit, fvalue, token)                        \
+   EMIT_TS(svga, unit, fui(fvalue), token)
+
+
+/**
+ * Emit texture sampler state (tss) for one texture unit.
+ * The EMIT_TS macros queue a value only if it differs from hw_draw.ts.
+ */
+static void
+emit_tss_unit(struct svga_context *svga, unsigned unit,
+              const struct svga_sampler_state *state,
+              struct ts_queue *queue)
+{
+   const unsigned unit_bit = 1 << unit;
+   /* units flagged 1D always wrap in V; units flagged sRGB use gamma 2.2 */
+   const unsigned addressv = (svga->curr.tex_flags.flag_1d & unit_bit) ?
+      SVGA3D_TEX_ADDRESS_WRAP : state->addressv;
+   const float gamma_val =
+      (svga->curr.tex_flags.flag_srgb & unit_bit) ? 2.2f : 1.0f;
+
+   EMIT_TS(svga, unit, state->mipfilter, MIPFILTER);
+   EMIT_TS(svga, unit, state->min_lod, TEXTURE_MIPMAP_LEVEL);
+   EMIT_TS(svga, unit, state->magfilter, MAGFILTER);
+   EMIT_TS(svga, unit, state->minfilter, MINFILTER);
+   EMIT_TS(svga, unit, state->aniso_level, TEXTURE_ANISOTROPIC_LEVEL);
+   EMIT_TS_FLOAT(svga, unit, state->lod_bias, TEXTURE_LOD_BIAS);
+   EMIT_TS(svga, unit, state->addressu, ADDRESSU);
+   EMIT_TS(svga, unit, state->addressw, ADDRESSW);
+   EMIT_TS(svga, unit, state->bordercolor, BORDERCOLOR);
+   /* TEXCOORDINDEX -- hopefully not needed */
+   EMIT_TS(svga, unit, addressv, ADDRESSV);
+   EMIT_TS_FLOAT(svga, unit, gamma_val, GAMMA);
+}
+
 static enum pipe_error
 update_tss(struct svga_context *svga, 
           unsigned dirty )
 {
+   const unsigned shader = PIPE_SHADER_FRAGMENT;
   unsigned i;
   struct ts_queue queue;

+   if (svga_have_vgpu10(svga))
+      return PIPE_OK;
+
   queue.ts_count = 0;
-   for (i = 0; i < svga->curr.num_samplers; i++) {
-      if (svga->curr.sampler[i]) {
-         const struct svga_sampler_state *curr = svga->curr.sampler[i];
-
-         EMIT_TS(svga, i, curr->mipfilter, MIPFILTER, fail);
-         EMIT_TS(svga, i, curr->min_lod, TEXTURE_MIPMAP_LEVEL, fail);
-         EMIT_TS(svga, i, curr->magfilter, MAGFILTER, fail);
-         EMIT_TS(svga, i, curr->minfilter, MINFILTER, fail);
-         EMIT_TS(svga, i, curr->aniso_level, TEXTURE_ANISOTROPIC_LEVEL, fail);
-         EMIT_TS_FLOAT(svga, i, curr->lod_bias, TEXTURE_LOD_BIAS, fail);
-         EMIT_TS(svga, i, curr->addressu, ADDRESSU, fail);
-         EMIT_TS(svga, i, curr->addressw, ADDRESSW, fail);
-         EMIT_TS(svga, i, curr->bordercolor, BORDERCOLOR, fail);
-         // TEXCOORDINDEX -- hopefully not needed
-
-         if (svga->curr.tex_flags.flag_1d & (1 << i)) {
-            EMIT_TS(svga, i, SVGA3D_TEX_ADDRESS_WRAP, ADDRESSV, fail);
-         }
-         else
-            EMIT_TS(svga, i, curr->addressv, ADDRESSV, fail);
-
-         if (svga->curr.tex_flags.flag_srgb & (1 << i))
-            EMIT_TS_FLOAT(svga, i, 2.2f, GAMMA, fail);
-         else
-            EMIT_TS_FLOAT(svga, i, 1.0f, GAMMA, fail);
-
+   for (i = 0; i < svga->curr.num_samplers[shader]; i++) {
+      if (svga->curr.sampler[shader][i]) {
+         const struct svga_sampler_state *curr = svga->curr.sampler[shader][i];
+         emit_tss_unit(svga, i, curr, &queue);
      }
   }
+
+   /* polygon stipple sampler */
+   if (svga->curr.rast->templ.poly_stipple_enable) {
+      emit_tss_unit(svga,
+                    svga->state.hw_draw.fs->pstipple_sampler_unit,
+                    svga->polygon_stipple.sampler,
+                    &queue);
+   }
 
   if (queue.ts_count) {
      SVGA3dTextureState *ts;
@ -357,6 +435,7 @@ fail:
 struct svga_tracked_state svga_hw_tss = {
   "texture state emit",
   (SVGA_NEW_SAMPLER |
+    SVGA_NEW_STIPPLE |
    SVGA_NEW_TEXTURE_FLAGS),
   update_tss
 };
--- a/src/gallium/drivers/svga/svga_state_vdecl.c
+++ b/src/gallium/drivers/svga/svga_state_vdecl.c
@ -33,6 +33,7 @@
 #include "svga_draw.h"
 #include "svga_tgsi.h"
 #include "svga_screen.h"
+#include "svga_shader.h"
 #include "svga_resource_buffer.h"
 #include "svga_hw_reg.h"

@ -42,16 +43,14 @@ static enum pipe_error
 emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty)
 {
   const struct pipe_vertex_element *ve = svga->curr.velems->velem;
+   SVGA3dVertexDecl decls[SVGA3D_INPUTREG_MAX];
+   unsigned buffer_indexes[SVGA3D_INPUTREG_MAX];
   unsigned i;
   unsigned neg_bias = 0;

   assert(svga->curr.velems->count >=
          svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]);

-   /* specify number of vertex element declarations to come */
-   svga_hwtnl_reset_vdecl( svga->hwtnl,
-                           svga->curr.velems->count );
-
   /**
    * We can't set the VDECL offset to something negative, so we
    * must calculate a common negative additional index bias, and modify
@ -70,15 +69,16 @@ emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty)
   for (i = 0; i < svga->curr.velems->count; i++) {
      const struct pipe_vertex_buffer *vb =
         &svga->curr.vb[ve[i].vertex_buffer_index];
-      const struct svga_buffer *buffer;
+      struct svga_buffer *buffer;
      unsigned int offset = vb->buffer_offset + ve[i].src_offset;
+      unsigned tmp_neg_bias = 0;

      if (!vb->buffer)
         continue;

      buffer = svga_buffer(vb->buffer);
      if (buffer->uploaded.start > offset) {
-         unsigned tmp_neg_bias = buffer->uploaded.start - offset;
+         tmp_neg_bias = buffer->uploaded.start - offset;
         if (vb->stride)
            tmp_neg_bias = (tmp_neg_bias + vb->stride - 1) / vb->stride;
         neg_bias = MAX2(neg_bias, tmp_neg_bias);
@ -89,8 +89,7 @@ emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty)
      const struct pipe_vertex_buffer *vb =
         &svga->curr.vb[ve[i].vertex_buffer_index];
      unsigned usage, index;
-      const struct svga_buffer *buffer;
-      SVGA3dVertexDecl decl;
+      struct svga_buffer *buffer;

      if (!vb->buffer)
         continue;
@ -100,29 +99,37 @@ emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty)

      /* SVGA_NEW_VELEMENT
       */
-      decl.identity.type = svga->curr.velems->decl_type[i];
-      decl.identity.method = SVGA3D_DECLMETHOD_DEFAULT;
-      decl.identity.usage = usage;
-      decl.identity.usageIndex = index;
-      decl.array.stride = vb->stride;
+      decls[i].identity.type = svga->curr.velems->decl_type[i];
+      decls[i].identity.method = SVGA3D_DECLMETHOD_DEFAULT;
+      decls[i].identity.usage = usage;
+      decls[i].identity.usageIndex = index;
+      decls[i].array.stride = vb->stride;

      /* Compensate for partially uploaded vbo, and
       * for the negative index bias.
       */
-      decl.array.offset = (vb->buffer_offset
+      decls[i].array.offset = (vb->buffer_offset
                           + ve[i].src_offset
 			   + neg_bias * vb->stride
 			   - buffer->uploaded.start);

-      assert(decl.array.offset >= 0);
+      assert(decls[i].array.offset >= 0);

-      svga_hwtnl_vdecl( svga->hwtnl,
-                        i,
-                        &decl,
-                        buffer->uploaded.buffer ? buffer->uploaded.buffer :
-                        vb->buffer );
+      buffer_indexes[i] = ve[i].vertex_buffer_index;
+
+      assert(!buffer->uploaded.buffer);
   }

+   svga_hwtnl_vertex_decls(svga->hwtnl,
+                           svga->curr.velems->count,
+                           decls,
+                           buffer_indexes,
+                           svga->curr.velems->id);
+
+   svga_hwtnl_vertex_buffers(svga->hwtnl,
+                             svga->curr.num_vertex_buffers,
+                             svga->curr.vb);
+
   svga_hwtnl_set_index_bias( svga->hwtnl, -(int) neg_bias );
   return PIPE_OK;
 }
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@ -25,7 +25,6 @@

 #include "util/u_inlines.h"
 #include "pipe/p_defines.h"
-#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_bitmask.h"
@ -41,33 +40,6 @@
 #include "svga_hw_reg.h"


-static inline int
-compare_vs_keys(const struct svga_vs_compile_key *a,
-                const struct svga_vs_compile_key *b)
-{
-   unsigned keysize = svga_vs_key_size( a );
-   return memcmp( a, b, keysize );
-}
-
-
-/** Search for a vertex shader variant */
-static struct svga_shader_variant *
-search_vs_key(const struct svga_vertex_shader *vs,
-              const struct svga_vs_compile_key *key)
-{
-   struct svga_shader_variant *variant = vs->base.variants;
-
-   assert(key);
-
-   for ( ; variant; variant = variant->next) {
-      if (compare_vs_keys( key, &variant->key.vkey ) == 0)
-         return variant;
-   }
-   
-   return NULL;
-}
-
-
 /**
 * If we fail to compile a vertex shader we'll use a dummy/fallback shader
 * that simply emits a (0,0,0,1) vertex position.
@ -99,13 +71,29 @@ get_dummy_vertex_shader(void)
 }


+/** Translate a vertex shader with the vgpu10 or vgpu9 TGSI translator. */
+static struct svga_shader_variant *
+translate_vertex_program(struct svga_context *svga,
+                         const struct svga_vertex_shader *vs,
+                         const struct svga_compile_key *key)
+{
+   /* Only the vgpu10 translator takes the context argument */
+   if (!svga_have_vgpu10(svga))
+      return svga_tgsi_vgpu9_translate(&vs->base, key, PIPE_SHADER_VERTEX);
+
+   return svga_tgsi_vgpu10_translate(svga, &vs->base, key,
+                                     PIPE_SHADER_VERTEX);
+}
+
+
 /**
 * Replace the given shader's instruction with a simple / dummy shader.
 * We use this when normal shader translation fails.
 */
 static struct svga_shader_variant *
-get_compiled_dummy_vertex_shader(struct svga_vertex_shader *vs,
-                                 const struct svga_vs_compile_key *key)
+get_compiled_dummy_vertex_shader(struct svga_context *svga,
+                                 struct svga_vertex_shader *vs,
+                                 const struct svga_compile_key *key)
 {
   const struct tgsi_token *dummy = get_dummy_vertex_shader();
   struct svga_shader_variant *variant;
@ -117,7 +105,7 @@ get_compiled_dummy_vertex_shader(struct svga_vertex_shader *vs,
   FREE((void *) vs->base.tokens);
   vs->base.tokens = dummy;

-   variant = svga_translate_vertex_program(vs, key);
+   variant = translate_vertex_program(svga, vs, key);
   return variant;
 }

@ -128,16 +116,17 @@ get_compiled_dummy_vertex_shader(struct svga_vertex_shader *vs,
 static enum pipe_error
 compile_vs(struct svga_context *svga,
           struct svga_vertex_shader *vs,
-           const struct svga_vs_compile_key *key,
+           const struct svga_compile_key *key,
           struct svga_shader_variant **out_variant)
 {
   struct svga_shader_variant *variant;
   enum pipe_error ret = PIPE_ERROR;

-   variant = svga_translate_vertex_program( vs, key );
+   variant = translate_vertex_program(svga, vs, key);
   if (variant == NULL) {
-      /* some problem during translation, try the dummy shader */
-      variant = get_compiled_dummy_vertex_shader(vs, key);
+      debug_printf("Failed to compile vertex shader,"
+                   " using dummy shader instead.\n");
+      variant = get_compiled_dummy_vertex_shader(svga, vs, key);
      if (!variant) {
         ret = PIPE_ERROR;
         goto fail;
@ -146,11 +135,11 @@ compile_vs(struct svga_context *svga,

   if (svga_shader_too_large(svga, variant)) {
      /* too big, use dummy shader */
-      debug_printf("Shader too large (%lu bytes),"
+      debug_printf("Shader too large (%u bytes),"
                   " using dummy shader instead.\n",
-                   (unsigned long ) variant->nr_tokens
-                   * sizeof(variant->tokens[0]));
-      variant = get_compiled_dummy_vertex_shader(vs, key);
+                   (unsigned) (variant->nr_tokens
+                               * sizeof(variant->tokens[0])));
+      variant = get_compiled_dummy_vertex_shader(svga, vs, key);
      if (!variant) {
         ret = PIPE_ERROR;
         goto fail;
@ -163,10 +152,6 @@ compile_vs(struct svga_context *svga,

   *out_variant = variant;

-   /* insert variants at head of linked list */
-   variant->next = vs->base.variants;
-   vs->base.variants = variant;
-
   return PIPE_OK;

 fail:
@ -179,18 +164,44 @@ fail:
 /* SVGA_NEW_PRESCALE, SVGA_NEW_RAST, SVGA_NEW_FS
 */
 static void
-make_vs_key(struct svga_context *svga, struct svga_vs_compile_key *key)
+make_vs_key(struct svga_context *svga, struct svga_compile_key *key)
 {
+   const unsigned shader = PIPE_SHADER_VERTEX;
+
   memset(key, 0, sizeof *key);
-   key->need_prescale = svga->state.hw_clear.prescale.enabled;
-   key->allow_psiz = svga->curr.rast->templ.point_size_per_vertex;
+
+   if (svga->state.sw.need_swtnl && svga_have_vgpu10(svga)) {
+      /* Set both of these flags, to match compile_passthrough_vs() */
+      key->vs.passthrough = 1;
+      key->vs.undo_viewport = 1;
+      return;
+   }
+
+   key->vs.need_prescale = svga->state.hw_clear.prescale.enabled &&
+                           (svga->curr.gs == NULL);
+   key->vs.allow_psiz = svga->curr.rast->templ.point_size_per_vertex;

   /* SVGA_NEW_FS */
-   key->fs_generic_inputs = svga->curr.fs->generic_inputs;
+   key->vs.fs_generic_inputs = svga->curr.fs->generic_inputs;
+
+   svga_remap_generics(key->vs.fs_generic_inputs, key->generic_remap_table);

   /* SVGA_NEW_VELEMENT */
-   key->adjust_attrib_range = svga->curr.velems->adjust_attrib_range;
-   key->adjust_attrib_w_1 = svga->curr.velems->adjust_attrib_w_1;
+   key->vs.adjust_attrib_range = svga->curr.velems->adjust_attrib_range;
+   key->vs.adjust_attrib_w_1 = svga->curr.velems->adjust_attrib_w_1;
+   key->vs.attrib_is_pure_int = svga->curr.velems->attrib_is_pure_int;
+   key->vs.adjust_attrib_itof = svga->curr.velems->adjust_attrib_itof;
+   key->vs.adjust_attrib_utof = svga->curr.velems->adjust_attrib_utof;
+   key->vs.attrib_is_bgra = svga->curr.velems->attrib_is_bgra;
+   key->vs.attrib_puint_to_snorm = svga->curr.velems->attrib_puint_to_snorm;
+   key->vs.attrib_puint_to_uscaled = svga->curr.velems->attrib_puint_to_uscaled;
+   key->vs.attrib_puint_to_sscaled = svga->curr.velems->attrib_puint_to_sscaled;
+
+   /* SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER */
+   svga_init_shader_key_common(svga, shader, key);
+
+   /* SVGA_NEW_RAST */
+   key->clip_plane_enable = svga->curr.rast->templ.clip_plane_enable;
 }


@ -201,17 +212,128 @@ enum pipe_error
 svga_reemit_vs_bindings(struct svga_context *svga)
 {
   enum pipe_error ret;
-   struct svga_winsys_gb_shader *gbshader =
-      svga->state.hw_draw.vs ? svga->state.hw_draw.vs->gb_shader : NULL;
+   struct svga_winsys_gb_shader *gbshader = NULL;
+   SVGA3dShaderId shaderId = SVGA3D_INVALID_ID;

-   assert(svga->rebind.vs);
+   assert(svga->rebind.flags.vs);
   assert(svga_have_gb_objects(svga));

-   ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_VS, gbshader);
+   if (svga->state.hw_draw.vs) {
+      gbshader = svga->state.hw_draw.vs->gb_shader;
+      shaderId = svga->state.hw_draw.vs->id;
+   }
+
+   if (!svga_need_to_rebind_resources(svga)) {
+      ret =  svga->swc->resource_rebind(svga->swc, NULL, gbshader,
+                                        SVGA_RELOC_READ);
+      goto out;
+   }
+
+   if (svga_have_vgpu10(svga))
+      ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_VS,
+                                    gbshader, shaderId);
+   else
+      ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_VS, gbshader);
+
+ out:
   if (ret != PIPE_OK)
      return ret;

-   svga->rebind.vs = FALSE;
+   svga->rebind.flags.vs = FALSE;
+   return PIPE_OK;
+}
+
+
+/**
+ * Build and compile a trivial pass-through vertex shader.
+ *
+ * The current vertex shader is already executed by the 'draw'
+ * module, so we just need to generate a simple vertex shader
+ * to pass through all those VS outputs that will
+ * be consumed by the fragment shader.
+ * Used when we employ the 'draw' module.
+ *
+ * \param svga  the context; asserted to be a VGPU10 context
+ * \param vs  the current vertex shader (not referenced in this function;
+ *            a temporary shader is built from the generated tokens)
+ * \param fs  fragment shader whose COLOR/GENERIC/FOG inputs select the
+ *            attributes to pass through; asserted to be non-NULL
+ * \param out_variant  receives the compiled variant; written on success only
+ * \return PIPE_OK on success, PIPE_ERROR_OUT_OF_MEMORY if the ureg program
+ *         cannot be created, otherwise the error returned by compile_vs()
+ */
+static enum pipe_error
+compile_passthrough_vs(struct svga_context *svga,
+                       struct svga_vertex_shader *vs,
+                       struct svga_fragment_shader *fs,
+                       struct svga_shader_variant **out_variant)
+{
+   struct svga_shader_variant *variant = NULL;
+   unsigned num_inputs;
+   unsigned i;
+   unsigned num_elements;
+   struct svga_vertex_shader new_vs;
+   struct ureg_src src[PIPE_MAX_SHADER_INPUTS];
+   struct ureg_dst dst[PIPE_MAX_SHADER_OUTPUTS];
+   struct ureg_program *ureg;
+   unsigned num_tokens;
+   struct svga_compile_key key;
+   enum pipe_error ret;
+
+   assert(svga_have_vgpu10(svga));
+   assert(fs);
+
+   num_inputs = fs->base.info.num_inputs;
+
+   ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!ureg)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* draw will always add position */
+   dst[0] = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
+   src[0] = ureg_DECL_vs_input(ureg, 0);
+   num_elements = 1;
+
+   /**
+    * swtnl backend redefines the input layout based on the
+    * fragment shader's inputs. So we only need to passthrough
+    * those inputs that will be consumed by the fragment shader.
+    * Note: DX10 requires the number of vertex elements
+    * specified in the input layout to be no less than the
+    * number of inputs to the vertex shader.
+    */
+   for (i = 0; i < num_inputs; i++) {
+      switch (fs->base.info.input_semantic_name[i]) {
+      case TGSI_SEMANTIC_COLOR:
+      case TGSI_SEMANTIC_GENERIC:
+      case TGSI_SEMANTIC_FOG:
+         dst[num_elements] = ureg_DECL_output(ureg,
+                                fs->base.info.input_semantic_name[i],
+                                fs->base.info.input_semantic_index[i]);
+         src[num_elements] = ureg_DECL_vs_input(ureg, num_elements);
+         num_elements++;
+         break;
+      default:
+         break;
+      }
+   }
+
+   for (i = 0; i < num_elements; i++) {
+      ureg_MOV(ureg, dst[i], src[i]);
+   }
+
+   ureg_END(ureg);
+
+   memset(&new_vs, 0, sizeof(new_vs));
+   new_vs.base.tokens = ureg_get_tokens(ureg, &num_tokens);
+   tgsi_scan_shader(new_vs.base.tokens, &new_vs.base.info);
+
+   memset(&key, 0, sizeof(key));
+   key.vs.undo_viewport = 1;
+
+   ret = compile_vs(svga, &new_vs, &key, &variant);
+
+   /* The temporary tokens and the ureg program are only needed for the
+    * compile_vs() call above.  Release them before looking at the result
+    * so that they are not leaked when compilation fails.
+    */
+   ureg_free_tokens(new_vs.base.tokens);
+   ureg_destroy(ureg);
+
+   if (ret != PIPE_OK)
+      return ret;
+
+   /* Overwrite the variant key to indicate it's a pass-through VS */
+   memset(&variant->key, 0, sizeof(variant->key));
+   variant->key.vs.passthrough = 1;
+   variant->key.vs.undo_viewport = 1;
+
+   *out_variant = variant;
+
   return PIPE_OK;
 }

@ -219,45 +341,67 @@ svga_reemit_vs_bindings(struct svga_context *svga)
 static enum pipe_error
 emit_hw_vs(struct svga_context *svga, unsigned dirty)
 {
-   struct svga_shader_variant *variant = NULL;
+   struct svga_shader_variant *variant;
+   struct svga_vertex_shader *vs = svga->curr.vs;
+   struct svga_fragment_shader *fs = svga->curr.fs;
   enum pipe_error ret = PIPE_OK;
+   struct svga_compile_key key;
+
+   /* If there is an active geometry shader, and it has stream output
+    * defined, then we will skip the stream output from the vertex shader
+    */
+   if (!svga_have_gs_streamout(svga)) {
+      /* No GS stream out */
+      if (svga_have_vs_streamout(svga)) {
+         /* Set VS stream out */
+         svga_set_stream_output(svga, vs->base.stream_output);
+      }
+      else {
+         /* turn off stream out */
+         svga_set_stream_output(svga, NULL);
+      }
+   }

   /* SVGA_NEW_NEED_SWTNL */
-   if (!svga->state.sw.need_swtnl) {
-      struct svga_vertex_shader *vs = svga->curr.vs;
-      struct svga_vs_compile_key key;
+   if (svga->state.sw.need_swtnl && !svga_have_vgpu10(svga)) {
+      /* No vertex shader is needed */
+      variant = NULL;
+   }
+   else {
+      make_vs_key(svga, &key);

-      make_vs_key( svga, &key );
+      /* See if we already have a VS variant that matches the key */
+      variant = svga_search_shader_key(&vs->base, &key);

-      variant = search_vs_key( vs, &key );
      if (!variant) {
-         ret = compile_vs( svga, vs, &key, &variant );
+         /* Create VS variant now */
+         if (key.vs.passthrough) {
+            ret = compile_passthrough_vs(svga, vs, fs, &variant);
+         }
+         else {
+            ret = compile_vs(svga, vs, &key, &variant);
+         }
         if (ret != PIPE_OK)
            return ret;
-      }

-      assert(variant);
+         /* insert the new variant at head of linked list */
+         assert(variant);
+         variant->next = vs->base.variants;
+         vs->base.variants = variant;
+      }
   }

   if (variant != svga->state.hw_draw.vs) {
-      if (svga_have_gb_objects(svga)) {
-         struct svga_winsys_gb_shader *gbshader =
-            variant ? variant->gb_shader : NULL;
-         ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_VS, gbshader);
-         if (ret != PIPE_OK)
-            return ret;
-
-         svga->rebind.vs = FALSE;
-      }
-      else {
-         unsigned id = variant ? variant->id : SVGA_ID_INVALID;
-         ret = SVGA3D_SetShader(svga->swc, SVGA3D_SHADERTYPE_VS, id);
+      /* Bind the new variant */
+      if (variant) {
+         ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, variant);
         if (ret != PIPE_OK)
            return ret;
+         svga->rebind.flags.vs = FALSE;
      }

      svga->dirty |= SVGA_NEW_VS_VARIANT;
-      svga->state.hw_draw.vs = variant;      
+      svga->state.hw_draw.vs = variant;
   }

   return PIPE_OK;
@ -268,6 +412,9 @@ struct svga_tracked_state svga_hw_vs =
   "vertex shader (hwtnl)",
   (SVGA_NEW_VS |
    SVGA_NEW_FS |
+    SVGA_NEW_TEXTURE_BINDING |
+    SVGA_NEW_SAMPLER |
+    SVGA_NEW_RAST |
    SVGA_NEW_PRESCALE |
    SVGA_NEW_VELEMENT |
    SVGA_NEW_NEED_SWTNL),
--- a/src/gallium/drivers/svga/svga_surface.c
+++ b/src/gallium/drivers/svga/svga_surface.c
@ -29,6 +29,7 @@
 #include "pipe/p_defines.h"
 #include "util/u_inlines.h"
 #include "os/os_thread.h"
+#include "util/u_bitmask.h"
 #include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
@ -36,19 +37,21 @@
 #include "svga_format.h"
 #include "svga_screen.h"
 #include "svga_context.h"
+#include "svga_sampler_view.h"
 #include "svga_resource_texture.h"
 #include "svga_surface.h"
 #include "svga_debug.h"

+static void svga_mark_surface_dirty(struct pipe_surface *surf);

 void
 svga_texture_copy_handle(struct svga_context *svga,
                         struct svga_winsys_surface *src_handle,
                         unsigned src_x, unsigned src_y, unsigned src_z,
-                         unsigned src_level, unsigned src_face,
+                         unsigned src_level, unsigned src_layer,
                         struct svga_winsys_surface *dst_handle,
                         unsigned dst_x, unsigned dst_y, unsigned dst_z,
-                         unsigned dst_level, unsigned dst_face,
+                         unsigned dst_level, unsigned dst_layer,
                         unsigned width, unsigned height, unsigned depth)
 {
   struct svga_surface dst, src;
@ -59,12 +62,12 @@ svga_texture_copy_handle(struct svga_context *svga,

   src.handle = src_handle;
   src.real_level = src_level;
-   src.real_face = src_face;
+   src.real_layer = src_layer;
   src.real_zslice = 0;

   dst.handle = dst_handle;
   dst.real_level = dst_level;
-   dst.real_face = dst_face;
+   dst.real_layer = dst_layer;
   dst.real_zslice = 0;

   box.x = dst_x;
@ -103,11 +106,13 @@ svga_texture_copy_handle(struct svga_context *svga,
 struct svga_winsys_surface *
 svga_texture_view_surface(struct svga_context *svga,
                          struct svga_texture *tex,
+                          unsigned bind_flags,
                          SVGA3dSurfaceFlags flags,
                          SVGA3dSurfaceFormat format,
                          unsigned start_mip,
                          unsigned num_mip,
-                          int face_pick,
+                          int layer_pick,
+                          unsigned num_layers,
                          int zslice_pick,
                          struct svga_host_surface_cache_key *key) /* OUT */
 {
@ -117,8 +122,8 @@ svga_texture_view_surface(struct svga_context *svga,
   unsigned z_offset = 0;

   SVGA_DBG(DEBUG_PERF, 
-            "svga: Create surface view: face %d zslice %d mips %d..%d\n",
-            face_pick, zslice_pick, start_mip, start_mip+num_mip-1);
+            "svga: Create surface view: layer %d zslice %d mips %d..%d\n",
+            layer_pick, zslice_pick, start_mip, start_mip+num_mip-1);

   key->flags = flags;
   key->format = format;
@ -127,12 +132,20 @@ svga_texture_view_surface(struct svga_context *svga,
   key->size.height = u_minify(tex->b.b.height0, start_mip);
   key->size.depth = zslice_pick < 0 ? u_minify(tex->b.b.depth0, start_mip) : 1;
   key->cachable = 1;
+   key->arraySize = 1;
+   key->numFaces = 1;
+   key->sampleCount = tex->b.b.nr_samples;
+
+   if (key->sampleCount > 1) {
+      key->flags |= SVGA3D_SURFACE_MASKABLE_ANTIALIAS;
+   }
   
-   if (tex->b.b.target == PIPE_TEXTURE_CUBE && face_pick < 0) {
+   if (tex->b.b.target == PIPE_TEXTURE_CUBE && layer_pick < 0) {
      key->flags |= SVGA3D_SURFACE_CUBEMAP;
      key->numFaces = 6;
-   } else {
-      key->numFaces = 1;
+   } else if (tex->b.b.target == PIPE_TEXTURE_1D_ARRAY ||
+              tex->b.b.target == PIPE_TEXTURE_2D_ARRAY) {
+      key->arraySize = num_layers;
   }

   if (key->format == SVGA3D_FORMAT_INVALID) {
@ -141,7 +154,7 @@ svga_texture_view_surface(struct svga_context *svga,
   }

   SVGA_DBG(DEBUG_DMA, "surface_create for texture view\n");
-   handle = svga_screen_surface_create(ss, key);
+   handle = svga_screen_surface_create(ss, bind_flags, PIPE_USAGE_DEFAULT, key);
   if (!handle) {
      key->cachable = 0;
      return NULL;
@ -149,15 +162,15 @@ svga_texture_view_surface(struct svga_context *svga,

   SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture view)\n", handle);

-   if (face_pick < 0)
-      face_pick = 0;
+   if (layer_pick < 0)
+      layer_pick = 0;

   if (zslice_pick >= 0)
      z_offset = zslice_pick;

   for (i = 0; i < key->numMipLevels; i++) {
-      for (j = 0; j < key->numFaces; j++) {
-         if (svga_is_texture_level_defined(tex, j + face_pick, i + start_mip)) {
+      for (j = 0; j < key->numFaces * key->arraySize; j++) {
+         if (svga_is_texture_level_defined(tex, j + layer_pick, i + start_mip)) {
            unsigned depth = (zslice_pick < 0 ?
                              u_minify(tex->b.b.depth0, i + start_mip) :
                              1);
@ -166,7 +179,7 @@ svga_texture_view_surface(struct svga_context *svga,
                                     tex->handle, 
                                     0, 0, z_offset, 
                                     i + start_mip, 
-                                     j + face_pick,
+                                     j + layer_pick,
                                     handle, 0, 0, 0, i, j,
                                     u_minify(tex->b.b.width0, i + start_mip),
                                     u_minify(tex->b.b.height0, i + start_mip),
@ -179,33 +192,43 @@ svga_texture_view_surface(struct svga_context *svga,
 }


+/**
+ * A helper function to create a surface view.
+ * The view boolean flag specifies whether svga_texture_view_surface()
+ * will be called to create a cloned surface and resource for the view.
+ */
 static struct pipe_surface *
-svga_create_surface(struct pipe_context *pipe,
-                    struct pipe_resource *pt,
-                    const struct pipe_surface *surf_tmpl)
+svga_create_surface_view(struct pipe_context *pipe,
+                         struct pipe_resource *pt,
+                         const struct pipe_surface *surf_tmpl,
+                         boolean view)
 {
   struct svga_context *svga = svga_context(pipe);
   struct svga_texture *tex = svga_texture(pt);
   struct pipe_screen *screen = pipe->screen;
   struct svga_screen *ss = svga_screen(screen);
   struct svga_surface *s;
-   unsigned face, zslice;
-   boolean view = FALSE;
-   SVGA3dSurfaceFlags flags;
+   unsigned layer, zslice, bind;
+   unsigned nlayers = 1;
+   SVGA3dSurfaceFlags flags = 0;
   SVGA3dSurfaceFormat format;

-   assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
-
   s = CALLOC_STRUCT(svga_surface);
   if (!s)
      return NULL;

   if (pt->target == PIPE_TEXTURE_CUBE) {
-      face = surf_tmpl->u.tex.first_layer;
+      layer = surf_tmpl->u.tex.first_layer;
      zslice = 0;
   }
+   else if (pt->target == PIPE_TEXTURE_1D_ARRAY ||
+            pt->target == PIPE_TEXTURE_2D_ARRAY) {
+      layer = surf_tmpl->u.tex.first_layer;
+      zslice = 0;
+      nlayers = surf_tmpl->u.tex.last_layer - surf_tmpl->u.tex.first_layer + 1;
+   }
   else {
-      face = 0;
+      layer = 0;
      zslice = surf_tmpl->u.tex.first_layer;
   }

@ -218,52 +241,73 @@ svga_create_surface(struct pipe_context *pipe,
   s->base.u.tex.level = surf_tmpl->u.tex.level;
   s->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer;
   s->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer;
+   s->view_id = SVGA3D_INVALID_ID;
+
+   s->backed = NULL;

   if (util_format_is_depth_or_stencil(surf_tmpl->format)) {
-      flags = SVGA3D_SURFACE_HINT_DEPTHSTENCIL;
+      flags = SVGA3D_SURFACE_HINT_DEPTHSTENCIL |
+              SVGA3D_SURFACE_BIND_DEPTH_STENCIL;
+      bind = PIPE_BIND_DEPTH_STENCIL;
   }
   else {
-      flags = SVGA3D_SURFACE_HINT_RENDERTARGET;
+      flags = SVGA3D_SURFACE_HINT_RENDERTARGET |
+              SVGA3D_SURFACE_BIND_RENDER_TARGET;
+      bind = PIPE_BIND_RENDER_TARGET;
   }

-   format = svga_translate_format(ss, surf_tmpl->format, 0);
+   if (tex->imported)
+      format = tex->key.format;
+   else
+      format = svga_translate_format(ss, surf_tmpl->format, bind);
+
   assert(format != SVGA3D_FORMAT_INVALID);

-   if (svga_screen(screen)->debug.force_surface_view)
-      view = TRUE;
-
-   /* Currently only used for compressed textures */
-   if (format != svga_translate_format(ss, surf_tmpl->format, 0)) {
-      view = TRUE;
-   }
-
-   if (surf_tmpl->u.tex.level != 0 &&
-       svga_screen(screen)->debug.force_level_surface_view)
-      view = TRUE;
-
-   if (pt->target == PIPE_TEXTURE_3D)
-      view = TRUE;
-
-   if (svga_screen(screen)->debug.no_surface_view)
-      view = FALSE;
-
   if (view) {
-      SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u face %u z %u, %p\n",
-               pt, surf_tmpl->u.tex.level, face, zslice, s);
+      SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u layer %u z %u, %p\n",
+               pt, surf_tmpl->u.tex.level, layer, zslice, s);

-      s->handle = svga_texture_view_surface(svga, tex, flags, format,
-                                            surf_tmpl->u.tex.level,
-                                            1, face, zslice, &s->key);
-      s->real_face = 0;
+      if (svga_have_vgpu10(svga)) {
+         switch (pt->target) {
+         case PIPE_TEXTURE_1D:
+            flags |= SVGA3D_SURFACE_1D;
+            break;
+         case PIPE_TEXTURE_1D_ARRAY:
+            flags |= SVGA3D_SURFACE_1D | SVGA3D_SURFACE_ARRAY;
+            break;
+         case PIPE_TEXTURE_2D_ARRAY:
+            flags |= SVGA3D_SURFACE_ARRAY;
+            break;
+         case PIPE_TEXTURE_3D:
+            flags |= SVGA3D_SURFACE_VOLUME;
+            break;
+         case PIPE_TEXTURE_CUBE:
+            if (nlayers == 6)
+               flags |= SVGA3D_SURFACE_CUBEMAP;
+            break;
+         default:
+            break;
+         }
+      }
+
+      /* When we clone the surface view resource, use the format used in
+       * the creation of the original resource.
+       */
+      s->handle = svga_texture_view_surface(svga, tex, bind, flags, tex->key.format,
+                                            surf_tmpl->u.tex.level, 1,
+                                            layer, nlayers, zslice, &s->key);
+      s->key.format = format;
+      s->real_layer = 0;
      s->real_level = 0;
      s->real_zslice = 0;
   } else {
-      SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, face %u, z %u, %p\n",
-               pt, surf_tmpl->u.tex.level, face, zslice, s);
+      SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, layer %u, z %u, %p\n",
+               pt, surf_tmpl->u.tex.level, layer, zslice, s);

      memset(&s->key, 0, sizeof s->key);
+      s->key.format = format;
      s->handle = tex->handle;
-      s->real_face = face;
+      s->real_layer = layer;
      s->real_zslice = zslice;
      s->real_level = surf_tmpl->u.tex.level;
   }
@ -272,19 +316,189 @@ svga_create_surface(struct pipe_context *pipe,
 }


+/**
+ * The pipe_context::create_surface hook.
+ * Works out whether a separate surface view is wanted and hands the
+ * actual work over to svga_create_surface_view().
+ */
+static struct pipe_surface *
+svga_create_surface(struct pipe_context *pipe,
+                    struct pipe_resource *pt,
+                    const struct pipe_surface *surf_tmpl)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_screen *ss = svga_screen(pipe->screen);
+   boolean view;
+
+   if (svga_have_vgpu10(svga) || ss->debug.no_surface_view) {
+      /* These two conditions override every request for a view */
+      view = FALSE;
+   }
+   else if (ss->debug.force_surface_view ||
+            (surf_tmpl->u.tex.level != 0 &&
+             ss->debug.force_level_surface_view) ||
+            pt->target == PIPE_TEXTURE_3D) {
+      /* A debug option asked for a view, or the texture is 3D */
+      view = TRUE;
+   }
+   else {
+      view = FALSE;
+   }
+
+   return svga_create_surface_view(pipe, pt, surf_tmpl, view);
+}
+
+
+/**
+ * Clone the surface view and its associated resource.
+ *
+ * The clone is created on first use and remembered in s->backed; later
+ * calls reuse it.  The clone is marked dirty on every call.
+ *
+ * \param svga  the context
+ * \param s  the surface view to clone
+ * \return the backed surface view, or NULL if it could not be created
+ */
+static struct svga_surface *
+create_backed_surface_view(struct svga_context *svga, struct svga_surface *s)
+{
+   struct svga_surface *bs = s->backed;
+
+   if (bs == NULL) {
+      struct svga_texture *tex = svga_texture(s->base.texture);
+      struct pipe_surface *backed_view;
+
+      backed_view = svga_create_surface_view(&svga->pipe,
+                                             &tex->b.b,
+                                             &s->base,
+                                             TRUE);
+
+      /* svga_create_surface_view() returns NULL when it cannot allocate
+       * the surface; don't hand a NULL surface to svga_mark_surface_dirty().
+       */
+      if (!backed_view)
+         return NULL;
+
+      bs = svga_surface(backed_view);
+      s->backed = bs;
+   }
+
+   svga_mark_surface_dirty(&bs->base);
+
+   return bs;
+}
+
+/**
+ * Create a DX RenderTarget/DepthStencil View for the given surface,
+ * if needed.
+ *
+ * If the surface's resource is also in use by a sampler view, a backed
+ * (cloned) surface view is used instead and that is the one returned.
+ *
+ * \param svga  the context; asserted to be a VGPU10 context
+ * \param s  the surface to validate
+ * \return the pipe_surface to bind (either \p s or its backed clone), or
+ *         NULL if a backed clone was required but could not be created.
+ *         If defining the view fails, the surface is still returned but
+ *         its view_id is left as SVGA3D_INVALID_ID.
+ */
+struct pipe_surface *
+svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s)
+{
+   enum pipe_error ret = PIPE_OK;
+   int try;
+
+   assert(svga_have_vgpu10(svga));
+
+   /**
+    * DX spec explicitly specifies that no resource can be bound to a render
+    * target view and a shader resource view simultaneously.
+    * So first check if the resource bound to this surface view collides with
+    * a sampler view. If so, then we will clone this surface view and its
+    * associated resource. We will then use the cloned surface view for
+    * render target.
+    */
+   if (svga_check_sampler_view_resource_collision(svga, s->handle)) {
+      /* s->handle is a pointer, so print it with %p */
+      SVGA_DBG(DEBUG_VIEWS,
+               "same resource used in shaderResource and renderTarget %p\n",
+               s->handle);
+      s = create_backed_surface_view(svga, s);
+      /* Don't dereference a surface that failed to be created */
+      if (!s)
+         return NULL;
+   }
+
+   if (s->view_id == SVGA3D_INVALID_ID) {
+      SVGA3dResourceType resType;
+      SVGA3dRenderTargetViewDesc desc;
+
+      desc.tex.mipSlice = s->real_level;
+      desc.tex.firstArraySlice = s->real_layer + s->real_zslice;
+      desc.tex.arraySize =
+         s->base.u.tex.last_layer - s->base.u.tex.first_layer + 1;
+
+      s->view_id = util_bitmask_add(svga->surface_view_id_bm);
+
+      switch (s->base.texture->target) {
+      case PIPE_TEXTURE_1D:
+      case PIPE_TEXTURE_1D_ARRAY:
+         resType = SVGA3D_RESOURCE_TEXTURE1D;
+         break;
+      case PIPE_TEXTURE_RECT:
+      case PIPE_TEXTURE_2D:
+      case PIPE_TEXTURE_2D_ARRAY:
+      case PIPE_TEXTURE_CUBE:
+         /* drawing to cube map is treated as drawing to 2D array */
+         resType = SVGA3D_RESOURCE_TEXTURE2D;
+         break;
+      case PIPE_TEXTURE_3D:
+         resType = SVGA3D_RESOURCE_TEXTURE3D;
+         break;
+      default:
+         assert(!"Unexpected texture target");
+         resType = SVGA3D_RESOURCE_TEXTURE2D;
+      }
+
+      /* If the first attempt fails, flush the context and try once more */
+      for (try = 0; try < 2; try++) {
+         if (util_format_is_depth_or_stencil(s->base.format)) {
+            ret = SVGA3D_vgpu10_DefineDepthStencilView(svga->swc,
+                                                       s->view_id,
+                                                       s->handle,
+                                                       s->key.format,
+                                                       resType,
+                                                       &desc);
+         }
+         else {
+            ret = SVGA3D_vgpu10_DefineRenderTargetView(svga->swc,
+                                                       s->view_id,
+                                                       s->handle,
+                                                       s->key.format,
+                                                       resType,
+                                                       &desc);
+         }
+         if (ret == PIPE_OK)
+            break;
+         svga_context_flush(svga, NULL);
+      }
+
+      assert(ret == PIPE_OK);
+      if (ret != PIPE_OK) {
+         /* Give the view id back so that it can be reused */
+         util_bitmask_clear(svga->surface_view_id_bm, s->view_id);
+         s->view_id = SVGA3D_INVALID_ID;
+      }
+   }
+   return &s->base;
+}
+
+
+
 static void
 svga_surface_destroy(struct pipe_context *pipe,
                     struct pipe_surface *surf)
 {
+   struct svga_context *svga = svga_context(pipe);
   struct svga_surface *s = svga_surface(surf);
   struct svga_texture *t = svga_texture(surf->texture);
   struct svga_screen *ss = svga_screen(surf->texture->screen);
+   enum pipe_error ret = PIPE_OK;
+
+   /* Destroy the backed view surface if it exists */
+   if (s->backed) {
+      svga_surface_destroy(pipe, &s->backed->base);
+      s->backed = NULL;
+   }

   if (s->handle != t->handle) {
      SVGA_DBG(DEBUG_DMA, "unref sid %p (tex surface)\n", s->handle);
      svga_screen_surface_destroy(ss, &s->key, &s->handle);
   }

+   if (s->view_id != SVGA3D_INVALID_ID) {
+      unsigned try;
+
+      assert(svga_have_vgpu10(svga));
+      for (try = 0; try < 2; try++) {
+         if (util_format_is_depth_or_stencil(s->base.format)) {
+            ret = SVGA3D_vgpu10_DestroyDepthStencilView(svga->swc, s->view_id);
+         }
+         else {
+            ret = SVGA3D_vgpu10_DestroyRenderTargetView(svga->swc, s->view_id);
+         }
+         if (ret == PIPE_OK)
+            break;
+         svga_context_flush(svga, NULL);
+      }
+      assert(ret == PIPE_OK);
+      util_bitmask_clear(svga->surface_view_id_bm, s->view_id);
+   }
+
   pipe_resource_reference(&surf->texture, NULL);
   FREE(surf);
 }
@ -302,11 +516,8 @@ svga_mark_surface_dirty(struct pipe_surface *surf)

      if (s->handle == tex->handle) {
         /* hmm so 3d textures always have all their slices marked ? */
-         if (surf->texture->target == PIPE_TEXTURE_CUBE)
-            svga_define_texture_level(tex, surf->u.tex.first_layer,
-                                      surf->u.tex.level);
-         else
-            svga_define_texture_level(tex, 0, surf->u.tex.level);
+         svga_define_texture_level(tex, surf->u.tex.first_layer,
+                                   surf->u.tex.level);
      }
      else {
         /* this will happen later in svga_propagate_surface */
@ -345,18 +556,26 @@ svga_propagate_surface(struct svga_context *svga, struct pipe_surface *surf)
   struct svga_surface *s = svga_surface(surf);
   struct svga_texture *tex = svga_texture(surf->texture);
   struct svga_screen *ss = svga_screen(surf->texture->screen);
-   unsigned zslice, face;
+   unsigned zslice, layer;
+   unsigned nlayers = 1;
+   unsigned i;

   if (!s->dirty)
      return;

   if (surf->texture->target == PIPE_TEXTURE_CUBE) {
      zslice = 0;
-      face = surf->u.tex.first_layer;
+      layer = surf->u.tex.first_layer;
+   }
+   else if (surf->texture->target == PIPE_TEXTURE_1D_ARRAY ||
+            surf->texture->target == PIPE_TEXTURE_2D_ARRAY) {
+      zslice = 0;
+      layer = surf->u.tex.first_layer;
+      nlayers = surf->u.tex.last_layer - surf->u.tex.first_layer + 1;
   }
   else {
      zslice = surf->u.tex.first_layer;
-      face = 0;
+      layer = 0;
   }

   s->dirty = FALSE;
@ -367,12 +586,14 @@ svga_propagate_surface(struct svga_context *svga, struct pipe_surface *surf)
      SVGA_DBG(DEBUG_VIEWS,
               "svga: Surface propagate: tex %p, level %u, from %p\n",
               tex, surf->u.tex.level, surf);
-      svga_texture_copy_handle(svga,
-                               s->handle, 0, 0, 0, s->real_level, s->real_face,
-                               tex->handle, 0, 0, zslice, surf->u.tex.level, face,
-                               u_minify(tex->b.b.width0, surf->u.tex.level),
-                               u_minify(tex->b.b.height0, surf->u.tex.level), 1);
-      svga_define_texture_level(tex, face, surf->u.tex.level);
+      for (i = 0; i < nlayers; i++) {
+         svga_texture_copy_handle(svga,
+                                  s->handle, 0, 0, 0, s->real_level, s->real_layer + i,
+                                  tex->handle, 0, 0, zslice, surf->u.tex.level, layer + i,
+                                  u_minify(tex->b.b.width0, surf->u.tex.level),
+                                  u_minify(tex->b.b.height0, surf->u.tex.level), 1);
+         svga_define_texture_level(tex, layer + i, surf->u.tex.level);
+      }
   }
 }

@ -390,10 +611,76 @@ svga_surface_needs_propagation(const struct pipe_surface *surf)
 }


+/**
+ * The pipe_context::get_sample_position hook.
+ *
+ * Looks up the position of one sample in a fixed table chosen by
+ * \p sample_count (4, 8 or 16; any other count uses the single
+ * pixel-centre entry).
+ *
+ * \param context  the context (not referenced in this function)
+ * \param sample_count  number of samples per pixel
+ * \param sample_index  which sample to return; if it is out of range for
+ *                      the selected table, the pixel centre is returned
+ * \param pos_out  receives the x and y position (two floats)
+ */
+static void
+svga_get_sample_position(struct pipe_context *context,
+                         unsigned sample_count, unsigned sample_index,
+                         float *pos_out)
+{
+   /* We can't actually query the device to learn the sample positions.
+    * These were grabbed from nvidia's driver.
+    */
+   static const float pos1[1][2] = {
+      { 0.5, 0.5 }
+   };
+   static const float pos4[4][2] = {
+      { 0.375000, 0.125000 },
+      { 0.875000, 0.375000 },
+      { 0.125000, 0.625000 },
+      { 0.625000, 0.875000 }
+   };
+   static const float pos8[8][2] = {
+      { 0.562500, 0.312500 },
+      { 0.437500, 0.687500 },
+      { 0.812500, 0.562500 },
+      { 0.312500, 0.187500 },
+      { 0.187500, 0.812500 },
+      { 0.062500, 0.437500 },
+      { 0.687500, 0.937500 },
+      { 0.937500, 0.062500 }
+   };
+   static const float pos16[16][2] = {
+      { 0.187500, 0.062500 },
+      { 0.437500, 0.187500 },
+      { 0.062500, 0.312500 },
+      { 0.312500, 0.437500 },
+      { 0.687500, 0.062500 },
+      { 0.937500, 0.187500 },
+      { 0.562500, 0.312500 },
+      { 0.812500, 0.437500 },
+      { 0.187500, 0.562500 },
+      { 0.437500, 0.687500 },
+      { 0.062500, 0.812500 },
+      { 0.312500, 0.937500 },
+      { 0.687500, 0.562500 },
+      { 0.937500, 0.687500 },
+      { 0.562500, 0.812500 },
+      { 0.812500, 0.937500 }
+   };
+   const float (*positions)[2];
+   unsigned num_positions;
+
+   switch (sample_count) {
+   case 4:
+      positions = pos4;
+      num_positions = 4;
+      break;
+   case 8:
+      positions = pos8;
+      num_positions = 8;
+      break;
+   case 16:
+      positions = pos16;
+      num_positions = 16;
+      break;
+   default:
+      positions = pos1;
+      num_positions = 1;
+   }
+
+   /* Never read past the end of the selected table, e.g. when an
+    * unlisted sample count selects the one-entry table but the index
+    * is non-zero.  Fall back to the pixel centre instead.
+    */
+   if (sample_index >= num_positions) {
+      positions = pos1;
+      sample_index = 0;
+   }
+
+   pos_out[0] = positions[sample_index][0];
+   pos_out[1] = positions[sample_index][1];
+}
+

 void
 svga_init_surface_functions(struct svga_context *svga)
 {
   svga->pipe.create_surface = svga_create_surface;
   svga->pipe.surface_destroy = svga_surface_destroy;
+   svga->pipe.get_sample_position = svga_get_sample_position;
 }
--- a/src/gallium/drivers/svga/svga_surface.h
+++ b/src/gallium/drivers/svga/svga_surface.h
@ -47,11 +47,15 @@ struct svga_surface
   struct svga_host_surface_cache_key key;
   struct svga_winsys_surface *handle;

-   unsigned real_face;
+   unsigned real_layer;
   unsigned real_level;
   unsigned real_zslice;

   boolean dirty;
+
+   /* VGPU10 */
+   SVGA3dRenderTargetViewId view_id;
+   struct svga_surface *backed;
 };


@ -64,11 +68,13 @@ svga_surface_needs_propagation(const struct pipe_surface *surf);
 struct svga_winsys_surface *
 svga_texture_view_surface(struct svga_context *svga,
                          struct svga_texture *tex,
+                          unsigned bind_flags,
                          SVGA3dSurfaceFlags flags,
                          SVGA3dSurfaceFormat format,
                          unsigned start_mip,
                          unsigned num_mip,
-                          int face_pick,
+                          int layer_pick,
+                          unsigned num_layers,
                          int zslice_pick,
                          struct svga_host_surface_cache_key *key); /* OUT */

@ -99,4 +105,8 @@ svga_surface_const(const struct pipe_surface *surface)
   return (const struct svga_surface *)surface;
 }

+struct pipe_surface *
+svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s);
+
+
 #endif
--- a/src/gallium/drivers/svga/svga_swtnl_backend.c
+++ b/src/gallium/drivers/svga/svga_swtnl_backend.c
@ -40,6 +40,7 @@
 #include "svga_reg.h"
 #include "svga3d_reg.h"
 #include "svga_draw.h"
+#include "svga_shader.h"
 #include "svga_swtnl_private.h"


@ -129,9 +130,12 @@ svga_vbuf_render_map_vertices( struct vbuf_render *render )
                                         PIPE_TRANSFER_DISCARD_RANGE |
                                         PIPE_TRANSFER_UNSYNCHRONIZED,
                                         &svga_render->vbuf_transfer);
-      if (ptr)
+      if (ptr) {
+         svga_render->vbuf_ptr = ptr;
         return ptr + svga_render->vbuf_offset;
+      }
      else {
+         svga_render->vbuf_ptr = NULL;
         svga_render->vbuf_transfer = NULL;
         return NULL;
      }
@ -154,6 +158,18 @@ svga_vbuf_render_unmap_vertices( struct vbuf_render *render,

   offset = svga_render->vbuf_offset + svga_render->vertex_size * min_index;
   length = svga_render->vertex_size * (max_index + 1 - min_index);
+
+   if (0) {
+      /* dump vertex data */
+      const float *f = (const float *) ((char *) svga_render->vbuf_ptr +
+                                        svga_render->vbuf_offset);
+      unsigned i;
+      debug_printf("swtnl vertex data:\n");
+      for (i = 0; i < length / 4; i += 4) {
+         debug_printf("%u: %f %f %f %f\n", i, f[i], f[i+1], f[i+2], f[i+3]);
+      }
+   }
+
   pipe_buffer_flush_mapped_range(&svga->pipe,
 				  svga_render->vbuf_transfer,
 				  offset, length);
@ -178,6 +194,7 @@ svga_vbuf_submit_state( struct svga_vbuf_render *svga_render )
   SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS];
   enum pipe_error ret;
   unsigned i;
+   static const unsigned zero[PIPE_MAX_ATTRIBS] = {0};

   /* if the vdecl or vbuf hasn't changed do nothing */
   if (!svga->swtnl.new_vdecl)
@ -192,18 +209,27 @@ svga_vbuf_submit_state( struct svga_vbuf_render *svga_render )
      ret = svga_hwtnl_flush(svga->hwtnl);
      /* if we hit this path we might become synced with hw */
      svga->swtnl.new_vbuf = TRUE;
-      assert(ret == 0);
+      assert(ret == PIPE_OK);
   }

-   svga_hwtnl_reset_vdecl(svga->hwtnl, svga_render->vdecl_count);
-
   for (i = 0; i < svga_render->vdecl_count; i++) {
      vdecl[i].array.offset += svga_render->vdecl_offset;
+   }

-      svga_hwtnl_vdecl( svga->hwtnl,
-                        i,
-                        &vdecl[i],
-                        svga_render->vbuf );
+   svga_hwtnl_vertex_decls(svga->hwtnl,
+                           svga_render->vdecl_count,
+                           vdecl,
+                           zero,
+                           svga_render->layout_id);
+
+   /* Specify the vertex buffer (there's only ever one) */
+   {
+      struct pipe_vertex_buffer vb;
+      vb.buffer = svga_render->vbuf;
+      vb.buffer_offset = svga_render->vdecl_offset;
+      vb.stride = vdecl[0].array.stride;
+      vb.user_buffer = NULL;
+      svga_hwtnl_vertex_buffers(svga->hwtnl, 1, &vb);
   }

   /* We have already taken care of flatshading, so let the hwtnl
@ -211,15 +237,15 @@ svga_vbuf_submit_state( struct svga_vbuf_render *svga_render )
    */
   if (svga->state.sw.need_pipeline) {
      svga_hwtnl_set_flatshade(svga->hwtnl, FALSE, FALSE);
-      svga_hwtnl_set_unfilled(svga->hwtnl, PIPE_POLYGON_MODE_FILL);
+      svga_hwtnl_set_fillmode(svga->hwtnl, PIPE_POLYGON_MODE_FILL);
   }
   else {
      svga_hwtnl_set_flatshade( svga->hwtnl,
-                                svga->curr.rast->templ.flatshade,
+                                svga->curr.rast->templ.flatshade ||
+                                svga->state.hw_draw.fs->uses_flat_interp,
                                svga->curr.rast->templ.flatshade_first );

-      svga_hwtnl_set_unfilled( svga->hwtnl,
-                               svga->curr.rast->hw_unfilled );
+      svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode);
   }

   svga->swtnl.new_vdecl = FALSE;
@ -227,13 +253,15 @@ svga_vbuf_submit_state( struct svga_vbuf_render *svga_render )

 static void
 svga_vbuf_render_draw_arrays( struct vbuf_render *render,
-                              unsigned start,
-                              uint nr )
+                              unsigned start, uint nr )
 {
   struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
   struct svga_context *svga = svga_render->svga;
   unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size;
   enum pipe_error ret = PIPE_OK;
+   /* instancing will already have been resolved at this point by 'draw' */
+   const unsigned start_instance = 0;
+   const unsigned instance_count = 1;

   /* off to hardware */
   svga_vbuf_submit_state(svga_render);
@ -244,10 +272,13 @@ svga_vbuf_render_draw_arrays( struct vbuf_render *render,
    */
   svga_update_state_retry( svga, SVGA_STATE_HW_DRAW );

-   ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr);
+   ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr,
+                                start_instance, instance_count);
   if (ret != PIPE_OK) {
      svga_context_flush(svga, NULL);
-      ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr);
+      ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim,
+                                   start + bias, nr,
+                                   start_instance, instance_count);
      svga->swtnl.new_vbuf = TRUE;
      assert(ret == PIPE_OK);
   }
@ -265,6 +296,9 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render,
   int bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size;
   boolean ret;
   size_t size = 2 * nr_indices;
+   /* instancing will already have been resolved at this point by 'draw' */
+   const unsigned start_instance = 0;
+   const unsigned instance_count = 1;

   assert(( svga_render->vbuf_offset - svga_render->vdecl_offset) % svga_render->vertex_size == 0);
   
@ -299,7 +333,8 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render,
                                        svga_render->min_index,
                                        svga_render->max_index,
                                        svga_render->prim,
-                                        svga_render->ibuf_offset / 2, nr_indices);
+                                        svga_render->ibuf_offset / 2, nr_indices,
+                                        start_instance, instance_count);
   if(ret != PIPE_OK) {
      svga_context_flush(svga, NULL);
      ret = svga_hwtnl_draw_range_elements(svga->hwtnl,
@ -309,7 +344,9 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render,
                                           svga_render->min_index,
                                           svga_render->max_index,
                                           svga_render->prim,
-                                           svga_render->ibuf_offset / 2, nr_indices);
+                                           svga_render->ibuf_offset / 2,
+                                           nr_indices,
+                                           start_instance, instance_count);
      svga->swtnl.new_vbuf = TRUE;
      assert(ret == PIPE_OK);
   }
@ -349,6 +386,7 @@ svga_vbuf_render_create( struct svga_context *svga )
   svga_render->vbuf_size = 0;
   svga_render->ibuf_alloc_size = 4*1024;
   svga_render->vbuf_alloc_size = 64*1024;
+   svga_render->layout_id = SVGA3D_INVALID_ID;
   svga_render->base.max_vertex_buffer_bytes = 64*1024/10;
   svga_render->base.max_indices = 65536;
   svga_render->base.get_vertex_info = svga_vbuf_render_get_vertex_info;
--- a/src/gallium/drivers/svga/svga_swtnl_draw.c
+++ b/src/gallium/drivers/svga/svga_swtnl_draw.c
@ -42,9 +42,9 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
 {
   struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = { 0 };
   struct pipe_transfer *ib_transfer = NULL;
-   struct pipe_transfer *cb_transfer = NULL;
+   struct pipe_transfer *cb_transfer[SVGA_MAX_CONST_BUFS] = { 0 };
   struct draw_context *draw = svga->swtnl.draw;
-   unsigned i;
+   unsigned i, old_num_vertex_buffers;
   const void *map;
   enum pipe_error ret;

@ -76,6 +76,7 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
         draw_set_mapped_vertex_buffer(draw, i, map, ~0);
      }
   }
+   old_num_vertex_buffers = svga->curr.num_vertex_buffers;

   /* Map index buffer, if present */
   map = NULL;
@ -88,16 +89,21 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
                       svga->curr.ib.index_size, ~0);
   }

-   if (svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer) {
+   /* Map constant buffers */
+   for (i = 0; i < Elements(svga->curr.constbufs[PIPE_SHADER_VERTEX]); ++i) {
+      if (svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer == NULL) {
+         continue;
+      }
+
      map = pipe_buffer_map(&svga->pipe,
-                            svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer,
+                            svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer,
                            PIPE_TRANSFER_READ,
-			    &cb_transfer);
+                            &cb_transfer[i]);
      assert(map);
      draw_set_mapped_constant_buffer(
-         draw, PIPE_SHADER_VERTEX, 0,
+         draw, PIPE_SHADER_VERTEX, i,
         map,
-         svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer->width0);
+         svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer->width0);
   }

   draw_vbo(draw, info);
@ -105,8 +111,8 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
   draw_flush(svga->swtnl.draw);

   /* Ensure the draw module didn't touch this */
-   assert(i == svga->curr.num_vertex_buffers);
-   
+   assert(old_num_vertex_buffers == svga->curr.num_vertex_buffers);
+
   /*
    * unmap vertex/index buffers
    */
@ -122,8 +128,10 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
      draw_set_indexes(draw, NULL, 0, 0);
   }

-   if (svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer) {
-      pipe_buffer_unmap(&svga->pipe, cb_transfer);
+   for (i = 0; i < Elements(svga->curr.constbufs[PIPE_SHADER_VERTEX]); ++i) {
+      if (svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer) {
+         pipe_buffer_unmap(&svga->pipe, cb_transfer[i]);
+      }
   }

   /* Now safe to remove the need_swtnl flag in any update_state call */
@ -167,9 +175,6 @@ boolean svga_init_swtnl( struct svga_context *svga )
   if (!screen->haveLineSmooth)
      draw_install_aaline_stage(svga->swtnl.draw, &svga->pipe);

-   /* always install polygon stipple stage */
-   draw_install_pstipple_stage(svga->swtnl.draw, &svga->pipe);
-
   /* enable/disable line stipple stage depending on device caps */
   draw_enable_line_stipple(svga->swtnl.draw, !screen->haveLineStipple);

--- a/src/gallium/drivers/svga/svga_swtnl_private.h
+++ b/src/gallium/drivers/svga/svga_swtnl_private.h
@ -43,6 +43,8 @@ struct svga_vbuf_render {

   unsigned vertex_size;

+   SVGA3dElementLayoutId layout_id; /**< current element layout id */
+
   unsigned prim;

   struct pipe_resource *vbuf;
@ -50,6 +52,8 @@ struct svga_vbuf_render {
   struct pipe_transfer *vbuf_transfer;
   struct pipe_transfer *ibuf_transfer;

+   void *vbuf_ptr;
+
   /* current size of buffer */
   size_t vbuf_size;
   size_t ibuf_size;
--- a/src/gallium/drivers/svga/svga_swtnl_state.c
+++ b/src/gallium/drivers/svga/svga_swtnl_state.c
@ -25,10 +25,13 @@

 #include "draw/draw_context.h"
 #include "draw/draw_vbuf.h"
+#include "util/u_bitmask.h"
 #include "util/u_inlines.h"
 #include "pipe/p_state.h"

+#include "svga_cmd.h"
 #include "svga_context.h"
+#include "svga_shader.h"
 #include "svga_swtnl.h"
 #include "svga_state.h"
 #include "svga_tgsi.h"
@ -51,30 +54,37 @@ static void set_draw_viewport( struct svga_context *svga )
   float adjx = 0.0f;
   float adjy = 0.0f;

-   switch (svga->curr.reduced_prim) {
-   case PIPE_PRIM_POINTS:
-      adjx = SVGA_POINT_ADJ_X;
-      adjy = SVGA_POINT_ADJ_Y;
-      break;
-   case PIPE_PRIM_LINES:
-      /* XXX: This is to compensate for the fact that wide lines are
-       * going to be drawn with triangles, but we're not catching all
-       * cases where that will happen.
-       */
-      if (svga->curr.rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES)
-      {
-         adjx = SVGA_LINE_ADJ_X + 0.175f;
-         adjy = SVGA_LINE_ADJ_Y - 0.175f;
+   if (svga_have_vgpu10(svga)) {
+      if (svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES) {
+         adjy = 0.25;
      }
-      else {
-         adjx = SVGA_LINE_ADJ_X;
-         adjy = SVGA_LINE_ADJ_Y;
+   }
+   else {
+      switch (svga->curr.reduced_prim) {
+      case PIPE_PRIM_POINTS:
+         adjx = SVGA_POINT_ADJ_X;
+         adjy = SVGA_POINT_ADJ_Y;
+         break;
+      case PIPE_PRIM_LINES:
+         /* XXX: This is to compensate for the fact that wide lines are
+          * going to be drawn with triangles, but we're not catching all
+          * cases where that will happen.
+          */
+         if (svga->curr.rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES)
+         {
+            adjx = SVGA_LINE_ADJ_X + 0.175f;
+            adjy = SVGA_LINE_ADJ_Y - 0.175f;
+         }
+         else {
+            adjx = SVGA_LINE_ADJ_X;
+            adjy = SVGA_LINE_ADJ_Y;
+         }
+         break;
+      case PIPE_PRIM_TRIANGLES:
+         adjx += SVGA_TRIANGLE_ADJ_X;
+         adjy += SVGA_TRIANGLE_ADJ_Y;
+         break;
      }
-      break;
-   case PIPE_PRIM_TRIANGLES:
-      adjx += SVGA_TRIANGLE_ADJ_X;
-      adjy += SVGA_TRIANGLE_ADJ_Y;
-      break;
   }

   vp.translate[0] += adjx;
@ -150,6 +160,59 @@ struct svga_tracked_state svga_update_swtnl_draw =
 };


+/* Map FLOAT1..FLOAT4 decl types to R32* float formats; others assert. */
+static SVGA3dSurfaceFormat
+translate_vertex_format(SVGA3dDeclType format)
+{
+   SVGA3dSurfaceFormat surf_format = SVGA3D_R32G32B32A32_FLOAT;
+
+   if (format == SVGA3D_DECLTYPE_FLOAT1) {
+      surf_format = SVGA3D_R32_FLOAT;
+   } else if (format == SVGA3D_DECLTYPE_FLOAT2) {
+      surf_format = SVGA3D_R32G32_FLOAT;
+   } else if (format == SVGA3D_DECLTYPE_FLOAT3) {
+      surf_format = SVGA3D_R32G32B32_FLOAT;
+   } else if (format != SVGA3D_DECLTYPE_FLOAT4) {
+      assert(!"Unexpected format in translate_vertex_format()");
+   }
+   return surf_format;
+}
+
+
+/* Define a VGPU10 element layout for the given vertex decls (all sourced
+ * from vertex buffer 0) and return its id; retries once after a flush. */
+static SVGA3dElementLayoutId
+svga_vdecl_to_input_element(struct svga_context *svga,
+                            const SVGA3dVertexDecl *vdecl, unsigned num_decls)
+{
+   SVGA3dInputElementDesc descs[PIPE_MAX_ATTRIBS];
+   SVGA3dElementLayoutId layout;
+   enum pipe_error err;
+   unsigned n;
+
+   assert(num_decls <= PIPE_MAX_ATTRIBS);
+   assert(svga_have_vgpu10(svga));
+   for (n = 0; n < num_decls; n++) {
+      SVGA3dInputElementDesc *desc = &descs[n];
+      desc->inputRegister = n;
+      desc->inputSlot = 0; /* vertex buffer index */
+      desc->inputSlotClass = SVGA3D_INPUT_PER_VERTEX_DATA;
+      desc->instanceDataStepRate = 0;
+      desc->alignedByteOffset = vdecl[n].array.offset;
+      desc->format = translate_vertex_format(vdecl[n].identity.type);
+   }
+
+   layout = util_bitmask_add(svga->input_element_object_id_bm);
+   err = SVGA3D_vgpu10_DefineElementLayout(svga->swc, num_decls, layout, descs);
+   if (err != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      err = SVGA3D_vgpu10_DefineElementLayout(svga->swc, num_decls, layout, descs);
+      assert(err == PIPE_OK);
+   }
+   return layout;
+}
+
+
 enum pipe_error
 svga_swtnl_update_vdecl( struct svga_context *svga )
 {
@ -164,16 +227,19 @@ svga_swtnl_update_vdecl( struct svga_context *svga )
   int nr_decls = 0;
   int src;
   unsigned i;
+   int any_change;

   memset(vinfo, 0, sizeof(*vinfo));
   memset(vdecl, 0, sizeof(vdecl));

   draw_prepare_shader_outputs(draw);
+
   /* always add position */
   src = draw_find_shader_output(draw, TGSI_SEMANTIC_POSITION, 0);
   draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src);
   vinfo->attrib[0].emit = EMIT_4F;
   vdecl[0].array.offset = offset;
+   vdecl[0].identity.method = SVGA3D_DECLMETHOD_DEFAULT;
   vdecl[0].identity.type = SVGA3D_DECLTYPE_FLOAT4;
   vdecl[0].identity.usage = SVGA3D_DECLUSAGE_POSITIONT;
   vdecl[0].identity.usageIndex = 0;
@ -225,16 +291,67 @@ svga_swtnl_update_vdecl( struct svga_context *svga )
   draw_compute_vertex_size(vinfo);

   svga_render->vdecl_count = nr_decls;
-   for (i = 0; i < svga_render->vdecl_count; i++)
+   for (i = 0; i < svga_render->vdecl_count; i++) {
      vdecl[i].array.stride = offset;
+   }

-   if (memcmp(svga_render->vdecl, vdecl, sizeof(vdecl)) == 0)
-      return PIPE_OK;
+   any_change = memcmp(svga_render->vdecl, vdecl, sizeof(vdecl));
+
+   if (svga_have_vgpu10(svga)) {
+      enum pipe_error ret;
+
+      if (!any_change && svga_render->layout_id != SVGA3D_INVALID_ID) {
+         return PIPE_OK;
+      }
+
+      if (svga_render->layout_id != SVGA3D_INVALID_ID) {
+         /* destroy old */
+         ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc,
+                                                  svga_render->layout_id);
+         if (ret != PIPE_OK) {
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc,
+                                                     svga_render->layout_id);
+            assert(ret == PIPE_OK);
+         }
+
+         /**
+          * reset current layout id state after the element layout is
+          * destroyed, so that if a new layout has the same layout id, we
+          * will know to re-issue the SetInputLayout command.
+          */
+         if (svga->state.hw_draw.layout_id == svga_render->layout_id)
+            svga->state.hw_draw.layout_id = SVGA3D_INVALID_ID;
+
+         util_bitmask_clear(svga->input_element_object_id_bm,
+                            svga_render->layout_id);
+      }
+
+      svga_render->layout_id =
+         svga_vdecl_to_input_element(svga, vdecl, nr_decls);
+
+      /* bind new */
+      if (svga->state.hw_draw.layout_id != svga_render->layout_id) {
+         ret = SVGA3D_vgpu10_SetInputLayout(svga->swc, svga_render->layout_id);
+         if (ret != PIPE_OK) {
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_vgpu10_SetInputLayout(svga->swc,
+                                               svga_render->layout_id);
+            assert(ret == PIPE_OK);
+         }
+
+         svga->state.hw_draw.layout_id = svga_render->layout_id;
+      }
+   }
+   else {
+      if (!any_change)
+         return PIPE_OK;
+   }

   memcpy(svga_render->vdecl, vdecl, sizeof(vdecl));
   svga->swtnl.new_vdecl = TRUE;

-   return PIPE_OK;
+   return 0;
 }


--- a/src/gallium/drivers/svga/svga_tgsi.c
+++ b/src/gallium/drivers/svga/svga_tgsi.c
@ -37,6 +37,7 @@
 #include "svgadump/svga_shader_dump.h"

 #include "svga_context.h"
+#include "svga_shader.h"
 #include "svga_tgsi.h"
 #include "svga_tgsi_emit.h"
 #include "svga_debug.h"
@ -165,97 +166,6 @@ svga_shader_emit_header(struct svga_shader_emitter *emit)
 }


-/**
- * Use the shader info to generate a bitmask indicating which generic
- * inputs are used by the shader.  A set bit indicates that GENERIC[i]
- * is used.
- */
-unsigned
-svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
-{
-   unsigned i, mask = 0x0;
-
-   for (i = 0; i < info->num_inputs; i++) {
-      if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
-         unsigned j = info->input_semantic_index[i];
-         assert(j < sizeof(mask) * 8);
-         mask |= 1 << j;
-      }
-   }
-
-   return mask;
-}
-
-
-/**
- * Given a mask of used generic variables (as returned by the above functions)
- * fill in a table which maps those indexes to small integers.
- * This table is used by the remap_generic_index() function in
- * svga_tgsi_decl_sm30.c
- * Example: if generics_mask = binary(1010) it means that GENERIC[1] and
- * GENERIC[3] are used.  The remap_table will contain:
- *   table[1] = 0;
- *   table[3] = 1;
- * The remaining table entries will be filled in with the next unused
- * generic index (in this example, 2).
- */
-void
-svga_remap_generics(unsigned generics_mask,
-                    int8_t remap_table[MAX_GENERIC_VARYING])
-{
-   /* Note texcoord[0] is reserved so start at 1 */
-   unsigned count = 1, i;
-
-   for (i = 0; i < MAX_GENERIC_VARYING; i++) {
-      remap_table[i] = -1;
-   }
-
-   /* for each bit set in generic_mask */
-   while (generics_mask) {
-      unsigned index = ffs(generics_mask) - 1;
-      remap_table[index] = count++;
-      generics_mask &= ~(1 << index);
-   }
-}
-
-
-/**
- * Use the generic remap table to map a TGSI generic varying variable
- * index to a small integer.  If the remapping table doesn't have a
- * valid value for the given index (the table entry is -1) it means
- * the fragment shader doesn't use that VS output.  Just allocate
- * the next free value in that case.  Alternately, we could cull
- * VS instructions that write to register, or replace the register
- * with a dummy temp register.
- * XXX TODO: we should do one of the later as it would save precious
- * texcoord registers.
- */
-int
-svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
-                         int generic_index)
-{
-   assert(generic_index < MAX_GENERIC_VARYING);
-
-   if (generic_index >= MAX_GENERIC_VARYING) {
-      /* just don't return a random/garbage value */
-      generic_index = MAX_GENERIC_VARYING - 1;
-   }
-
-   if (remap_table[generic_index] == -1) {
-      /* This is a VS output that has no matching PS input.  Find a
-       * free index.
-       */
-      int i, max = 0;
-      for (i = 0; i < MAX_GENERIC_VARYING; i++) {
-         max = MAX2(max, remap_table[i]);
-      }
-      remap_table[generic_index] = max + 1;
-   }
-
-   return remap_table[generic_index];
-}
-
-
 /**
 * Parse TGSI shader and translate to SVGA/DX9 serialized
 * representation.
@ -264,9 +174,9 @@ svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
 * can be dynamically grown.  Once we've finished and know how large
 * it is, it will be copied to a hardware buffer for upload.
 */
-static struct svga_shader_variant *
-svga_tgsi_translate(const struct svga_shader *shader,
-                    const struct svga_compile_key *key, unsigned unit)
+struct svga_shader_variant *
+svga_tgsi_vgpu9_translate(const struct svga_shader *shader,
+                          const struct svga_compile_key *key, unsigned unit)
 {
   struct svga_shader_variant *variant = NULL;
   struct svga_shader_emitter emit;
@ -288,10 +198,10 @@ svga_tgsi_translate(const struct svga_shader *shader,
   emit.imm_start = emit.info.file_max[TGSI_FILE_CONSTANT] + 1;

   if (unit == PIPE_SHADER_FRAGMENT)
-      emit.imm_start += key->fkey.num_unnormalized_coords;
+      emit.imm_start += key->num_unnormalized_coords;

   if (unit == PIPE_SHADER_VERTEX) {
-      emit.imm_start += key->vkey.need_prescale ? 2 : 0;
+      emit.imm_start += key->vs.need_prescale ? 2 : 0;
   }

   emit.nr_hw_float_const =
@ -327,7 +237,11 @@ svga_tgsi_translate(const struct svga_shader *shader,
   memcpy(&variant->key, key, sizeof(*key));
   variant->id = UTIL_BITMASK_INVALID_INDEX;

-   if (SVGA_DEBUG & DEBUG_TGSI) {
+   variant->pstipple_sampler_unit = emit.pstipple_sampler_unit;
+
+#if 0
+   if (!svga_shader_verify(variant->tokens, variant->nr_tokens) ||
+       SVGA_DEBUG & DEBUG_TGSI) {
      debug_printf("#####################################\n");
      debug_printf("Shader %u below\n", shader->id);
      tgsi_dump(shader->tokens, 0);
@ -337,6 +251,7 @@ svga_tgsi_translate(const struct svga_shader *shader,
      }
      debug_printf("#####################################\n");
   }
+#endif

   return variant;

@ -345,39 +260,3 @@ svga_tgsi_translate(const struct svga_shader *shader,
   FREE(emit.buf);
   return NULL;
 }
-
-
-struct svga_shader_variant *
-svga_translate_fragment_program(const struct svga_fragment_shader *fs,
-                                const struct svga_fs_compile_key *fkey)
-{
-   struct svga_compile_key key;
-
-   memset(&key, 0, sizeof(key));
-
-   memcpy(&key.fkey, fkey, sizeof *fkey);
-
-   memcpy(key.generic_remap_table, fs->generic_remap_table,
-          sizeof(fs->generic_remap_table));
-
-   return svga_tgsi_translate(&fs->base, &key, PIPE_SHADER_FRAGMENT);
-}
-
-
-struct svga_shader_variant *
-svga_translate_vertex_program(const struct svga_vertex_shader *vs,
-                              const struct svga_vs_compile_key *vkey)
-{
-   struct svga_compile_key key;
-
-   memset(&key, 0, sizeof(key));
-
-   memcpy(&key.vkey, vkey, sizeof *vkey);
-
-   /* Note: we could alternately store the remap table in the vkey but
-    * that would make it larger.  We just regenerate it here instead.
-    */
-   svga_remap_generics(vkey->fs_generic_inputs, key.generic_remap_table);
-
-   return svga_tgsi_translate(&vs->base, &key, PIPE_SHADER_VERTEX);
-}
--- a/src/gallium/drivers/svga/svga_tgsi.h
+++ b/src/gallium/drivers/svga/svga_tgsi.h
@ -26,94 +26,16 @@
 #ifndef SVGA_TGSI_H
 #define SVGA_TGSI_H

-#include "pipe/p_state.h"
-
-#include "svga_hw_reg.h"
+#include "pipe/p_compiler.h"
+#include "svga3d_reg.h"


-/**
- * We use a 32-bit mask to keep track of the generic indexes.
- */
-#define MAX_GENERIC_VARYING 32
+#define MAX_VGPU10_ADDR_REGS 2

-
-struct svga_fragment_shader;
-struct svga_vertex_shader;
+struct svga_compile_key;
+struct svga_context;
 struct svga_shader;
-struct tgsi_shader_info;
-struct tgsi_token;
-
-
-struct svga_vs_compile_key
-{
-   unsigned fs_generic_inputs;
-   unsigned need_prescale:1;
-   unsigned allow_psiz:1;
-   unsigned adjust_attrib_range:16;
-   unsigned adjust_attrib_w_1:16;
-};
-
-struct svga_fs_compile_key
-{
-   unsigned light_twoside:1;
-   unsigned front_ccw:1;
-   unsigned white_fragments:1;
-   unsigned write_color0_to_n_cbufs:3;
-   unsigned num_textures:8;
-   unsigned num_unnormalized_coords:8;
-   unsigned sprite_origin_lower_left:1;
-   struct {
-      unsigned compare_mode:1;
-      unsigned compare_func:3;
-      unsigned unnormalized:1;
-      unsigned width_height_idx:7;
-      unsigned texture_target:8;
-      unsigned sprite_texgen:1;
-      unsigned swizzle_r:3;
-      unsigned swizzle_g:3;
-      unsigned swizzle_b:3;
-      unsigned swizzle_a:3;
-   } tex[PIPE_MAX_SAMPLERS];
-};
-
-/**
- * Key/index for identifying shader variants.
- */
-struct svga_compile_key {
-   struct svga_vs_compile_key vkey;
-   struct svga_fs_compile_key fkey;
-   int8_t generic_remap_table[MAX_GENERIC_VARYING];
-};
-
-
-/**
- * A single TGSI shader may be compiled into different variants of
- * SVGA3D shaders depending on the compile key.  Each user shader
- * will have a linked list of these variants.
- */
-struct svga_shader_variant
-{
-   const struct svga_shader *shader;
-
-   /** Parameters used to generate this variant */
-   struct svga_compile_key key;
-
-   /* Compiled shader tokens:
-    */
-   const unsigned *tokens;
-   unsigned nr_tokens;
-
-   /** Per-context shader identifier used with SVGA_3D_CMD_SHADER_DEFINE,
-    * SVGA_3D_CMD_SET_SHADER and SVGA_3D_CMD_SHADER_DESTROY.
-    */
-   unsigned id;
-   
-   /* GB object buffer containing the bytecode */
-   struct svga_winsys_gb_shader *gb_shader;
-
-   /** Next variant */
-   struct svga_shader_variant *next;
-};
+struct svga_shader_variant;


 /* TGSI doesn't provide use with VS input semantics (they're actually
@ -140,37 +62,16 @@ static inline void svga_generate_vdecl_semantics( unsigned idx,



-static inline unsigned svga_vs_key_size( const struct svga_vs_compile_key *key )
-{
-   return sizeof *key;
-}
-
-static inline unsigned svga_fs_key_size( const struct svga_fs_compile_key *key )
-{
-   return (const char *)&key->tex[key->num_textures] - (const char *)key;
-}
+struct svga_shader_variant *
+svga_tgsi_vgpu9_translate(const struct svga_shader *shader,
+                          const struct svga_compile_key *key, unsigned unit);

 struct svga_shader_variant *
-svga_translate_fragment_program( const struct svga_fragment_shader *fs,
-                                 const struct svga_fs_compile_key *fkey );
+svga_tgsi_vgpu10_translate(struct svga_context *svga,
+                           const struct svga_shader *shader,
+                           const struct svga_compile_key *key,
+                           unsigned unit);

-struct svga_shader_variant *
-svga_translate_vertex_program( const struct svga_vertex_shader *fs,
-                               const struct svga_vs_compile_key *vkey );
-
-
-unsigned
-svga_get_generic_inputs_mask(const struct tgsi_shader_info *info);
-
-unsigned
-svga_get_generic_outputs_mask(const struct tgsi_shader_info *info);
-
-void
-svga_remap_generics(unsigned generics_mask,
-                    int8_t remap_table[MAX_GENERIC_VARYING]);
-
-int
-svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
-                         int generic_index);
+boolean svga_shader_verify(const uint32_t *tokens, unsigned nr_tokens);

 #endif
--- a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
+++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
@ -216,7 +216,7 @@ ps30_input(struct svga_shader_emitter *emit,

      return emit_decl( emit, reg, 0, 0 );
   }
-   else if (emit->key.fkey.light_twoside &&
+   else if (emit->key.fs.light_twoside &&
            (semantic.Name == TGSI_SEMANTIC_COLOR)) {

      if (!translate_vs_ps_semantic( emit, semantic, &usage, &index ))
@ -285,9 +285,9 @@ ps30_input(struct svga_shader_emitter *emit,
         return FALSE;

      if (semantic.Name == TGSI_SEMANTIC_GENERIC &&
-          emit->key.fkey.sprite_origin_lower_left &&
+          emit->key.sprite_origin_lower_left &&
          index >= 1 &&
-          emit->key.fkey.tex[index - 1].sprite_texgen) {
+          emit->key.tex[index - 1].sprite_texgen) {
         /* This is a sprite texture coord with lower-left origin.
          * We need to invert the texture T coordinate since the SVGA3D
          * device only supports an upper-left origin.
@ -329,7 +329,7 @@ ps30_output(struct svga_shader_emitter *emit,
   switch (semantic.Name) {
   case TGSI_SEMANTIC_COLOR:
      if (emit->unit == PIPE_SHADER_FRAGMENT) {
-         if (emit->key.fkey.white_fragments) {
+         if (emit->key.fs.white_fragments) {
            /* Used for XOR logicop mode */
            emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
                                                  emit->nr_hw_temp++ );
@ -337,14 +337,14 @@ ps30_output(struct svga_shader_emitter *emit,
            emit->true_color_output[idx] = dst_register(SVGA3DREG_COLOROUT, 
                                                        semantic.Index);
         }
-         else if (emit->key.fkey.write_color0_to_n_cbufs) {
+         else if (emit->key.fs.write_color0_to_n_cbufs) {
            /* We'll write color output [0] to all render targets.
             * Prepare all the output registers here, but only when the
             * semantic.Index == 0 so we don't do this more than once.
             */
            if (semantic.Index == 0) {
               unsigned i;
-               for (i = 0; i < emit->key.fkey.write_color0_to_n_cbufs; i++) {
+               for (i = 0; i < emit->key.fs.write_color0_to_n_cbufs; i++) {
                  emit->output_map[idx+i] = dst_register(SVGA3DREG_TEMP,
                                                     emit->nr_hw_temp++);
                  emit->temp_color_output[i] = emit->output_map[idx+i];
@ -487,7 +487,7 @@ vs30_output(struct svga_shader_emitter *emit,
      /* This has the effect of not declaring psiz (below) and not 
       * emitting the final MOV to true_psiz in the postamble.
       */
-      if (!emit->key.vkey.allow_psiz)
+      if (!emit->key.vs.allow_psiz)
         return TRUE;

      emit->true_psiz = dcl.dst;
@ -517,7 +517,7 @@ vs30_output(struct svga_shader_emitter *emit,
 static ubyte
 svga_tgsi_sampler_type(const struct svga_shader_emitter *emit, int idx)
 {
-   switch (emit->key.fkey.tex[idx].texture_target) {
+   switch (emit->key.tex[idx].texture_target) {
   case PIPE_TEXTURE_1D:
      return SVGA3DSAMP_2D;
   case PIPE_TEXTURE_2D:
--- a/src/gallium/drivers/svga/svga_tgsi_emit.h
+++ b/src/gallium/drivers/svga/svga_tgsi_emit.h
@ -28,6 +28,7 @@

 #include "tgsi/tgsi_scan.h"
 #include "svga_hw_reg.h"
+#include "svga_shader.h"
 #include "svga_tgsi.h"
 #include "svga3d_shaderdefs.h"

@ -130,6 +131,8 @@ struct svga_shader_emitter
   struct svga_arl_consts arl_consts[12];
   int num_arl_consts;
   int current_arl;
+
+   unsigned pstipple_sampler_unit;
 };


--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@ -29,6 +29,7 @@
 #include "tgsi/tgsi_parse.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
+#include "util/u_pstipple.h"

 #include "svga_tgsi_emit.h"
 #include "svga_context.h"
@ -862,7 +863,7 @@ create_common_immediate( struct svga_shader_emitter *emit )
   idx++;

   /* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */
-   if (emit->key.vkey.adjust_attrib_range) {
+   if (emit->key.vs.adjust_attrib_range) {
      if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
                           idx, 2.0f, 0.0f, 0.0f, 0.0f ))
         return FALSE;
@ -1015,7 +1016,7 @@ get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
   struct src_register reg;

   /* the width/height indexes start right after constants */
-   idx = emit->key.fkey.tex[sampler_num].width_height_idx +
+   idx = emit->key.tex[sampler_num].width_height_idx +
         emit->info.file_max[TGSI_FILE_CONSTANT] + 1;

   reg = src_register( SVGA3DREG_CONST, idx );
@ -1723,7 +1724,7 @@ emit_tex2(struct svga_shader_emitter *emit,
   texcoord = translate_src_register( emit, &insn->Src[0] );
   sampler = translate_src_register( emit, &insn->Src[1] );

-   if (emit->key.fkey.tex[sampler.base.num].unnormalized ||
+   if (emit->key.tex[sampler.base.num].unnormalized ||
       emit->dynamic_branching_level > 0)
      tmp = get_temp( emit );

@ -1755,7 +1756,7 @@ emit_tex2(struct svga_shader_emitter *emit,

   /* Explicit normalization of texcoords:
    */
-   if (emit->key.fkey.tex[sampler.base.num].unnormalized) {
+   if (emit->key.tex[sampler.base.num].unnormalized) {
      struct src_register wh = get_tex_dimensions( emit, sampler.base.num );

      /* MUL  tmp, SRC0, WH */
@ -1891,14 +1892,14 @@ emit_tex(struct svga_shader_emitter *emit,
   const unsigned unit = src1.base.num;

   /* check for shadow samplers */
-   boolean compare = (emit->key.fkey.tex[unit].compare_mode ==
+   boolean compare = (emit->key.tex[unit].compare_mode ==
                      PIPE_TEX_COMPARE_R_TO_TEXTURE);

   /* texture swizzle */
-   boolean swizzle = (emit->key.fkey.tex[unit].swizzle_r != PIPE_SWIZZLE_RED ||
-                      emit->key.fkey.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN ||
-                      emit->key.fkey.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE ||
-                      emit->key.fkey.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA);
+   boolean swizzle = (emit->key.tex[unit].swizzle_r != PIPE_SWIZZLE_RED ||
+                      emit->key.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN ||
+                      emit->key.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE ||
+                      emit->key.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA);

   boolean saturate = insn->Instruction.Saturate;

@ -1965,7 +1966,7 @@ emit_tex(struct svga_shader_emitter *emit,

         /* Compare texture sample value against R component of texcoord */
         if (!emit_select(emit,
-                          emit->key.fkey.tex[unit].compare_func,
+                          emit->key.tex[unit].compare_func,
                          writemask( dst2, TGSI_WRITEMASK_XYZ ),
                          r_coord,
                          tex_src_x))
@ -1991,10 +1992,10 @@ emit_tex(struct svga_shader_emitter *emit,
      /* swizzle from tex_result to dst (handles saturation too, if any) */
      emit_tex_swizzle(emit,
                       dst, src(tex_result),
-                       emit->key.fkey.tex[unit].swizzle_r,
-                       emit->key.fkey.tex[unit].swizzle_g,
-                       emit->key.fkey.tex[unit].swizzle_b,
-                       emit->key.fkey.tex[unit].swizzle_a);
+                       emit->key.tex[unit].swizzle_r,
+                       emit->key.tex[unit].swizzle_g,
+                       emit->key.tex[unit].swizzle_b,
+                       emit->key.tex[unit].swizzle_a);
   }

   return TRUE;
@ -3113,7 +3114,7 @@ make_immediate(struct svga_shader_emitter *emit,
 static boolean
 emit_vs_preamble(struct svga_shader_emitter *emit)
 {
-   if (!emit->key.vkey.need_prescale) {
+   if (!emit->key.vs.need_prescale) {
      if (!make_immediate( emit, 0, 0, .5, .5,
                           &emit->imm_0055))
         return FALSE;
@ -3190,7 +3191,7 @@ emit_ps_postamble(struct svga_shader_emitter *emit)
          * logicop workaround.
          */
         if (emit->unit == PIPE_SHADER_FRAGMENT &&
-             emit->key.fkey.white_fragments) {
+             emit->key.fs.white_fragments) {
            struct src_register one = get_one_immediate(emit);

            if (!submit_op1( emit,
@ -3200,7 +3201,7 @@ emit_ps_postamble(struct svga_shader_emitter *emit)
               return FALSE;
         }
         else if (emit->unit == PIPE_SHADER_FRAGMENT &&
-                  i < emit->key.fkey.write_color0_to_n_cbufs) {
+                  i < emit->key.fs.write_color0_to_n_cbufs) {
            /* Write temp color output [0] to true output [i] */
            if (!submit_op1(emit, inst_token(SVGA3DOP_MOV),
                            emit->true_color_output[i],
@ -3244,7 +3245,7 @@ emit_vs_postamble(struct svga_shader_emitter *emit)
   /* Need to perform various manipulations on vertex position to cope
    * with the different GL and D3D clip spaces.
    */
-   if (emit->key.vkey.need_prescale) {
+   if (emit->key.vs.need_prescale) {
      SVGA3dShaderDestToken temp_pos = emit->temp_pos;
      SVGA3dShaderDestToken depth = emit->depth_pos;
      SVGA3dShaderDestToken pos = emit->true_pos;
@ -3372,7 +3373,7 @@ emit_light_twoside(struct svga_shader_emitter *emit)

   if_token = inst_token( SVGA3DOP_IFC );

-   if (emit->key.fkey.front_ccw)
+   if (emit->key.fs.front_ccw)
      if_token.control = SVGA3DOPCOMP_LT;
   else
      if_token.control = SVGA3DOPCOMP_GT;
@ -3423,7 +3424,7 @@ emit_frontface(struct svga_shader_emitter *emit)
   temp = dst_register( SVGA3DREG_TEMP,
                        emit->nr_hw_temp++ );

-   if (emit->key.fkey.front_ccw) {
+   if (emit->key.fs.front_ccw) {
      pass = get_zero_immediate(emit);
      fail = get_one_immediate(emit);
   } else {
@ -3494,8 +3495,8 @@ emit_inverted_texcoords(struct svga_shader_emitter *emit)
 static boolean
 emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit)
 {
-   unsigned adjust_mask = (emit->key.vkey.adjust_attrib_range |
-                           emit->key.vkey.adjust_attrib_w_1);
+   unsigned adjust_mask = (emit->key.vs.adjust_attrib_range |
+                           emit->key.vs.adjust_attrib_w_1);
 
   while (adjust_mask) {
      /* Adjust vertex attrib range and/or set W component = 1 */
@ -3506,7 +3507,7 @@ emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit)
      tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp);
      emit->nr_hw_temp++;

-      if (emit->key.vkey.adjust_attrib_range & (1 << index)) {
+      if (emit->key.vs.adjust_attrib_range & (1 << index)) {
         /* The vertex input/attribute is supposed to be a signed value in
          * the range [-1,1] but we actually fetched/converted it to the
          * range [0,1].  This most likely happens when the app specifies a
@ -3558,7 +3559,7 @@ emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit)
            return FALSE;
      }

-      if (emit->key.vkey.adjust_attrib_w_1 & (1 << index)) {
+      if (emit->key.vs.adjust_attrib_w_1 & (1 << index)) {
         /* move 1 into W position of tmp */
         if (!submit_op1(emit,
                         inst_token(SVGA3DOP_MOV),
@ -3588,10 +3589,10 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit)
   unsigned i;

   if (emit->unit == PIPE_SHADER_FRAGMENT) {
-      if (emit->key.fkey.light_twoside)
+      if (emit->key.fs.light_twoside)
         return TRUE;

-      if (emit->key.fkey.white_fragments)
+      if (emit->key.fs.white_fragments)
         return TRUE;

      if (emit->emit_frontface)
@ -3606,16 +3607,16 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit)
         return TRUE;

      /* look for any PIPE_SWIZZLE_ZERO/ONE terms */
-      for (i = 0; i < emit->key.fkey.num_textures; i++) {
-         if (emit->key.fkey.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA ||
-             emit->key.fkey.tex[i].swizzle_g > PIPE_SWIZZLE_ALPHA ||
-             emit->key.fkey.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA ||
-             emit->key.fkey.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA)
+      for (i = 0; i < emit->key.num_textures; i++) {
+         if (emit->key.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA ||
+             emit->key.tex[i].swizzle_g > PIPE_SWIZZLE_ALPHA ||
+             emit->key.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA ||
+             emit->key.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA)
            return TRUE;
      }

-      for (i = 0; i < emit->key.fkey.num_textures; i++) {
-         if (emit->key.fkey.tex[i].compare_mode
+      for (i = 0; i < emit->key.num_textures; i++) {
+         if (emit->key.tex[i].compare_mode
             == PIPE_TEX_COMPARE_R_TO_TEXTURE)
            return TRUE;
      }
@ -3623,8 +3624,8 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit)
   else if (emit->unit == PIPE_SHADER_VERTEX) {
      if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1)
         return TRUE;
-      if (emit->key.vkey.adjust_attrib_range ||
-          emit->key.vkey.adjust_attrib_w_1)
+      if (emit->key.vs.adjust_attrib_range ||
+          emit->key.vs.adjust_attrib_w_1)
         return TRUE;
   }

@ -3772,7 +3773,7 @@ svga_shader_emit_helpers(struct svga_shader_emitter *emit)
      if (!emit_ps_preamble( emit ))
         return FALSE;

-      if (emit->key.fkey.light_twoside) {
+      if (emit->key.fs.light_twoside) {
         if (!emit_light_twoside( emit ))
            return FALSE;
      }
@ -3787,14 +3788,14 @@ svga_shader_emit_helpers(struct svga_shader_emitter *emit)
   }
   else {
      assert(emit->unit == PIPE_SHADER_VERTEX);
-      if (emit->key.vkey.adjust_attrib_range ||
-          emit->key.vkey.adjust_attrib_w_1) {
-         if (!emit_adjusted_vertex_attribs(emit))
+      /* Fix up vertex attribs when either adjustment mask has bits set. */
+      if (emit->key.vs.adjust_attrib_range || emit->key.vs.adjust_attrib_w_1) {
+         if (!emit_adjusted_vertex_attribs(emit)) {
            return FALSE;
+         }
      }
   }

-
   return TRUE;
 }

@ -3808,10 +3809,30 @@ svga_shader_emit_instructions(struct svga_shader_emitter *emit,
                              const struct tgsi_token *tokens)
 {
   struct tgsi_parse_context parse;
+   const struct tgsi_token *new_tokens = NULL;
   boolean ret = TRUE;
   boolean helpers_emitted = FALSE;
   unsigned line_nr = 0;

+   if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.pstipple) {
+      unsigned unit;
+
+      new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0);
+
+      if (new_tokens) {
+         /* Setup texture state for stipple */
+         emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D;
+         emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
+         emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
+         emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
+         emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
+
+         emit->pstipple_sampler_unit = unit;
+
+         tokens = new_tokens;
+      }
+   }
+
   tgsi_parse_init( &parse, tokens );
   emit->internal_imm_count = 0;

@ -3878,5 +3899,9 @@ svga_shader_emit_instructions(struct svga_shader_emitter *emit,

 done:
   tgsi_parse_free( &parse );
+   if (new_tokens) {
+      tgsi_free_tokens(new_tokens);
+   }
+
   return ret;
 }
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@ -281,7 +281,7 @@ free_emitter(struct svga_shader_emitter_v10 *emit)
   FREE(emit);
 }

-static INLINE boolean
+static inline boolean
 reserve(struct svga_shader_emitter_v10 *emit,
        unsigned nr_dwords)
 {
@ -1459,7 +1459,7 @@ absolute_src(const struct tgsi_full_src_register *reg)


 /** Return the named swizzle term from the src register */
-static INLINE unsigned
+static inline unsigned
 get_swizzle(const struct tgsi_full_src_register *reg, unsigned term)
 {
   switch (term) {