gallium: begin reworking quad stages for multiple color outputs

2026-03-11 04:50:35 +01:00 · 2008-04-07 21:59:12 -06:00 · 2008-04-07 21:59:12 -06:00 · c7daa68ca3
commit c7daa68ca3
parent 4e2127b0e5
11 changed files with 737 additions and 674 deletions
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -138,8 +138,6 @@ struct softpipe_context {
   struct draw_stage *vbuf;
   struct softpipe_vbuf_render *vbuf_render;

-   uint current_cbuf;      /**< current color buffer being written to */
-
   struct softpipe_tile_cache *cbuf_cache[PIPE_MAX_COLOR_BUFS];
   struct softpipe_tile_cache *zsbuf_cache;

--- a/src/gallium/drivers/softpipe/sp_fs_llvm.c
+++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c
@@ -99,19 +99,19 @@ shade_quad_llvm(struct quad_stage *qs,
      allvmrt(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot]
             == TGSI_SEMANTIC_COLOR);
      for (i = 0; i < QUAD_SIZE; ++i) {
-         quad->outputs.color[0][i] = dests[i][qss->colorOutSlot][0];
-         quad->outputs.color[1][i] = dests[i][qss->colorOutSlot][1];
-         quad->outputs.color[2][i] = dests[i][qss->colorOutSlot][2];
-         quad->outputs.color[3][i] = dests[i][qss->colorOutSlot][3];
+         quad->outputs.color[0][0][i] = dests[i][qss->colorOutSlot][0];
+         quad->outputs.color[0][1][i] = dests[i][qss->colorOutSlot][1];
+         quad->outputs.color[0][2][i] = dests[i][qss->colorOutSlot][2];
+         quad->outputs.color[0][3][i] = dests[i][qss->colorOutSlot][3];
      }
   }
 #if DLLVM
   for (int i = 0; i < QUAD_SIZE; ++i) {
      debug_printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot,
-             quad->outputs.color[0][i],
-             quad->outputs.color[1][i],
-             quad->outputs.color[2][i],
-             quad->outputs.color[3][i]);
+             quad->outputs.color[0][0][i],
+             quad->outputs.color[0][1][i],
+             quad->outputs.color[0][2][i],
+             quad->outputs.color[0][3][i]);
   }
 #endif

--- a/src/gallium/drivers/softpipe/sp_headers.h
+++ b/src/gallium/drivers/softpipe/sp_headers.h
@@ -31,6 +31,7 @@
 #ifndef SP_HEADERS_H
 #define SP_HEADERS_H

+#include "pipe/p_state.h"
 #include "tgsi/exec/tgsi_exec.h"

 #define PRIM_POINT 1
@@ -66,7 +67,8 @@ struct quad_header {
   unsigned prim:2;     /**< PRIM_POINT, LINE, TRI */

   struct {
-      float color[NUM_CHANNELS][QUAD_SIZE];	/* rrrr, gggg, bbbb, aaaa */
+      /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */
+      float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE];
      float depth[QUAD_SIZE];
   } outputs;

--- a/src/gallium/drivers/softpipe/sp_quad.c
+++ b/src/gallium/drivers/softpipe/sp_quad.c
@@ -76,15 +76,6 @@ sp_build_quad_pipeline(struct softpipe_context *sp)
      sp_push_quad_first( sp, sp->quad.blend );
   }

-   if (sp->framebuffer.num_cbufs == 1) {
-      /* the usual case: write to exactly one colorbuf */
-      sp->current_cbuf = 0;
-   }
-   else {
-      /* insert bufloop stage */
-      sp_push_quad_first( sp, sp->quad.bufloop );
-   }
-
   if (sp->depth_stencil->depth.occlusion_count) {
      sp_push_quad_first( sp, sp->quad.occlusion );
   }
--- a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c
+++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c
@@ -16,7 +16,8 @@ alpha_test_quad(struct quad_stage *qs, struct quad_header *quad)
   struct softpipe_context *softpipe = qs->softpipe;
   const float ref = softpipe->depth_stencil->alpha.ref;
   unsigned passMask = 0x0, j;
-   const float *aaaa = quad->outputs.color[3];
+   const uint cbuf = 0; /* only output[0].alpha is tested */
+   const float *aaaa = quad->outputs.color[cbuf][3];

   switch (softpipe->depth_stencil->alpha.func) {
   case PIPE_FUNC_NEVER:
@@ -25,7 +26,7 @@ alpha_test_quad(struct quad_stage *qs, struct quad_header *quad)
   case PIPE_FUNC_LESS:
      /*
       * If mask were an array [4] we could do this SIMD-style:
-       * passMask = (quad->outputs.color[3] <= vec4(ref));
+       * passMask = (quad->outputs.color[0][3] <= vec4(ref));
       */
      for (j = 0; j < QUAD_SIZE; j++) {
         if (aaaa[j] < ref) {
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
--- a/src/gallium/drivers/softpipe/sp_quad_bufloop.c
+++ b/src/gallium/drivers/softpipe/sp_quad_bufloop.c
@@ -13,7 +13,7 @@ static void
 cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad)
 {
   struct softpipe_context *softpipe = qs->softpipe;
-   float tmp[4][QUAD_SIZE];
+   float tmp[PIPE_MAX_COLOR_BUFS][4][QUAD_SIZE];
   unsigned i;

   assert(sizeof(quad->outputs.color) == sizeof(tmp));
@@ -30,7 +30,9 @@ cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad)

   for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) {
      /* set current cbuffer */
+#if 0 /* obsolete & going away */
      softpipe->current_cbuf = i;
+#endif

      /* pass blended quad to next stage */
      qs->next->run(qs->next, quad);
--- a/src/gallium/drivers/softpipe/sp_quad_colormask.c
+++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c
@@ -47,39 +47,44 @@ static void
 colormask_quad(struct quad_stage *qs, struct quad_header *quad)
 {
   struct softpipe_context *softpipe = qs->softpipe;
-   float dest[4][QUAD_SIZE];
-   struct softpipe_cached_tile *tile
-      = sp_get_cached_tile(softpipe,
-                           softpipe->cbuf_cache[softpipe->current_cbuf],
-                           quad->x0, quad->y0);
-   float (*quadColor)[4] = quad->outputs.color;
-   uint i, j;
+   uint cbuf;

-   /* get/swizzle dest colors */
-   for (j = 0; j < QUAD_SIZE; j++) {
-      int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1);
-      int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1);
-      for (i = 0; i < 4; i++) {
-         dest[i][j] = tile->data.color[y][x][i];
+   /* loop over colorbuffer outputs */
+   for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
+      float dest[4][QUAD_SIZE];
+      struct softpipe_cached_tile *tile
+         = sp_get_cached_tile(softpipe,
+                              softpipe->cbuf_cache[cbuf],
+                              quad->x0, quad->y0);
+      float (*quadColor)[4] = quad->outputs.color[cbuf];
+      uint i, j;
+
+      /* get/swizzle dest colors */
+      for (j = 0; j < QUAD_SIZE; j++) {
+         int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1);
+         int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1);
+         for (i = 0; i < 4; i++) {
+            dest[i][j] = tile->data.color[y][x][i];
+         }
      }
+
+      /* R */
+      if (!(softpipe->blend->colormask & PIPE_MASK_R))
+          COPY_4V(quadColor[0], dest[0]);
+
+      /* G */
+      if (!(softpipe->blend->colormask & PIPE_MASK_G))
+          COPY_4V(quadColor[1], dest[1]);
+
+      /* B */
+      if (!(softpipe->blend->colormask & PIPE_MASK_B))
+          COPY_4V(quadColor[2], dest[2]);
+
+      /* A */
+      if (!(softpipe->blend->colormask & PIPE_MASK_A))
+          COPY_4V(quadColor[3], dest[3]);
   }

-   /* R */
-   if (!(softpipe->blend->colormask & PIPE_MASK_R))
-       COPY_4V(quadColor[0], dest[0]);
-
-   /* G */
-   if (!(softpipe->blend->colormask & PIPE_MASK_G))
-       COPY_4V(quadColor[1], dest[1]);
-
-   /* B */
-   if (!(softpipe->blend->colormask & PIPE_MASK_B))
-       COPY_4V(quadColor[2], dest[2]);
-
-   /* A */
-   if (!(softpipe->blend->colormask & PIPE_MASK_A))
-       COPY_4V(quadColor[3], dest[3]);
-
   /* pass quad to next stage */
   qs->next->run(qs->next, quad);
 }
--- a/src/gallium/drivers/softpipe/sp_quad_coverage.c
+++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c
@@ -50,12 +50,17 @@ coverage_quad(struct quad_stage *qs, struct quad_header *quad)
   if ((softpipe->rasterizer->poly_smooth && quad->prim == PRIM_TRI) ||
       (softpipe->rasterizer->line_smooth && quad->prim == PRIM_LINE) ||
       (softpipe->rasterizer->point_smooth && quad->prim == PRIM_POINT)) {
-      float (*quadColor)[4] = quad->outputs.color;
-      unsigned j;
-      for (j = 0; j < QUAD_SIZE; j++) {
-         assert(quad->coverage[j] >= 0.0);
-         assert(quad->coverage[j] <= 1.0);
+      uint cbuf;
+
+      /* loop over colorbuffer outputs */
+      for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
+         float (*quadColor)[4] = quad->outputs.color[cbuf];
+         unsigned j;
+         for (j = 0; j < QUAD_SIZE; j++) {
+            assert(quad->coverage[j] >= 0.0);
+            assert(quad->coverage[j] <= 1.0);
         quadColor[3][j] *= quad->coverage[j];
+         }
      }
   }

--- a/src/gallium/drivers/softpipe/sp_quad_fs.c
+++ b/src/gallium/drivers/softpipe/sp_quad_fs.c
@@ -88,21 +88,63 @@ shade_quad(
 				    &qss->machine,
 				    quad );

-   /* store result color */
-   if (qss->colorOutSlot >= 0) {
-      /* XXX need to handle multiple color outputs someday */
-      assert(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot]
-             == TGSI_SEMANTIC_COLOR);
-      memcpy(
-             quad->outputs.color,
-             &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0],
-             sizeof( quad->outputs.color ) );
+#if 0 /* XXX multi color outputs - untested */
+   /* store outputs */
+   boolean z_written = FALSE;
+   {
+      const ubyte *sem_name = softpipe->fs->info.output_semantic_name;
+      const ubyte *sem_index = softpipe->fs->info.output_semantic_index;
+      const uint n = qss->stage.softpipe->fs->info.num_outputs;
+      uint i;
+      for (i = 0; i < n; i++) {
+         switch (sem_name[i]) {
+         case TGSI_SEMANTIC_COLOR:
+            {
+               uint cbuf = sem_index[i];
+               memcpy(quad->outputs.color[cbuf],
+                      &machine->Outputs[i].xyzw[0].f[0],
+                      sizeof(quad->outputs.color[0]) );
+            }
+            break;
+         case TGSI_SEMANTIC_POSITION:
+            {
+               uint j;
+               for (j = 0; j < 4; j++) {
+                  quad->outputs.depth[j] = machine->Outputs[0].xyzw[2].f[j];
+               }
+               z_written = TRUE;
+            }
+            break;
+         }
+      }
   }

-   /*
-    * XXX the following code for updating quad->outputs.depth
-    * isn't really needed if we did early z testing.
-    */
+   if (!z_written) {
+      /* compute Z values now, as in the quad earlyz stage */
+      /* XXX we should really only do this if the earlyz stage is not used */
+      const float fx = (float) quad->x0;
+      const float fy = (float) quad->y0;
+      const float dzdx = quad->posCoef->dadx[2];
+      const float dzdy = quad->posCoef->dady[2];
+      const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
+
+      quad->outputs.depth[0] = z0;
+      quad->outputs.depth[1] = z0 + dzdx;
+      quad->outputs.depth[2] = z0 + dzdy;
+      quad->outputs.depth[3] = z0 + dzdx + dzdy;
+   }
+#endif
+
+   /* store result color(s) */
+   if (qss->colorOutSlot >= 0) {
+      /* XXX need to handle multiple color outputs someday */
+      assert(softpipe->fs->info.output_semantic_name[qss->colorOutSlot]
+             == TGSI_SEMANTIC_COLOR);
+      memcpy(
+             quad->outputs.color[0],
+             &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0],
+             sizeof( quad->outputs.color[0] ) );
+   }

   /* store result Z */
   if (qss->depthOutSlot >= 0) {
--- a/src/gallium/drivers/softpipe/sp_quad_output.c
+++ b/src/gallium/drivers/softpipe/sp_quad_output.c
@@ -34,31 +34,36 @@


 /**
- * Write quad to framebuffer, taking mask into account.
- *
- * Note that surfaces support only full quad reads and writes.
+ * Last step of quad processing: write quad colors to the framebuffer,
+ * taking mask into account.
 */
 static void
 output_quad(struct quad_stage *qs, struct quad_header *quad)
 {
-   struct softpipe_context *softpipe = qs->softpipe;
-   struct softpipe_cached_tile *tile
-      = sp_get_cached_tile(softpipe,
-                           softpipe->cbuf_cache[softpipe->current_cbuf],
-                           quad->x0, quad->y0);
   /* in-tile pos: */
   const int itx = quad->x0 % TILE_SIZE;
   const int ity = quad->y0 % TILE_SIZE;
-   float (*quadColor)[4] = quad->outputs.color;
-   int i, j;

-   /* get/swizzle dest colors */
-   for (j = 0; j < QUAD_SIZE; j++) {
-      if (quad->mask & (1 << j)) {
-         int x = itx + (j & 1);
-         int y = ity + (j >> 1);
-         for (i = 0; i < 4; i++) { /* loop over color chans */
-            tile->data.color[y][x][i] = quadColor[i][j];
+   struct softpipe_context *softpipe = qs->softpipe;
+   uint cbuf;
+
+   /* loop over colorbuffer outputs */
+   for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
+      struct softpipe_cached_tile *tile
+         = sp_get_cached_tile(softpipe,
+                              softpipe->cbuf_cache[cbuf],
+                              quad->x0, quad->y0);
+      float (*quadColor)[4] = quad->outputs.color[cbuf];
+      int i, j;
+
+      /* get/swizzle dest colors */
+      for (j = 0; j < QUAD_SIZE; j++) {
+         if (quad->mask & (1 << j)) {
+            int x = itx + (j & 1);
+            int y = ity + (j >> 1);
+            for (i = 0; i < 4; i++) { /* loop over color chans */
+               tile->data.color[y][x][i] = quadColor[i][j];
+            }
         }
      }
   }