Get fragment shaders working on top of LLVM.

Redo the entry points, get the output propagation right, and interpolate the inputs before feeding them into LLVM for now.
2026-05-05 05:18:08 +02:00 · 2007-11-01 14:00:25 -04:00 · 2007-11-01 14:00:25 -04:00 · 45003b0bb9
commit 45003b0bb9
parent 2af2f7e419
5 changed files with 993 additions and 762 deletions
--- a/src/mesa/pipe/llvm/gallivm.cpp
+++ b/src/mesa/pipe/llvm/gallivm.cpp
@ -68,12 +68,21 @@
 #include <fstream>
 #include <iostream>

+struct gallivm_interpolate { // one input channel that has to be interpolated for each quad
+   int attrib; // input attribute index; used to index both coefs[] and inputs[][attrib]
+   int chan; // channel within the attribute; used to index a0/dadx/dady and inputs[][][chan]
+   int type; // interpolation method, copied from decl->Interpolation.Interpolate (compared against TGSI_INTERPOLATE_*)
+};
+
 struct gallivm_prog {
   llvm::Module *module;
   void *function; // cast to fragment_shader_runner by gallivm_fragment_shader_exec()
   int   num_consts; // forwarded to the runner together with the constants array
   int   id;
   enum gallivm_shader_type type;
+
+   struct gallivm_interpolate interpolators[32*4]; //FIXME: this might not be enough for some shaders
+   int   num_interp; // number of interpolators[] entries filled in so far by add_interpolator()
 };

 struct gallivm_cpu_engine {
@ -141,12 +150,71 @@ static inline void AddStandardCompilePasses(PassManager &PM) {
   PM.add(createConstantMergePass());        // Merge dup global constants
 }

+/*
+ * Append one (attribute, channel, interpolation type) record to the
+ * program's interpolator table.
+ *
+ * prog   - program whose interpolators[] table receives the record
+ * interp - record to copy into the table (copied by value)
+ *
+ * The table has a fixed capacity.  A record that does not fit trips the
+ * assert in debug builds and is dropped in release builds instead of
+ * being written past the end of the array.
+ */
+static inline void
+add_interpolator(struct gallivm_prog *prog,
+                 struct gallivm_interpolate *interp)
+{
+   const int capacity =
+      (int)(sizeof(prog->interpolators) / sizeof(prog->interpolators[0]));
+
+   /* Never write beyond the fixed-size table. */
+   assert(prog->num_interp < capacity);
+   if (prog->num_interp >= capacity)
+      return;
+
+   prog->interpolators[prog->num_interp] = *interp;
+   ++prog->num_interp;
+}
 static void
-translate_declaration(llvm::Module *module,
+translate_declaration(struct gallivm_prog *prog, // program whose interpolator table is filled in from input declarations
+                      llvm::Module *module,
                      Storage *storage,
                      struct tgsi_full_declaration *decl,
                      struct tgsi_full_declaration *fd)
 {
+   if (decl->Declaration.File == TGSI_FILE_INPUT) { // only input declarations produce interpolator records
+      unsigned first, last, mask;
+      uint interp_method;
+
+      assert(decl->Declaration.Declare == TGSI_DECLARE_RANGE);
+
+      first = decl->u.DeclarationRange.First;
+      last = decl->u.DeclarationRange.Last;
+      mask = decl->Declaration.UsageMask;
+
+      /* Do not touch WPOS.xy */
+      if (first == 0) {
+         mask &= ~TGSI_WRITEMASK_XY; // NOTE(review): the reduced mask is then applied to every attribute in first..last, not only attribute 0 - confirm intended
+         if (mask == TGSI_WRITEMASK_NONE) {
+            first++; // NOTE(review): mask is still NONE here, so the loops below record nothing for the remaining attributes - confirm intended
+            if (first > last) {
+               return;
+            }
+         }
+      }
+
+      interp_method = decl->Interpolation.Interpolate; // one method shared by every record created below
+
+      if (mask == TGSI_WRITEMASK_XYZW) { // all channels used: record every channel of every attribute, attribute-major
+         unsigned i, j;
+
+         for (i = first; i <= last; i++) {
+            for (j = 0; j < NUM_CHANNELS; j++) {
+               //interp( mach, i, j );
+               struct gallivm_interpolate interp;
+               interp.type = interp_method;
+               interp.attrib = i;
+               interp.chan = j;
+               add_interpolator(prog, &interp);
+            }
+         }
+      } else { // partial mask: record only the channels whose bit is set, channel-major
+         unsigned i, j;
+         for( j = 0; j < NUM_CHANNELS; j++ ) {
+            if( mask & (1 << j) ) {
+               for( i = first; i <= last; i++ ) {
+                  struct gallivm_interpolate interp;
+                  interp.type = interp_method;
+                  interp.attrib = i;
+                  interp.chan = j;
+                  add_interpolator(prog, &interp);
+               }
+            }
+         }
+      }
+   }
 }


@ -686,7 +754,7 @@ tgsi_to_llvm(struct gallivm_prog *prog, const struct tgsi_token *tokens)

      switch (parse.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION:
-         translate_declaration(mod, &storage,
+         translate_declaration(prog, mod, &storage,
                               &parse.FullToken.FullDeclaration,
                               &fd);
         break;
@ -791,24 +859,83 @@ int gallivm_prog_exec(struct gallivm_prog *prog,
 }


+
+/*
+ * Constant (flat) interpolation of one input channel: every fragment of
+ * the quad receives the a0 term of the attribute's coefficients.
+ *
+ * inputs - per-fragment input registers, indexed [fragment][attrib][chan]
+ * coefs  - interpolation coefficients, indexed by attribute
+ * attrib - attribute to fill in
+ * chan   - channel of that attribute to fill in
+ */
+static inline void
+constant_interpolation(float (*inputs)[16][4],
+                       const struct tgsi_interp_coef *coefs,
+                       unsigned attrib,
+                       unsigned chan)
+{
+   const struct tgsi_interp_coef *attr_coef = &coefs[attrib];
+   unsigned frag = 0;
+
+   while (frag < QUAD_SIZE) {
+      inputs[frag][attrib][chan] = attr_coef->a0[chan];
+      ++frag;
+   }
+}
+
+/*
+ * Linear interpolation of one input channel across the quad:
+ *    value = a0 + dadx * x + dady * y
+ * where x and y are read from channels 0 and 1 of input attribute 0 of
+ * the fragment being processed.
+ *
+ * inputs - per-fragment input registers, indexed [fragment][attrib][chan]
+ * coefs  - interpolation coefficients, indexed by attribute
+ * attrib - attribute to fill in
+ * chan   - channel of that attribute to fill in
+ */
+static inline void
+linear_interpolation(float (*inputs)[16][4],
+                     const struct tgsi_interp_coef *coefs,
+                     unsigned attrib,
+                     unsigned chan)
+{
+   const struct tgsi_interp_coef *attr_coef = &coefs[attrib];
+   unsigned frag;
+
+   for (frag = 0; frag < QUAD_SIZE; ++frag) {
+      float (*regs)[4] = inputs[frag];
+      /* position is read before the destination is written */
+      const float pos_x = regs[0][0];
+      const float pos_y = regs[0][1];
+
+      regs[attrib][chan] =
+         attr_coef->a0[chan] +
+         attr_coef->dadx[chan] * pos_x +
+         attr_coef->dady[chan] * pos_y;
+   }
+}
+
+/*
+ * Perspective-correct interpolation of one input channel across the quad:
+ *    value = (a0 + dadx * x + dady * y) * w
+ * where x and y are channels 0 and 1 of input attribute 0 and w is the
+ * reciprocal of channel 3 of input attribute 0.
+ *
+ * inputs - per-fragment input registers, indexed [fragment][attrib][chan]
+ * coefs  - interpolation coefficients, indexed by attribute
+ * attrib - attribute to fill in
+ * chan   - channel of that attribute to fill in
+ *
+ * Channel 3 of attribute 0 must be non-zero for every fragment; this is
+ * asserted before the value is used as a divisor.
+ */
+static inline void
+perspective_interpolation(float (*inputs)[16][4],
+                          const struct tgsi_interp_coef *coefs,
+                          unsigned attrib,
+                          unsigned chan )
+{
+   unsigned i;
+
+   for( i = 0; i < QUAD_SIZE; i++ ) {
+      const float x = inputs[i][0][0];
+      const float y = inputs[i][0][1];
+      float w;
+
+      /* Check the divisor before dividing by it, not afterwards. */
+      assert(inputs[i][0][3] != 0.0f);
+      /* WPOS.w here is really 1/w */
+      w = 1.0f / inputs[i][0][3];
+
+      inputs[i][attrib][chan] =
+         (coefs[attrib].a0[chan] +
+          coefs[attrib].dadx[chan] * x +
+          coefs[attrib].dady[chan] * y) * w;
+   }
+}
+
 typedef int (*fragment_shader_runner)(float x, float y,
-                                     float (*dests)[32][4],
-                                     struct tgsi_interp_coef *coef,
-                                     float (*consts)[4], int num_consts,
-                                     struct tgsi_sampler *samplers,
-                                     unsigned *sampler_units);
+                                      float (*dests)[16][4], // per-fragment outputs written by the runner
+                                      float (*inputs)[16][4], // per-fragment inputs, already interpolated by the caller
+                                      int num_attribs,
+                                      float (*consts)[4], int num_consts,
+                                      struct tgsi_sampler *samplers,
+                                      unsigned *sampler_units);

 int gallivm_fragment_shader_exec(struct gallivm_prog *prog,
-                                 float x, float y,
-                                 float (*dests)[32][4],
-                                 struct tgsi_interp_coef *coef,
+                                 float fx, float fy,
+                                 float (*dests)[16][4],
+                                 float (*inputs)[16][4],
                                 float (*consts)[4],
                                 struct tgsi_sampler *samplers,
                                 unsigned *sampler_units)
 {
   fragment_shader_runner runner = reinterpret_cast<fragment_shader_runner>(prog->function);
   assert(runner);
-   runner(x, y, dests, coef, consts, prog->num_consts, samplers, sampler_units);
+
+   runner(fx, fy, dests, inputs, prog->num_interp, // NOTE(review): num_interp counts (attrib, chan) records, but the runner parameter is named num_attribs - confirm which count the runner expects
+          consts, prog->num_consts,
+          samplers, sampler_units);

   return 0; // the runner's int result is discarded; callers always get 0
 }
@ -928,8 +1055,34 @@ struct gallivm_cpu_engine * gallivm_global_cpu_engine()
   return CPU;
 }

+/*
+ * Interpolate every input channel recorded in prog->interpolators for
+ * all fragments of a quad.  Each record names an attribute, a channel and
+ * an interpolation type; the matching helper writes the result into
+ * inputs.  A record with an unknown type trips assert(0).
+ *
+ * prog   - program providing the interpolator records
+ * inputs - per-fragment input registers, indexed [fragment][attrib][chan]
+ * coef   - interpolation coefficients, indexed by attribute
+ */
+void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog,
+                                     float (*inputs)[16][4],
+                                     const struct tgsi_interp_coef *coef)
+{
+   int idx = 0;
+
+   while (idx < prog->num_interp) {
+      const struct gallivm_interpolate *entry = &prog->interpolators[idx];
+      const int method = entry->type;
+
+      if (method == TGSI_INTERPOLATE_CONSTANT)
+         constant_interpolation(inputs, coef, entry->attrib, entry->chan);
+      else if (method == TGSI_INTERPOLATE_LINEAR)
+         linear_interpolation(inputs, coef, entry->attrib, entry->chan);
+      else if (method == TGSI_INTERPOLATE_PERSPECTIVE)
+         perspective_interpolation(inputs, coef, entry->attrib, entry->chan);
+      else
+         assert( 0 );
+
+      ++idx;
+   }
+}
+
 #endif /* MESA_LLVM */




+
--- a/src/mesa/pipe/llvm/gallivm.h
+++ b/src/mesa/pipe/llvm/gallivm.h
@ -64,11 +64,14 @@ int gallivm_prog_exec(struct gallivm_prog *prog,
                      int num_attribs);
 int gallivm_fragment_shader_exec(struct gallivm_prog *prog,
                                 float x, float y,
-                                 float (*dests)[32][4],
-                                 struct tgsi_interp_coef *coef,
+                                 float (*dests)[PIPE_MAX_SHADER_INPUTS][4], /* NOTE(review): the definition in gallivm.cpp spells this bound as 16 - confirm PIPE_MAX_SHADER_INPUTS == 16 */
+                                 float (*inputs)[PIPE_MAX_SHADER_INPUTS][4], /* per-fragment inputs, interpolated beforehand */
                                 float (*consts)[4],
                                 struct tgsi_sampler *samplers,
                                 unsigned *sampler_units);
+void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog,
+                                     float (*inputs)[PIPE_MAX_SHADER_INPUTS][4], /* written in place, one channel per interpolator record */
+                                     const struct tgsi_interp_coef *coefs);
 void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix);


--- a/src/mesa/pipe/llvm/llvm_base_shader.cpp
+++ b/src/mesa/pipe/llvm/llvm_base_shader.cpp
--- a/src/mesa/pipe/llvm/llvm_entry.c
+++ b/src/mesa/pipe/llvm/llvm_entry.c
@ -215,15 +215,10 @@ struct tgsi_sampler
   struct softpipe_tile_cache *cache;
 };

-struct tgsi_interp_coef
-{
-   float a0[NUM_CHANNELS];	/* in an xyzw layout */
-   float dadx[NUM_CHANNELS];
-   float dady[NUM_CHANNELS];
-};
 int run_fragment_shader(float x, float y,
-                        float (*dests)[32][4],
-                        struct tgsi_interp_coef *coef,
+                        float (*dests)[16][4],
+                        float (*ainputs)[16][4],
+                        int num_inputs,
                        float (*aconsts)[4],
                        int num_consts,
                        struct tgsi_sampler *samplers,
@ -233,40 +228,17 @@ int run_fragment_shader(float x, float y,
   float4  consts[32];
   float4  results[4][16];
   float4  temps[128];//MAX_PROGRAM_TEMPS
+   int     kilmask = 0;

-   float4  fr1, fr2, fr3, fr4;
-   fr1.x = x;
-   fr1.y = y;
-   fr2.x = x + 1.f;
-   fr2.y = y;
-   fr3.x = x;
-   fr3.y = y + 1.f;
-   fr4.x = x + 1.f;
-   fr4.y = y + 1.f;
-
-   inputs[0][0] = fr1;
-   inputs[1][0] = fr2;
-   inputs[2][0] = fr3;
-   inputs[3][0] = fr4;
-
-   for (int i = 0; i < 4; ++i) {
-      float4 vec;
-      vec.x = coef->a0[0];
-      vec.y = coef->a0[1];
-      vec.z = coef->a0[2];
-      vec.w = coef->a0[3];
-      inputs[i][1] = vec;
-   }
-   /*printf("XXX LLVM run_vertex_shader vertices = %d, inputs = %d, attribs = %d, consts = %d\n",
-     num_vertices, num_inputs, num_attribs, num_consts);*/
-   //from_array(inputs, ainputs, num_vertices, num_inputs);
+   from_array(inputs, ainputs, 4, num_inputs);
   from_consts(consts, aconsts, num_consts);
-   printf("AAAAAAAAAAAAAAAAAAAAAAA FRAGMENT SHADER %f %f\n", x, y);
+   //printf("AAAAAAAAAAAAAAAAAAAAAAA FRAGMENT SHADER %f %f\n", x, y);
   for (int i = 0; i < 4; ++i) {
      float4 *in  = inputs[i];
      float4 *res = results[i];
      execute_shader(res, in, consts, temps);
      to_array(dests[i], res, 2);
   }
+   return ~kilmask;
 }

--- a/src/mesa/pipe/softpipe/sp_quad_fs.c
+++ b/src/mesa/pipe/softpipe/sp_quad_fs.c
@ -94,11 +94,6 @@ shade_quad(

   machine->SamplerUnits = softpipe->sampler_units;
   machine->InterpCoefs = quad->coef;
-   printf("COEF = [%f %f %f %f], [%f %f %f %f], [%f %f %f %f] %p\n",
-          quad->coef->a0[0], quad->coef->a0[1], quad->coef->a0[2], quad->coef->a0[3],
-          quad->coef->dadx[0], quad->coef->dadx[1], quad->coef->dadx[2], quad->coef->dadx[3],
-          quad->coef->dady[0], quad->coef->dady[1], quad->coef->dady[2], quad->coef->dady[3],
-          quad->coef);

   machine->Inputs[0].xyzw[0].f[0] = fx;
   machine->Inputs[0].xyzw[0].f[1] = fx + 1.0f;
@ -170,26 +165,59 @@ shade_quad_llvm(struct quad_stage *qs,
 {
   struct quad_shade_stage *qss = quad_shade_stage(qs);
   struct softpipe_context *softpipe = qs->softpipe;
-   float dests[4][32][4];
+   float dests[4][16][4];
   const float fx = (float) quad->x0;
   const float fy = (float) quad->y0;
   struct gallivm_prog *llvm = qss->llvm_prog;
+   float inputs[4][16][4];
+   memset(inputs, 0, sizeof(inputs));

+   inputs[0][0][0] = fx;
+   inputs[1][0][0] = fx + 1.0f;
+   inputs[2][0][0] = fx;
+   inputs[3][0][0] = fx + 1.0f;

-   quad->mask = gallivm_fragment_shader_exec(
-      llvm, fx, fy, dests, quad->coef,
-      softpipe->mapped_constants[PIPE_SHADER_FRAGMENT],
-      qss->samplers, softpipe->sampler_units);
+   inputs[0][0][1] = fy;
+   inputs[1][0][1] = fy;
+   inputs[2][0][1] = fy + 1.0f;
+   inputs[3][0][1] = fy + 1.0f;
+   printf("MASK = %d\n", quad->mask);
+   gallivm_prog_inputs_interpolate(llvm, inputs, quad->coef);
+   for (int i = 0; i < 4; ++i) {
+      for (int j = 0; j < 2; ++j) {
+         printf("IN(%d,%d) [%f %f %f %f]\n", i, j, 
+                inputs[i][j][0], inputs[i][j][1], inputs[i][j][2], inputs[i][j][3]);
+      }
+   }
+
+   /*quad->mask &=*/
+      gallivm_fragment_shader_exec(llvm, fx, fy, dests, inputs,
+                                   softpipe->mapped_constants[PIPE_SHADER_FRAGMENT],
+                                   qss->samplers, softpipe->sampler_units);
+
+   printf("OUT LLVM = 1[%f %f %f %f], 2[%f %f %f %f]\n",
+          dests[0][0][0], dests[0][0][1], dests[0][0][2], dests[0][0][3], 
+          dests[0][1][0], dests[0][1][1], dests[0][1][2], dests[0][1][3]);

   /* store result color */
   if (qss->colorOutSlot >= 0) {
+      unsigned i;
      /* XXX need to handle multiple color outputs someday */
      assert(qss->stage.softpipe->fs->shader.output_semantic_name[qss->colorOutSlot]
             == TGSI_SEMANTIC_COLOR);
-      memcpy(
-             quad->outputs.color,
-             &dests[0][qss->colorOutSlot],
-             sizeof( quad->outputs.color ) );
+      for (i = 0; i < QUAD_SIZE; ++i) {
+         quad->outputs.color[0][i] = dests[i][qss->colorOutSlot][0];
+         quad->outputs.color[1][i] = dests[i][qss->colorOutSlot][1];
+         quad->outputs.color[2][i] = dests[i][qss->colorOutSlot][2];
+         quad->outputs.color[3][i] = dests[i][qss->colorOutSlot][3];
+      }
+   }
+   for (int i = 0; i < QUAD_SIZE; ++i) {
+      printf("Q%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot,
+             quad->outputs.color[0][i],
+             quad->outputs.color[1][i],
+             quad->outputs.color[2][i],
+             quad->outputs.color[3][i]);
   }

   /* store result Z */
@ -197,16 +225,21 @@ shade_quad_llvm(struct quad_stage *qs,
      /* output[slot] is new Z */
      uint i;
      for (i = 0; i < 4; i++) {
-         quad->outputs.depth[i] = dests[0][2][i];
+         quad->outputs.depth[i] = dests[i][0][2];
      }
   }
   else {
      /* copy input Z (which was interpolated by the executor) to output Z */
      uint i;
      for (i = 0; i < 4; i++) {
-         quad->outputs.depth[i] = dests[0][2][i];
+         quad->outputs.depth[i] = inputs[i][0][2];
      }
   }
+   printf("D [%f, %f, %f, %f] mask = %d\n",
+             quad->outputs.depth[0],
+             quad->outputs.depth[1],
+             quad->outputs.depth[2],
+             quad->outputs.depth[3], quad->mask);

   /* shader may cull fragments */
   if( quad->mask ) {