Redo the way we pass arguments to LLVM.

Simply pass aligned arrays; they should cast to vectors without any problems. Also remove an unnecessary memset.
2026-05-07 00:38:48 +02:00 · 2007-11-07 13:26:45 -05:00 · 2007-11-07 13:26:45 -05:00 · 6dc4e6ae15
commit 6dc4e6ae15
parent 9d6e6e86d9
5 changed files with 438 additions and 876 deletions
--- a/src/mesa/pipe/draw/draw_vertex_shader_llvm.c
+++ b/src/mesa/pipe/draw/draw_vertex_shader_llvm.c
@ -115,13 +115,12 @@ void draw_vertex_shader_queue_flush_llvm(struct draw_context *draw)
   unsigned i;

   struct vertex_header *dests[VS_QUEUE_LENGTH];
-   float                 inputs[VS_QUEUE_LENGTH][PIPE_MAX_SHADER_INPUTS][4];
-   float                 outputs[VS_QUEUE_LENGTH][PIPE_MAX_SHADER_INPUTS][4];
+   float                 inputs[VS_QUEUE_LENGTH][PIPE_MAX_SHADER_INPUTS][4] ALIGN16_ATTRIB;
+   float                 outputs[VS_QUEUE_LENGTH][PIPE_MAX_SHADER_INPUTS][4] ALIGN16_ATTRIB;
   float (*consts)[4]          = (float (*)[4]) draw->user.constants;
   struct gallivm_prog  *prog  = draw->vertex_shader->llvm_prog;
   const float          *scale = draw->viewport.scale;
   const float          *trans = draw->viewport.translate;
-
   /* fetch the inputs */
   for (i = 0; i < draw->vs.queue_nr; ++i) {
      unsigned elt = draw->vs.queue[i].elt;
@ -135,6 +134,7 @@ void draw_vertex_shader_queue_flush_llvm(struct draw_context *draw)
                     draw->vertex_shader->state->num_inputs,
                     draw->vertex_info.num_attribs - 2);

+
   /* store machine results */
   for (int i = 0; i < draw->vs.queue_nr; ++i) {
      unsigned slot;
@ -158,7 +158,6 @@ void draw_vertex_shader_queue_flush_llvm(struct draw_context *draw)

      vOut->clipmask = compute_clipmask(vOut->clip, draw->plane, draw->nr_planes);
      vOut->edgeflag = 1;
-
      /* divide by w */
      w = 1.0f / w;
      x *= w;
--- a/src/mesa/pipe/llvm/llvm_base_shader.cpp
+++ b/src/mesa/pipe/llvm/llvm_base_shader.cpp
--- a/src/mesa/pipe/llvm/llvm_entry.c
+++ b/src/mesa/pipe/llvm/llvm_entry.c
@ -68,6 +68,7 @@ compute_clipmask(float4 clip, float4 (*plane), unsigned nr)
   return mask;
 }

+
 inline void collect_results(float4 *results, struct vertex_header *vOut,
                            float4 *planes, int nr_planes,
                            float4 scale, float4 trans,
@ -76,7 +77,6 @@ inline void collect_results(float4 *results, struct vertex_header *vOut,
   /* store results */
   unsigned slot;
   float x, y, z, w;
-
   /* Handle attr[0] (position) specially:
    */
   float4 res0 = results[0];
@ -85,7 +85,6 @@ inline void collect_results(float4 *results, struct vertex_header *vOut,
   y = clip[1] = res0.y;
   z = clip[2] = res0.z;
   w = clip[3] = res0.w;
-
   vOut->clipmask = compute_clipmask(res0, planes, nr_planes);
   vOut->edgeflag = 1;

@ -176,23 +175,20 @@ struct ShaderInput

 extern void execute_shader(struct ShaderInput *input);

-void run_vertex_shader(float (*ainputs)[16][4],
-                       float (*dests)[16][4],
+void run_vertex_shader(float4 (*inputs)[16],
+                       float4 (*results)[16],
                       float (*aconsts)[4],
                       int num_vertices,
                       int num_inputs,
                       int num_attribs,
                       int num_consts)
 {
-   float4  inputs[16*32*4][16];
   float4  consts[32];
-   float4  results[16*32*4][16];
   float4  temps[128];//MAX_PROGRAM_TEMPS

   struct ShaderInput args;
   /*printf("XXX LLVM run_vertex_shader vertices = %d, inputs = %d, attribs = %d, consts = %d\n",
     num_vertices, num_inputs, num_attribs, num_consts);*/
-   from_array(inputs, ainputs, num_vertices, num_inputs);
   from_consts(consts, aconsts, num_consts);
   args.consts = consts;
   args.temps = temps;
@ -200,7 +196,6 @@ void run_vertex_shader(float (*ainputs)[16][4],
      args.dests  = results[i];
      args.inputs = inputs[i];
      execute_shader(&args);
-      to_array(dests[i], args.dests, num_attribs);
   }
 }

@ -227,22 +222,19 @@ struct tgsi_sampler


 int run_fragment_shader(float x, float y,
-                        float (*dests)[16][4],
-                        float (*ainputs)[16][4],
+                        float4 (*results)[16],
+                        float4 (*inputs)[16],
                        int num_inputs,
                        float (*aconsts)[4],
                        int num_consts,
                        struct tgsi_sampler *samplers)
 {
-   float4  inputs[4][16];
   float4  consts[32];
-   float4  results[4][16];
   float4  temps[128];//MAX_PROGRAM_TEMPS
   struct ShaderInput args;
   int mask = 0;
   args.kilmask = 0;

-   from_array(inputs, ainputs, 4, num_inputs);
   from_consts(consts, aconsts, num_consts);
   args.consts = consts;
   args.temps = temps;
@ -254,8 +246,6 @@ int run_fragment_shader(float x, float y,
      args.kilmask = 0;
      execute_shader(&args);
      args.kilmask = mask | (args.kilmask << i);
-
-      to_array(dests[i], args.dests, 2);
   }
   return ~args.kilmask;
 }
--- a/src/mesa/pipe/p_compiler.h
+++ b/src/mesa/pipe/p_compiler.h
@ -73,9 +73,11 @@ typedef unsigned long long uint64;
 #if defined __GNUC__
 #define ALIGN16_DECL(TYPE, NAME, SIZE)  TYPE NAME##___aligned[SIZE] __attribute__(( aligned( 16 ) ))
 #define ALIGN16_ASSIGN(NAME) NAME##___aligned
+#define ALIGN16_ATTRIB  __attribute__(( aligned( 16 ) ))
 #else
 #define ALIGN16_DECL(TYPE, NAME, SIZE)  TYPE NAME##___unaligned[SIZE + 1]
 #define ALIGN16_ASSIGN(NAME) align16(NAME##___unaligned)
+#define ALIGN16_ATTRIB
 #endif


--- a/src/mesa/pipe/softpipe/sp_quad_fs.c
+++ b/src/mesa/pipe/softpipe/sp_quad_fs.c
@ -173,12 +173,11 @@ shade_quad_llvm(struct quad_stage *qs,
 {
   struct quad_shade_stage *qss = quad_shade_stage(qs);
   struct softpipe_context *softpipe = qs->softpipe;
-   float dests[4][16][4];
+   float dests[4][16][4] ALIGN16_ATTRIB;
+   float inputs[4][16][4] ALIGN16_ATTRIB;
   const float fx = (float) quad->x0;
   const float fy = (float) quad->y0;
   struct gallivm_prog *llvm = qss->llvm_prog;
-   float inputs[4][16][4];
-   memset(inputs, 0, sizeof(inputs));

   inputs[0][0][0] = fx;
   inputs[1][0][0] = fx + 1.0f;