From e377037bef521a985dc801371f195ada327ec304 Mon Sep 17 00:00:00 2001
From: Jan Vesely <jan.vesely@rutgers.edu>
Date: Fri, 5 Feb 2016 17:54:57 -0500
Subject: [PATCH 01/94] r600, compute: Do not overwrite pipe_resource.screen
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

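Found by inspection: the struct copy from *templ overwrote the screen
pointer that had just been assigned, so assign screen after the copy.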

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/r600/evergreen_compute.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 2cf08897a8d..d92e691fdb8 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -961,8 +961,8 @@ struct pipe_resource *r600_compute_global_buffer_create(
 			templ->array_size);
 
 	result->base.b.vtbl = &r600_global_buffer_vtbl;
-	result->base.b.b.screen = screen;
 	result->base.b.b = *templ;
+	result->base.b.b.screen = screen;
 	pipe_reference_init(&result->base.b.b.reference, 1);
 
 	size_in_dw = (templ->width0+3) / 4;

From c1bbaff1e83f901d67d78f9e1ddfe8291dd09bfa Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Wed, 3 Feb 2016 09:39:10 +1100
Subject: [PATCH 02/94] glsl: replace unreachable code with an assert()

All interface blocks will have been lowered by this point, so just
use an assert. Returning false would have caused all sorts of
problems if they were not lowered yet, and there is an assert to
catch this later anyway.

We also update the tests to reflect this change.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/compiler/glsl/link_varyings.cpp       | 25 +++-----
 src/compiler/glsl/tests/varyings_test.cpp | 78 +++++++++--------------
 2 files changed, 38 insertions(+), 65 deletions(-)

diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp
index a4c730ffdcf..535c83cd0e7 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -1352,7 +1352,7 @@ private:
 
 namespace linker {
 
-bool
+void
 populate_consumer_input_sets(void *mem_ctx, exec_list *ir,
                              hash_table *consumer_inputs,
                              hash_table *consumer_interface_inputs,
@@ -1366,8 +1366,8 @@ populate_consumer_input_sets(void *mem_ctx, exec_list *ir,
       ir_variable *const input_var = node->as_variable();
 
       if ((input_var != NULL) && (input_var->data.mode == ir_var_shader_in)) {
-         if (input_var->type->is_interface())
-            return false;
+         /* All interface blocks should have been lowered by this point */
+         assert(!input_var->type->is_interface());
 
          if (input_var->data.explicit_location) {
             /* assign_varying_locations only cares about finding the
@@ -1401,8 +1401,6 @@ populate_consumer_input_sets(void *mem_ctx, exec_list *ir,
          }
       }
    }
-
-   return true;
 }
 
 /**
@@ -1626,18 +1624,11 @@ assign_varying_locations(struct gl_context *ctx,
    if (producer)
       canonicalize_shader_io(producer->ir, ir_var_shader_out);
 
-   if (consumer
-       && !linker::populate_consumer_input_sets(mem_ctx,
-                                                consumer->ir,
-                                                consumer_inputs,
-                                                consumer_interface_inputs,
-                                                consumer_inputs_with_locations)) {
-      assert(!"populate_consumer_input_sets failed");
-      hash_table_dtor(tfeedback_candidates);
-      hash_table_dtor(consumer_inputs);
-      hash_table_dtor(consumer_interface_inputs);
-      return false;
-   }
+   if (consumer)
+      linker::populate_consumer_input_sets(mem_ctx, consumer->ir,
+                                           consumer_inputs,
+                                           consumer_interface_inputs,
+                                           consumer_inputs_with_locations);
 
    if (producer) {
       foreach_in_list(ir_instruction, node, producer->ir) {
diff --git a/src/compiler/glsl/tests/varyings_test.cpp b/src/compiler/glsl/tests/varyings_test.cpp
index 0c4e0a471b8..9be5e8344b4 100644
--- a/src/compiler/glsl/tests/varyings_test.cpp
+++ b/src/compiler/glsl/tests/varyings_test.cpp
@@ -156,11 +156,11 @@ TEST_F(link_varyings, single_simple_input)
 
    ir.push_tail(v);
 
-   ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
-                                                    &ir,
-                                                    consumer_inputs,
-                                                    consumer_interface_inputs,
-                                                    junk));
+   linker::populate_consumer_input_sets(mem_ctx,
+                                        &ir,
+                                        consumer_inputs,
+                                        consumer_interface_inputs,
+                                        junk);
 
    EXPECT_EQ((void *) v, hash_table_find(consumer_inputs, "a"));
    EXPECT_EQ(1u, num_elements(consumer_inputs));
@@ -183,11 +183,11 @@ TEST_F(link_varyings, gl_ClipDistance)
 
    ir.push_tail(clipdistance);
 
-   ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
-                                                    &ir,
-                                                    consumer_inputs,
-                                                    consumer_interface_inputs,
-                                                    junk));
+   linker::populate_consumer_input_sets(mem_ctx,
+                                        &ir,
+                                        consumer_inputs,
+                                        consumer_interface_inputs,
+                                        junk);
 
    EXPECT_EQ(clipdistance, junk[VARYING_SLOT_CLIP_DIST0]);
    EXPECT_TRUE(is_empty(consumer_inputs));
@@ -205,11 +205,11 @@ TEST_F(link_varyings, single_interface_input)
 
    ir.push_tail(v);
 
-   ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
-                                                    &ir,
-                                                    consumer_inputs,
-                                                    consumer_interface_inputs,
-                                                    junk));
+   linker::populate_consumer_input_sets(mem_ctx,
+                                        &ir,
+                                        consumer_inputs,
+                                        consumer_interface_inputs,
+                                        junk);
    char *const full_name = interface_field_name(simple_interface);
 
    EXPECT_EQ((void *) v, hash_table_find(consumer_interface_inputs, full_name));
@@ -236,11 +236,11 @@ TEST_F(link_varyings, one_interface_and_one_simple_input)
 
    ir.push_tail(iface);
 
-   ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
-                                                    &ir,
-                                                    consumer_inputs,
-                                                    consumer_interface_inputs,
-                                                    junk));
+   linker::populate_consumer_input_sets(mem_ctx,
+                                        &ir,
+                                        consumer_inputs,
+                                        consumer_interface_inputs,
+                                        junk);
 
    char *const iface_field_name = interface_field_name(simple_interface);
 
@@ -252,24 +252,6 @@ TEST_F(link_varyings, one_interface_and_one_simple_input)
    EXPECT_EQ(1u, num_elements(consumer_inputs));
 }
 
-TEST_F(link_varyings, invalid_interface_input)
-{
-   ir_variable *const v =
-      new(mem_ctx) ir_variable(simple_interface,
-                               "named_interface",
-                               ir_var_shader_in);
-
-   ASSERT_EQ(simple_interface, v->get_interface_type());
-
-   ir.push_tail(v);
-
-   EXPECT_FALSE(linker::populate_consumer_input_sets(mem_ctx,
-                                                    &ir,
-                                                    consumer_inputs,
-                                                     consumer_interface_inputs,
-                                                     junk));
-}
-
 TEST_F(link_varyings, interface_field_doesnt_match_noninterface)
 {
    char *const iface_field_name = interface_field_name(simple_interface);
@@ -283,11 +265,11 @@ TEST_F(link_varyings, interface_field_doesnt_match_noninterface)
 
    ir.push_tail(in_v);
 
-   ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
-                                                    &ir,
-                                                    consumer_inputs,
-                                                    consumer_interface_inputs,
-                                                    junk));
+   linker::populate_consumer_input_sets(mem_ctx,
+                                        &ir,
+                                        consumer_inputs,
+                                        consumer_interface_inputs,
+                                        junk);
 
    /* Create an output variable, "v", that is part of an interface block named
     * "a".  They should not match.
@@ -325,11 +307,11 @@ TEST_F(link_varyings, interface_field_doesnt_match_noninterface_vice_versa)
 
    ir.push_tail(in_v);
 
-   ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
-                                                    &ir,
-                                                    consumer_inputs,
-                                                    consumer_interface_inputs,
-                                                    junk));
+   linker::populate_consumer_input_sets(mem_ctx,
+                                        &ir,
+                                        consumer_inputs,
+                                        consumer_interface_inputs,
+                                        junk);
 
    /* Create an output variable "a.v".  They should not match.
     */

From ea7f64f74d0910f72730849d3081ae8a62cc28d4 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Wed, 3 Feb 2016 09:46:56 +1100
Subject: [PATCH 03/94] glsl: don't generate transform feedback candidate when
 not required

If we are not even looking for one, don't bother generating a candidate
list.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/compiler/glsl/link_varyings.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp
index 535c83cd0e7..590de174507 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -1643,8 +1643,10 @@ assign_varying_locations(struct gl_context *ctx,
                 (output_var->data.stream < MAX_VERTEX_STREAMS &&
                  producer->Stage == MESA_SHADER_GEOMETRY));
 
-         tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates);
-         g.process(output_var);
+         if (num_tfeedback_decls > 0) {
+            tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates);
+            g.process(output_var);
+         }
 
          ir_variable *const input_var =
             linker::get_matching_input(mem_ctx, output_var, consumer_inputs,

From 04c2ca5038fbfd6848cdc4f44d88b55a6047d579 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Sat, 6 Feb 2016 20:24:41 +0100
Subject: [PATCH 04/94] tgsi: use TGSI_WRITEMASK_XYZW instead of hardcoding the
 mask

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Serge Martin <edb+mesa@sigluy.net>
---
 src/gallium/auxiliary/tgsi/tgsi_transform.h | 2 +-
 src/gallium/auxiliary/tgsi/tgsi_ureg.c      | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h
index 3bd512b6f3e..27e6179c9ee 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h
@@ -192,7 +192,7 @@ tgsi_transform_sampler_view_decl(struct tgsi_transform_context *ctx,
 
    decl = tgsi_default_full_declaration();
    decl.Declaration.File = TGSI_FILE_SAMPLER_VIEW;
-   decl.Declaration.UsageMask = 0xf;
+   decl.Declaration.UsageMask = TGSI_WRITEMASK_XYZW;
    decl.Range.First =
    decl.Range.Last = index;
    decl.SamplerView.Resource = target;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index d6811501d16..9654ac52bf2 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -1593,7 +1593,7 @@ emit_decl_sampler_view(struct ureg_program *ureg,
    out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
    out[0].decl.NrTokens = 3;
    out[0].decl.File = TGSI_FILE_SAMPLER_VIEW;
-   out[0].decl.UsageMask = 0xf;
+   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
 
    out[1].value = 0;
    out[1].decl_range.First = index;
@@ -1621,7 +1621,7 @@ emit_decl_image(struct ureg_program *ureg,
    out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
    out[0].decl.NrTokens = 3;
    out[0].decl.File = TGSI_FILE_IMAGE;
-   out[0].decl.UsageMask = 0xf;
+   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
 
    out[1].value = 0;
    out[1].decl_range.First = index;
@@ -1645,7 +1645,7 @@ emit_decl_buffer(struct ureg_program *ureg,
    out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
    out[0].decl.NrTokens = 2;
    out[0].decl.File = TGSI_FILE_BUFFER;
-   out[0].decl.UsageMask = 0xf;
+   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
    out[0].decl.Atomic = atomic;
 
    out[1].value = 0;

From ac57577e29643a59a33a7c2b01def2e297db3448 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 6 Feb 2016 17:07:59 -0500
Subject: [PATCH 05/94] glsl: make sure builtins are initialized before getting
 the shader

The builtin function shader is part of the builtin state, released
when glReleaseShaderCompiler is called. We must ensure that the
builtins have been (re)initialized before attempting to link with the
builtin shader.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
Tested-by: Rob Herring <robh@kernel.org>
Cc: mesa-stable@lists.freedesktop.org
---
 src/compiler/glsl/linker.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index 4776ffa6acd..f1ac53abb0a 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -2125,6 +2125,7 @@ link_intrastage_shaders(void *mem_ctx,
 
       if (ok) {
          memcpy(linking_shaders, shader_list, num_shaders * sizeof(gl_shader *));
+         _mesa_glsl_initialize_builtin_functions();
          linking_shaders[num_shaders] = _mesa_glsl_get_builtin_function_shader();
 
          ok = link_function_calls(prog, linked, linking_shaders, num_shaders + 1);

From 88519c60873e6a5d67fc1fc09b125e4fe2fa3aee Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 6 Feb 2016 17:08:29 -0500
Subject: [PATCH 06/94] glsl: return cloned signature, not the builtin one

The builtin data can get released with a glReleaseShaderCompiler call.
We're careful everywhere to clone everything that comes out of builtins
except here, where we accidentally return the signature belonging to the
builtin version, rather than the locally-cloned one.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
Tested-by: Rob Herring <robh@kernel.org>
Cc: mesa-stable@lists.freedesktop.org
---
 src/compiler/glsl/ast_function.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ast_function.cpp b/src/compiler/glsl/ast_function.cpp
index 0eb456a2b1f..c7fdcb24379 100644
--- a/src/compiler/glsl/ast_function.cpp
+++ b/src/compiler/glsl/ast_function.cpp
@@ -560,7 +560,8 @@ done:
 	    state->symbols->add_global_function(f);
 	    emit_function(state, f);
 	 }
-	 f->add_signature(sig->clone_prototype(f, NULL));
+	 sig = sig->clone_prototype(f, NULL);
+	 f->add_signature(sig);
       }
    }
    return sig;

From ccaf734275ede89bfc86f274a64570be715fed94 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@freedesktop.org>
Date: Fri, 5 Feb 2016 23:16:31 -0800
Subject: [PATCH 07/94] mesa/extensions: Fix NVX_gpu_memory_info
 lexicographical order.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes MesaExtensionsTest.AlphabeticallySorted.

Fixes: 1d79b9958090 ("mesa: implement GL_NVX_gpu_memory_info (v2)")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94016
Signed-off-by: Vinson Lee <vlee@freedesktop.org>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/mesa/main/extensions_table.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index ded6f2c06dc..d1e3a99fdc0 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -273,6 +273,8 @@ EXT(MESA_texture_signed_rgba                , EXT_texture_snorm
 EXT(MESA_window_pos                         , dummy_true                             , GLL,  x ,  x ,  x , 2000)
 EXT(MESA_ycbcr_texture                      , MESA_ycbcr_texture                     , GLL, GLC,  x ,  x , 2002)
 
+EXT(NVX_gpu_memory_info                     , NVX_gpu_memory_info                    , GLL, GLC,  x ,  x , 2013)
+
 EXT(NV_blend_square                         , dummy_true                             , GLL,  x ,  x ,  x , 1999)
 EXT(NV_conditional_render                   , NV_conditional_render                  , GLL, GLC,  x ,  x , 2008)
 EXT(NV_depth_clamp                          , ARB_depth_clamp                        , GLL, GLC,  x ,  x , 2001)
@@ -293,7 +295,6 @@ EXT(NV_texture_barrier                      , NV_texture_barrier
 EXT(NV_texture_env_combine4                 , NV_texture_env_combine4                , GLL,  x ,  x ,  x , 1999)
 EXT(NV_texture_rectangle                    , NV_texture_rectangle                   , GLL,  x ,  x ,  x , 2000)
 EXT(NV_vdpau_interop                        , NV_vdpau_interop                       , GLL, GLC,  x ,  x , 2010)
-EXT(NVX_gpu_memory_info                     , NVX_gpu_memory_info                    , GLL, GLC,  x ,  x , 2013)
 
 EXT(OES_EGL_image                           , OES_EGL_image                          , GLL, GLC, ES1, ES2, 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
 EXT(OES_EGL_image_external                  , OES_EGL_image_external                 ,  x ,  x , ES1, ES2, 2010)

From 1dacbb7b46f458dcc09a42f7ac1bb18983f17075 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Sun, 7 Feb 2016 22:33:33 +0100
Subject: [PATCH 08/94] trace: remove useless MALLOC() in
 trace_context_draw_vbo()

There is no need to allocate memory when unwrapping the indirect buffer;
a copy of the draw info on the stack is sufficient.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/trace/tr_context.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 4d03fe1ee0b..066a0ee3d5c 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -120,18 +120,13 @@ trace_context_draw_vbo(struct pipe_context *_pipe,
    trace_dump_trace_flush();
 
    if (info->indirect) {
-      struct pipe_draw_info *_info = NULL;
+      struct pipe_draw_info _info;
 
-      _info = MALLOC(sizeof(*_info));
-      if (!_info)
-         return;
-
-      memcpy(_info, info, sizeof(*_info));
-      _info->indirect = trace_resource_unwrap(tr_ctx, _info->indirect);
-      _info->indirect_params = trace_resource_unwrap(tr_ctx,
-                                                     _info->indirect_params);
-      pipe->draw_vbo(pipe, _info);
-      FREE(_info);
+      memcpy(&_info, info, sizeof(_info));
+      _info.indirect = trace_resource_unwrap(tr_ctx, _info.indirect);
+      _info.indirect_params = trace_resource_unwrap(tr_ctx,
+                                                    _info.indirect_params);
+      pipe->draw_vbo(pipe, &_info);
    } else {
       pipe->draw_vbo(pipe, info);
    }

From efe5829578083d26f6dbac37f960a61e5915c91e Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Sun, 7 Feb 2016 23:27:48 +0100
Subject: [PATCH 09/94] trace: add missing pipe_context::clear_texture()

This fixes a crash with bin/arb_clear_texture-base-formats and
probably some other tests which use clear_texture().

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/trace/tr_context.c | 28 ++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 066a0ee3d5c..2ce2b3aef75 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -1279,6 +1279,33 @@ trace_context_clear_depth_stencil(struct pipe_context *_pipe,
    trace_dump_call_end();
 }
 
+static inline void
+trace_context_clear_texture(struct pipe_context *_pipe,
+                            struct pipe_resource *res,
+                            unsigned level,
+                            const struct pipe_box *box,
+                            const void *data)
+{
+   struct trace_context *tr_ctx = trace_context(_pipe);
+   struct pipe_context *pipe = tr_ctx->pipe;
+
+   res = trace_resource_unwrap(tr_ctx, res);
+
+   trace_dump_call_begin("pipe_context", "clear_texture");
+
+   trace_dump_arg(ptr, pipe);
+   trace_dump_arg(ptr, res);
+   trace_dump_arg(uint, level);
+   trace_dump_arg_begin("box");
+   trace_dump_box(box);
+   trace_dump_arg_end();
+   trace_dump_arg(ptr, data);
+
+   pipe->clear_texture(pipe, res, level, box, data);
+
+   trace_dump_call_end();
+}
+
 static inline void
 trace_context_flush(struct pipe_context *_pipe,
                     struct pipe_fence_handle **fence,
@@ -1704,6 +1731,7 @@ trace_context_create(struct trace_screen *tr_scr,
    TR_CTX_INIT(clear);
    TR_CTX_INIT(clear_render_target);
    TR_CTX_INIT(clear_depth_stencil);
+   TR_CTX_INIT(clear_texture);
    TR_CTX_INIT(flush);
    TR_CTX_INIT(generate_mipmap);
    TR_CTX_INIT(texture_barrier);

From 6c7d4a7173c64f61812c2204bcbbe82b7456debd Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 8 Feb 2016 09:29:38 -0700
Subject: [PATCH 10/94] gallium/util: whitespace, formatting fixes in u_debug.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/auxiliary/util/u_debug.c | 201 ++++++++++++++-------------
 1 file changed, 106 insertions(+), 95 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 2b605594a2e..7a3d51f12c1 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -1,9 +1,9 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2008 VMware, Inc.
  * Copyright (c) 2008 VMware, Inc.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -11,11 +11,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -23,22 +23,22 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 
-#include "pipe/p_config.h" 
+#include "pipe/p_config.h"
 
 #include "pipe/p_compiler.h"
-#include "util/u_debug.h" 
-#include "pipe/p_format.h" 
-#include "pipe/p_state.h" 
-#include "util/u_inlines.h" 
+#include "util/u_debug.h"
+#include "pipe/p_format.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
 #include "util/u_format.h"
-#include "util/u_memory.h" 
-#include "util/u_string.h" 
-#include "util/u_math.h" 
-#include "util/u_tile.h" 
+#include "util/u_memory.h"
+#include "util/u_string.h"
+#include "util/u_math.h"
+#include "util/u_tile.h"
 #include "util/u_prim.h"
 #include "util/u_surface.h"
 #include <inttypes.h>
@@ -53,14 +53,15 @@
 #endif
 
 
-void _debug_vprintf(const char *format, va_list ap)
+void
+_debug_vprintf(const char *format, va_list ap)
 {
    static char buf[4096] = {'\0'};
 #if defined(PIPE_OS_WINDOWS) || defined(PIPE_SUBSYSTEM_EMBEDDED)
    /* We buffer until we find a newline. */
    size_t len = strlen(buf);
    int ret = util_vsnprintf(buf + len, sizeof(buf) - len, format, ap);
-   if(ret > (int)(sizeof(buf) - len - 1) || util_strchr(buf + len, '\n')) {
+   if (ret > (int)(sizeof(buf) - len - 1) || util_strchr(buf + len, '\n')) {
       os_log_message(buf);
       buf[0] = '\0';
    }
@@ -70,12 +71,12 @@ void _debug_vprintf(const char *format, va_list ap)
 #endif
 }
 
+
 void
-_pipe_debug_message(
-   struct pipe_debug_callback *cb,
-   unsigned *id,
-   enum pipe_debug_type type,
-   const char *fmt, ...)
+_pipe_debug_message(struct pipe_debug_callback *cb,
+                    unsigned *id,
+                    enum pipe_debug_type type,
+                    const char *fmt, ...)
 {
    va_list args;
    va_start(args, fmt);
@@ -112,9 +113,8 @@ debug_disable_error_message_boxes(void)
 
 
 #ifdef DEBUG
-void debug_print_blob( const char *name,
-                       const void *blob,
-                       unsigned size )
+void
+debug_print_blob(const char *name, const void *blob, unsigned size)
 {
    const unsigned *ublob = (const unsigned *)blob;
    unsigned i;
@@ -147,6 +147,7 @@ debug_get_option_should_print(void)
    return value;
 }
 
+
 const char *
 debug_get_option(const char *name, const char *dfault)
 {
@@ -157,39 +158,42 @@ debug_get_option(const char *name, const char *dfault)
       result = dfault;
 
    if (debug_get_option_should_print())
-      debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? result : "(null)");
-   
+      debug_printf("%s: %s = %s\n", __FUNCTION__, name,
+                   result ? result : "(null)");
+
    return result;
 }
 
+
 boolean
 debug_get_bool_option(const char *name, boolean dfault)
 {
    const char *str = os_get_option(name);
    boolean result;
-   
-   if(str == NULL)
+
+   if (str == NULL)
       result = dfault;
-   else if(!util_strcmp(str, "n"))
+   else if (!util_strcmp(str, "n"))
       result = FALSE;
-   else if(!util_strcmp(str, "no"))
+   else if (!util_strcmp(str, "no"))
       result = FALSE;
-   else if(!util_strcmp(str, "0"))
+   else if (!util_strcmp(str, "0"))
       result = FALSE;
-   else if(!util_strcmp(str, "f"))
+   else if (!util_strcmp(str, "f"))
       result = FALSE;
-   else if(!util_strcmp(str, "F"))
+   else if (!util_strcmp(str, "F"))
       result = FALSE;
-   else if(!util_strcmp(str, "false"))
+   else if (!util_strcmp(str, "false"))
       result = FALSE;
-   else if(!util_strcmp(str, "FALSE"))
+   else if (!util_strcmp(str, "FALSE"))
       result = FALSE;
    else
       result = TRUE;
 
    if (debug_get_option_should_print())
-      debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? "TRUE" : "FALSE");
-   
+      debug_printf("%s: %s = %s\n", __FUNCTION__, name,
+                   result ? "TRUE" : "FALSE");
+
    return result;
 }
 
@@ -199,23 +203,23 @@ debug_get_num_option(const char *name, long dfault)
 {
    long result;
    const char *str;
-   
+
    str = os_get_option(name);
-   if(!str)
+   if (!str)
       result = dfault;
    else {
       long sign;
       char c;
       c = *str++;
-      if(c == '-') {
+      if (c == '-') {
 	 sign = -1;
 	 c = *str++;
-      } 
+      }
       else {
 	 sign = 1;
       }
       result = 0;
-      while('0' <= c && c <= '9') {
+      while ('0' <= c && c <= '9') {
 	 result = result*10 + (c - '0');
 	 c = *str++;
       }
@@ -228,7 +232,9 @@ debug_get_num_option(const char *name, long dfault)
    return result;
 }
 
-static boolean str_has_option(const char *str, const char *name)
+
+static boolean
+str_has_option(const char *str, const char *name)
 {
    /* Empty string. */
    if (!*str) {
@@ -271,8 +277,9 @@ static boolean str_has_option(const char *str, const char *name)
    return FALSE;
 }
 
+
 uint64_t
-debug_get_flags_option(const char *name, 
+debug_get_flags_option(const char *name,
                        const struct debug_named_value *flags,
                        uint64_t dfault)
 {
@@ -280,9 +287,9 @@ debug_get_flags_option(const char *name,
    const char *str;
    const struct debug_named_value *orig = flags;
    unsigned namealign = 0;
-   
+
    str = os_get_option(name);
-   if(!str)
+   if (!str)
       result = dfault;
    else if (!util_strcmp(str, "help")) {
       result = dfault;
@@ -296,7 +303,7 @@ debug_get_flags_option(const char *name,
    }
    else {
       result = 0;
-      while( flags->name ) {
+      while (flags->name) {
 	 if (str_has_option(str, flags->name))
 	    result |= flags->value;
 	 ++flags;
@@ -305,7 +312,8 @@ debug_get_flags_option(const char *name,
 
    if (debug_get_option_should_print()) {
       if (str) {
-         debug_printf("%s: %s = 0x%"PRIx64" (%s)\n", __FUNCTION__, name, result, str);
+         debug_printf("%s: %s = 0x%"PRIx64" (%s)\n",
+                      __FUNCTION__, name, result, str);
       } else {
          debug_printf("%s: %s = 0x%"PRIx64"\n", __FUNCTION__, name, result);
       }
@@ -315,24 +323,24 @@ debug_get_flags_option(const char *name,
 }
 
 
-void _debug_assert_fail(const char *expr, 
-                        const char *file, 
-                        unsigned line, 
-                        const char *function) 
+void
+_debug_assert_fail(const char *expr, const char *file, unsigned line,
+                   const char *function)
 {
-   _debug_printf("%s:%u:%s: Assertion `%s' failed.\n", file, line, function, expr);
+   _debug_printf("%s:%u:%s: Assertion `%s' failed.\n",
+                 file, line, function, expr);
    os_abort();
 }
 
 
 const char *
-debug_dump_enum(const struct debug_named_value *names, 
+debug_dump_enum(const struct debug_named_value *names,
                 unsigned long value)
 {
    static char rest[64];
-   
-   while(names->name) {
-      if(names->value == value)
+
+   while (names->name) {
+      if (names->value == value)
 	 return names->name;
       ++names;
    }
@@ -343,14 +351,14 @@ debug_dump_enum(const struct debug_named_value *names,
 
 
 const char *
-debug_dump_enum_noprefix(const struct debug_named_value *names, 
+debug_dump_enum_noprefix(const struct debug_named_value *names,
                          const char *prefix,
                          unsigned long value)
 {
    static char rest[64];
-   
-   while(names->name) {
-      if(names->value == value) {
+
+   while (names->name) {
+      if (names->value == value) {
          const char *name = names->name;
          while (*name == *prefix) {
             name++;
@@ -361,16 +369,13 @@ debug_dump_enum_noprefix(const struct debug_named_value *names,
       ++names;
    }
 
-   
-
    util_snprintf(rest, sizeof(rest), "0x%08lx", value);
    return rest;
 }
 
 
 const char *
-debug_dump_flags(const struct debug_named_value *names, 
-                 unsigned long value)
+debug_dump_flags(const struct debug_named_value *names, unsigned long value)
 {
    static char output[4096];
    static char rest[256];
@@ -378,8 +383,8 @@ debug_dump_flags(const struct debug_named_value *names,
 
    output[0] = '\0';
 
-   while(names->name) {
-      if((names->value & value) == names->value) {
+   while (names->name) {
+      if ((names->value & value) == names->value) {
 	 if (!first)
 	    util_strncat(output, "|", sizeof(output) - strlen(output) - 1);
 	 else
@@ -390,27 +395,28 @@ debug_dump_flags(const struct debug_named_value *names,
       }
       ++names;
    }
-   
+
    if (value) {
       if (!first)
 	 util_strncat(output, "|", sizeof(output) - strlen(output) - 1);
       else
 	 first = 0;
-      
+
       util_snprintf(rest, sizeof(rest), "0x%08lx", value);
       util_strncat(output, rest, sizeof(output) - strlen(output) - 1);
       output[sizeof(output) - 1] = '\0';
    }
-   
-   if(first)
+
+   if (first)
       return "0";
-   
+
    return output;
 }
 
 
 #ifdef DEBUG
-void debug_print_format(const char *msg, unsigned fmt )
+void
+debug_print_format(const char *msg, unsigned fmt )
 {
    debug_printf("%s: %s\n", msg, util_format_name(fmt));
 }
@@ -447,7 +453,8 @@ u_prim_name(unsigned prim)
 int fl_indent = 0;
 const char* fl_function[1024];
 
-int debug_funclog_enter(const char* f, const int line, const char* file)
+int
+debug_funclog_enter(const char* f, const int line, const char* file)
 {
    int i;
 
@@ -461,14 +468,16 @@ int debug_funclog_enter(const char* f, const int line, const char* file)
    return 0;
 }
 
-void debug_funclog_exit(const char* f, const int line, const char* file)
+void
+debug_funclog_exit(const char* f, const int line, const char* file)
 {
    --fl_indent;
    assert(fl_indent >= 0);
    assert(fl_function[fl_indent] == f);
 }
 
-void debug_funclog_enter_exit(const char* f, const int line, const char* file)
+void
+debug_funclog_enter_exit(const char* f, const int line, const char* file)
 {
    int i;
    for (i = 0; i < fl_indent; i++)
@@ -488,11 +497,12 @@ void debug_funclog_enter_exit(const char* f, const int line, const char* file)
  * \param height height in pixels
  * \param stride  row stride in bytes
  */
-void debug_dump_image(const char *prefix,
-                      enum pipe_format format, unsigned cpp,
-                      unsigned width, unsigned height,
-                      unsigned stride,
-                      const void *data)     
+void
+debug_dump_image(const char *prefix,
+                 enum pipe_format format, unsigned cpp,
+                 unsigned width, unsigned height,
+                 unsigned stride,
+                 const void *data)
 {
    /* write a ppm file */
    char filename[256];
@@ -533,10 +543,12 @@ void debug_dump_image(const char *prefix,
    FREE(rgb8);
 }
 
+
 /* FIXME: dump resources, not surfaces... */
-void debug_dump_surface(struct pipe_context *pipe,
-                        const char *prefix,
-                        struct pipe_surface *surface)
+void
+debug_dump_surface(struct pipe_context *pipe,
+                   const char *prefix,
+                   struct pipe_surface *surface)
 {
    struct pipe_resource *texture;
    struct pipe_transfer *transfer;
@@ -572,9 +584,10 @@ void debug_dump_surface(struct pipe_context *pipe,
 }
 
 
-void debug_dump_texture(struct pipe_context *pipe,
-                        const char *prefix,
-                        struct pipe_resource *texture)
+void
+debug_dump_texture(struct pipe_context *pipe,
+                   const char *prefix,
+                   struct pipe_resource *texture)
 {
    struct pipe_surface *surface, surf_tmpl;
 
@@ -709,10 +722,9 @@ debug_dump_float_rgba_bmp(const char *filename,
    fwrite(&bmih, 40, 1, stream);
 
    y = height;
-   while(y--) {
+   while (y--) {
       float *ptr = rgba + (stride * y * 4);
-      for(x = 0; x < width; ++x)
-      {
+      for (x = 0; x < width; ++x) {
          struct bmp_rgb_quad pixel;
          pixel.rgbRed   = float_to_ubyte(ptr[x*4 + 0]);
          pixel.rgbGreen = float_to_ubyte(ptr[x*4 + 1]);
@@ -738,7 +750,7 @@ debug_dump_ubyte_rgba_bmp(const char *filename,
    unsigned x, y;
 
    assert(rgba);
-   if(!rgba)
+   if (!rgba)
       goto error1;
 
    bmfh.bfType = 0x4d42;
@@ -761,17 +773,16 @@ debug_dump_ubyte_rgba_bmp(const char *filename,
 
    stream = fopen(filename, "wb");
    assert(stream);
-   if(!stream)
+   if (!stream)
       goto error1;
 
    fwrite(&bmfh, 14, 1, stream);
    fwrite(&bmih, 40, 1, stream);
 
    y = height;
-   while(y--) {
+   while (y--) {
       const ubyte *ptr = rgba + (stride * y * 4);
-      for(x = 0; x < width; ++x)
-      {
+      for (x = 0; x < width; ++x) {
          struct bmp_rgb_quad pixel;
          pixel.rgbRed   = ptr[x*4 + 0];
          pixel.rgbGreen = ptr[x*4 + 1];

From 3917c8f3f9d44bd6ca50d5d5f8b9fa9d21295d37 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 8 Feb 2016 09:29:38 -0700
Subject: [PATCH 11/94] gallium/util: put image dumping functions into separate
 file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To try to reduce the clutter in u_debug.[ch]

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/auxiliary/util/u_debug_image.c | 348 +++++++++++++++++++++
 src/gallium/auxiliary/util/u_debug_image.h |  74 +++++
 2 files changed, 422 insertions(+)
 create mode 100644 src/gallium/auxiliary/util/u_debug_image.c
 create mode 100644 src/gallium/auxiliary/util/u_debug_image.h

diff --git a/src/gallium/auxiliary/util/u_debug_image.c b/src/gallium/auxiliary/util/u_debug_image.c
new file mode 100644
index 00000000000..98d73a63de2
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_image.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 2008-2016 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "util/u_debug_image.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
+#include "util/u_surface.h"
+#include "util/u_tile.h"
+
+#include <stdio.h>
+
+
+#ifdef DEBUG
+
+/**
+ * Dump an image to a binary (P6) .ppm file named "<prefix>.ppm".
+ * \param prefix  output file name without the ".ppm" extension
+ * \param format  PIPE_FORMAT_x of the source pixels
+ * \param cpp  bytes per pixel (not used by this function)
+ * \param width  width in pixels
+ * \param height height in pixels
+ * \param stride  row stride in bytes
+ * \param data  source pixels; translated to R8G8B8_UNORM before writing
+ */
+void
+debug_dump_image(const char *prefix,
+                 enum pipe_format format, unsigned cpp,
+                 unsigned width, unsigned height,
+                 unsigned stride,
+                 const void *data)
+{
+   char filename[256];
+   unsigned char *rgb8;
+   FILE *f;
+
+   util_snprintf(filename, sizeof(filename), "%s.ppm", prefix);
+
+   rgb8 = MALLOC(height * width * 3);
+   if (!rgb8)
+      return;
+
+   util_format_translate(
+         PIPE_FORMAT_R8G8B8_UNORM,
+         rgb8, width * 3,
+         0, 0,
+         format,
+         data, stride,
+         0, 0, width, height);
+
+   /* Must be opened in binary mode or DOS line ending causes data
+    * to be read with one byte offset.
+    */
+   f = fopen(filename, "wb");
+   if (f) {
+      fprintf(f, "P6\n");
+      fprintf(f, "# ppm-file created by gallium\n");
+      fprintf(f, "%u %u\n", width, height);
+      fprintf(f, "255\n");
+      fwrite(rgb8, 1, height * width * 3, f);
+      fclose(f);
+   }
+   else {
+      fprintf(stderr, "Can't open %s for writing\n", filename);
+   }
+
+   FREE(rgb8);
+}
+
+
+/* Dump a surface to "<prefix>.ppm".  FIXME: dump resources, not surfaces... */
+void
+debug_dump_surface(struct pipe_context *pipe,
+                   const char *prefix,
+                   struct pipe_surface *surface)
+{
+   struct pipe_transfer *transfer;
+   enum pipe_format format;
+   void *map;
+
+   if (!surface)
+      return;
+
+   /* XXX: this doesn't necessarily work, as the driver may be using
+    * temporary storage for the surface which hasn't been propagated
+    * back into the texture.  Need to nail down the semantics of views
+    * and transfers a bit better before we can say if extra work needs
+    * to be done here:
+    */
+   format = surface->texture->format;
+
+   map = pipe_transfer_map(pipe, surface->texture, surface->u.tex.level,
+                           surface->u.tex.first_layer,
+                           PIPE_TRANSFER_READ,
+                           0, 0, surface->width, surface->height, &transfer);
+   if (!map)
+      return;
+
+   debug_dump_image(prefix,
+                    format,
+                    util_format_get_blocksize(format),
+                    util_format_get_nblocksx(format, surface->width),
+                    util_format_get_nblocksy(format, surface->height),
+                    transfer->stride,
+                    map);
+
+   pipe->transfer_unmap(pipe, transfer);
+}
+
+/* Dump a texture through a temporary surface; see debug_dump_surface(). */
+void
+debug_dump_texture(struct pipe_context *pipe,
+                   const char *prefix,
+                   struct pipe_resource *texture)
+{
+   struct pipe_surface tmpl, *view;
+
+   if (!texture)
+      return;
+
+   /* XXX for now, just dump image for layer=0, level=0 */
+   u_surface_default_template(&tmpl, texture);
+   view = pipe->create_surface(pipe, texture, &tmpl);
+   if (!view)
+      return;
+   debug_dump_surface(pipe, prefix, view);
+   pipe->surface_destroy(pipe, view);
+}
+
+/* Header and pixel layouts used when writing .bmp files in this file. */
+#pragma pack(push,2)
+struct bmp_file_header {   /* 14 bytes: 2-byte packing avoids padding */
+   uint16_t bfType;
+   uint32_t bfSize;
+   uint16_t bfReserved1;
+   uint16_t bfReserved2;
+   uint32_t bfOffBits;
+};
+#pragma pack(pop)
+
+struct bmp_info_header {   /* 40 bytes of fixed-width fields */
+   uint32_t biSize;
+   int32_t biWidth;
+   int32_t biHeight;
+   uint16_t biPlanes;
+   uint16_t biBitCount;
+   uint32_t biCompression;
+   uint32_t biSizeImage;
+   int32_t biXPelsPerMeter;
+   int32_t biYPelsPerMeter;
+   uint32_t biClrUsed;
+   uint32_t biClrImportant;
+};
+
+struct bmp_rgb_quad {   /* one pixel: 4 bytes stored as B, G, R, A */
+   uint8_t rgbBlue;
+   uint8_t rgbGreen;
+   uint8_t rgbRed;
+   uint8_t rgbAlpha;
+};
+
+/* Map a surface for reading and write its contents to 'filename' as .bmp. */
+void
+debug_dump_surface_bmp(struct pipe_context *pipe,
+                       const char *filename,
+                       struct pipe_surface *surface)
+{
+   struct pipe_transfer *transfer;
+   void *ptr;
+
+   ptr = pipe_transfer_map(pipe, surface->texture, surface->u.tex.level,
+                           surface->u.tex.first_layer, PIPE_TRANSFER_READ,
+                           0, 0, surface->width, surface->height, &transfer);
+   if (!ptr)   /* nothing was mapped, so nothing to dump or unmap */
+      return;
+   debug_dump_transfer_bmp(pipe, filename, transfer, ptr);
+   pipe->transfer_unmap(pipe, transfer);
+}
+
+/* Fetch a mapped transfer as float RGBA and write it to 'filename' as .bmp. */
+void
+debug_dump_transfer_bmp(struct pipe_context *pipe,
+                        const char *filename,
+                        struct pipe_transfer *transfer, void *ptr)
+{
+   float *tile;
+
+   if (!transfer)
+      return;
+
+   /* Sized for the full box; only width x height texels are requested below. */
+   tile = MALLOC(transfer->box.width *
+                 transfer->box.height *
+                 transfer->box.depth *
+                 4*sizeof(float));
+   if (!tile)
+      return;
+
+   pipe_get_tile_rgba(transfer, ptr, 0, 0,
+                      transfer->box.width, transfer->box.height,
+                      tile);
+
+   debug_dump_float_rgba_bmp(filename,
+                             transfer->box.width, transfer->box.height,
+                             tile, transfer->box.width);
+
+   FREE(tile);
+}
+
+/* Write float RGBA pixels to 'filename' as a 32 bits-per-pixel .bmp file.
+ * 'stride' is the row pitch of 'rgba', counted in pixels of 4 floats. */
+void
+debug_dump_float_rgba_bmp(const char *filename,
+                          unsigned width, unsigned height,
+                          float *rgba, unsigned stride)
+{
+   FILE *stream;
+   struct bmp_file_header bmfh;
+   struct bmp_info_header bmih;
+   unsigned x, y;
+
+   if (!rgba)
+      return;
+
+   bmfh.bfType = 0x4d42;
+   bmfh.bfSize = 14 + 40 + height*width*4;
+   bmfh.bfReserved1 = 0;
+   bmfh.bfReserved2 = 0;
+   bmfh.bfOffBits = 14 + 40;
+
+   bmih.biSize = 40;
+   bmih.biWidth = width;
+   bmih.biHeight = height;
+   bmih.biPlanes = 1;
+   bmih.biBitCount = 32;
+   bmih.biCompression = 0;
+   bmih.biSizeImage = height*width*4;
+   bmih.biXPelsPerMeter = 0;
+   bmih.biYPelsPerMeter = 0;
+   bmih.biClrUsed = 0;
+   bmih.biClrImportant = 0;
+
+   stream = fopen(filename, "wb");
+   if (!stream)
+      return;
+
+   fwrite(&bmfh, 14, 1, stream);
+   fwrite(&bmih, 40, 1, stream);
+
+   /* Rows are written from the last one down to row 0. */
+   for (y = height; y-- > 0; ) {
+      const float *row = rgba + (stride * y * 4);
+      for (x = 0; x < width; ++x) {
+         struct bmp_rgb_quad pixel;
+         pixel.rgbRed   = float_to_ubyte(row[x*4 + 0]);
+         pixel.rgbGreen = float_to_ubyte(row[x*4 + 1]);
+         pixel.rgbBlue  = float_to_ubyte(row[x*4 + 2]);
+         pixel.rgbAlpha = float_to_ubyte(row[x*4 + 3]);
+         fwrite(&pixel, 1, 4, stream);
+      }
+   }
+
+   fclose(stream);
+}
+
+/* Write 8-bit RGBA pixels to 'filename' as a 32 bits-per-pixel .bmp file.
+ * 'stride' is the row pitch of 'rgba', counted in pixels of 4 bytes. */
+void
+debug_dump_ubyte_rgba_bmp(const char *filename,
+                          unsigned width, unsigned height,
+                          const ubyte *rgba, unsigned stride)
+{
+   FILE *stream;
+   struct bmp_file_header bmfh;
+   struct bmp_info_header bmih;
+   unsigned x, y;
+
+   assert(rgba);
+   if (!rgba)
+      return;
+
+   bmfh.bfType = 0x4d42;
+   bmfh.bfSize = 14 + 40 + height*width*4;
+   bmfh.bfReserved1 = 0;
+   bmfh.bfReserved2 = 0;
+   bmfh.bfOffBits = 14 + 40;
+
+   bmih.biSize = 40;
+   bmih.biWidth = width;
+   bmih.biHeight = height;
+   bmih.biPlanes = 1;
+   bmih.biBitCount = 32;
+   bmih.biCompression = 0;
+   bmih.biSizeImage = height*width*4;
+   bmih.biXPelsPerMeter = 0;
+   bmih.biYPelsPerMeter = 0;
+   bmih.biClrUsed = 0;
+   bmih.biClrImportant = 0;
+
+   stream = fopen(filename, "wb");
+   assert(stream);
+   if (!stream)
+      return;
+
+   fwrite(&bmfh, 14, 1, stream);
+   fwrite(&bmih, 40, 1, stream);
+
+   /* Rows are written from the last one down to row 0. */
+   for (y = height; y-- > 0; ) {
+      const ubyte *row = rgba + (stride * y * 4);
+      for (x = 0; x < width; ++x) {
+         struct bmp_rgb_quad pixel;
+         pixel.rgbRed   = row[x*4 + 0];
+         pixel.rgbGreen = row[x*4 + 1];
+         pixel.rgbBlue  = row[x*4 + 2];
+         pixel.rgbAlpha = row[x*4 + 3];
+         fwrite(&pixel, 1, 4, stream);
+      }
+   }
+
+   fclose(stream);
+}
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_debug_image.h b/src/gallium/auxiliary/util/u_debug_image.h
new file mode 100644
index 00000000000..f190eec5f52
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_image.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2008-2016 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef U_DEBUG_IMAGE_H
+#define U_DEBUG_IMAGE_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+
+
+#ifdef DEBUG
+struct pipe_context;
+struct pipe_surface;
+struct pipe_transfer;
+struct pipe_resource;
+
+/* Image/surface dump helpers; these prototypes exist only when DEBUG is set. */
+void debug_dump_image(const char *prefix,
+                      enum pipe_format format, unsigned cpp,
+                      unsigned width, unsigned height,
+                      unsigned stride, const void *data);
+void debug_dump_surface(struct pipe_context *pipe, const char *prefix,
+                        struct pipe_surface *surface);
+void debug_dump_texture(struct pipe_context *pipe, const char *prefix,
+                        struct pipe_resource *texture);
+void debug_dump_surface_bmp(struct pipe_context *pipe,
+                            const char *filename,
+                            struct pipe_surface *surface);
+void debug_dump_transfer_bmp(struct pipe_context *pipe,
+                             const char *filename,
+                             struct pipe_transfer *transfer, void *ptr);
+void debug_dump_float_rgba_bmp(const char *filename,
+                               unsigned width, unsigned height,
+                               float *rgba, unsigned stride);
+void debug_dump_ubyte_rgba_bmp(const char *filename,
+                               unsigned width, unsigned height,
+                               const ubyte *rgba, unsigned stride);
+#else
+/* Without DEBUG each call expands to nothing; arity matches the prototypes. */
+#define debug_dump_image(prefix, format, cpp, width, height, stride, data) ((void)0)
+#define debug_dump_surface(pipe, prefix, surface) ((void)0)
+#define debug_dump_texture(pipe, prefix, texture) ((void)0)
+#define debug_dump_surface_bmp(pipe, filename, surface) ((void)0)
+#define debug_dump_transfer_bmp(pipe, filename, transfer, ptr) ((void)0)
+#define debug_dump_float_rgba_bmp(filename, width, height, rgba, stride) ((void)0)
+#define debug_dump_ubyte_rgba_bmp(filename, width, height, rgba, stride) ((void)0)
+#endif
+
+
+#endif

From c84a8911fcd2e33f1b29ec2a7d94724709bb9b80 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 8 Feb 2016 09:29:38 -0700
Subject: [PATCH 12/94] gallium/util: switch over to new u_debug_image.[ch]
 code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/auxiliary/Makefile.sources     |   4 +-
 src/gallium/auxiliary/util/u_debug.c       | 311 ---------------------
 src/gallium/auxiliary/util/u_debug.h       |  39 ---
 src/gallium/drivers/llvmpipe/lp_flush.c    |   1 +
 src/gallium/drivers/softpipe/sp_flush.c    |   1 +
 src/gallium/drivers/svga/svga_pipe_flush.c |   1 +
 src/gallium/targets/graw-null/graw_util.c  |   1 +
 src/gallium/tests/graw/graw_util.h         |   1 +
 src/gallium/tests/trivial/quad-tex.c       |   2 +-
 src/gallium/tests/trivial/tri.c            |   2 +-
 10 files changed, 10 insertions(+), 353 deletions(-)

diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index 6f50f714c3f..84da85c5b96 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -191,11 +191,13 @@ C_SOURCES := \
 	util/u_cpu_detect.c \
 	util/u_cpu_detect.h \
 	util/u_debug.c \
+	util/u_debug.h \
 	util/u_debug_describe.c \
 	util/u_debug_describe.h \
 	util/u_debug_flush.c \
 	util/u_debug_flush.h \
-	util/u_debug.h \
+	util/u_debug_image.c \
+	util/u_debug_image.h \
 	util/u_debug_memory.c \
 	util/u_debug_refcnt.c \
 	util/u_debug_refcnt.h \
diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 7a3d51f12c1..db6635713e5 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -38,9 +38,7 @@
 #include "util/u_memory.h"
 #include "util/u_string.h"
 #include "util/u_math.h"
-#include "util/u_tile.h"
 #include "util/u_prim.h"
-#include "util/u_surface.h"
 #include <inttypes.h>
 
 #include <stdio.h>
@@ -489,315 +487,6 @@ debug_funclog_enter_exit(const char* f, const int line, const char* file)
 
 
 #ifdef DEBUG
-/**
- * Dump an image to .ppm file.
- * \param format  PIPE_FORMAT_x
- * \param cpp  bytes per pixel
- * \param width  width in pixels
- * \param height height in pixels
- * \param stride  row stride in bytes
- */
-void
-debug_dump_image(const char *prefix,
-                 enum pipe_format format, unsigned cpp,
-                 unsigned width, unsigned height,
-                 unsigned stride,
-                 const void *data)
-{
-   /* write a ppm file */
-   char filename[256];
-   unsigned char *rgb8;
-   FILE *f;
-
-   util_snprintf(filename, sizeof(filename), "%s.ppm", prefix);
-
-   rgb8 = MALLOC(height * width * 3);
-   if (!rgb8) {
-      return;
-   }
-
-   util_format_translate(
-         PIPE_FORMAT_R8G8B8_UNORM,
-         rgb8, width * 3,
-         0, 0,
-         format,
-         data, stride,
-         0, 0, width, height);
-
-   /* Must be opened in binary mode or DOS line ending causes data
-    * to be read with one byte offset.
-    */
-   f = fopen(filename, "wb");
-   if (f) {
-      fprintf(f, "P6\n");
-      fprintf(f, "# ppm-file created by gallium\n");
-      fprintf(f, "%i %i\n", width, height);
-      fprintf(f, "255\n");
-      fwrite(rgb8, 1, height * width * 3, f);
-      fclose(f);
-   }
-   else {
-      fprintf(stderr, "Can't open %s for writing\n", filename);
-   }
-
-   FREE(rgb8);
-}
-
-
-/* FIXME: dump resources, not surfaces... */
-void
-debug_dump_surface(struct pipe_context *pipe,
-                   const char *prefix,
-                   struct pipe_surface *surface)
-{
-   struct pipe_resource *texture;
-   struct pipe_transfer *transfer;
-   void *data;
-
-   if (!surface)
-      return;
-
-   /* XXX: this doesn't necessarily work, as the driver may be using
-    * temporary storage for the surface which hasn't been propagated
-    * back into the texture.  Need to nail down the semantics of views
-    * and transfers a bit better before we can say if extra work needs
-    * to be done here:
-    */
-   texture = surface->texture;
-
-   data = pipe_transfer_map(pipe, texture, surface->u.tex.level,
-                            surface->u.tex.first_layer,
-                            PIPE_TRANSFER_READ,
-                            0, 0, surface->width, surface->height, &transfer);
-   if (!data)
-      return;
-
-   debug_dump_image(prefix,
-                    texture->format,
-                    util_format_get_blocksize(texture->format),
-                    util_format_get_nblocksx(texture->format, surface->width),
-                    util_format_get_nblocksy(texture->format, surface->height),
-                    transfer->stride,
-                    data);
-
-   pipe->transfer_unmap(pipe, transfer);
-}
-
-
-void
-debug_dump_texture(struct pipe_context *pipe,
-                   const char *prefix,
-                   struct pipe_resource *texture)
-{
-   struct pipe_surface *surface, surf_tmpl;
-
-   if (!texture)
-      return;
-
-   /* XXX for now, just dump image for layer=0, level=0 */
-   u_surface_default_template(&surf_tmpl, texture);
-   surface = pipe->create_surface(pipe, texture, &surf_tmpl);
-   if (surface) {
-      debug_dump_surface(pipe, prefix, surface);
-      pipe->surface_destroy(pipe, surface);
-   }
-}
-
-
-#pragma pack(push,2)
-struct bmp_file_header {
-   uint16_t bfType;
-   uint32_t bfSize;
-   uint16_t bfReserved1;
-   uint16_t bfReserved2;
-   uint32_t bfOffBits;
-};
-#pragma pack(pop)
-
-struct bmp_info_header {
-   uint32_t biSize;
-   int32_t biWidth;
-   int32_t biHeight;
-   uint16_t biPlanes;
-   uint16_t biBitCount;
-   uint32_t biCompression;
-   uint32_t biSizeImage;
-   int32_t biXPelsPerMeter;
-   int32_t biYPelsPerMeter;
-   uint32_t biClrUsed;
-   uint32_t biClrImportant;
-};
-
-struct bmp_rgb_quad {
-   uint8_t rgbBlue;
-   uint8_t rgbGreen;
-   uint8_t rgbRed;
-   uint8_t rgbAlpha;
-};
-
-void
-debug_dump_surface_bmp(struct pipe_context *pipe,
-                       const char *filename,
-                       struct pipe_surface *surface)
-{
-   struct pipe_transfer *transfer;
-   struct pipe_resource *texture = surface->texture;
-   void *ptr;
-
-   ptr = pipe_transfer_map(pipe, texture, surface->u.tex.level,
-                           surface->u.tex.first_layer, PIPE_TRANSFER_READ,
-                           0, 0, surface->width, surface->height, &transfer);
-
-   debug_dump_transfer_bmp(pipe, filename, transfer, ptr);
-
-   pipe->transfer_unmap(pipe, transfer);
-}
-
-void
-debug_dump_transfer_bmp(struct pipe_context *pipe,
-                        const char *filename,
-                        struct pipe_transfer *transfer, void *ptr)
-{
-   float *rgba;
-
-   if (!transfer)
-      goto error1;
-
-   rgba = MALLOC(transfer->box.width *
-		 transfer->box.height *
-		 transfer->box.depth *
-		 4*sizeof(float));
-   if (!rgba)
-      goto error1;
-
-   pipe_get_tile_rgba(transfer, ptr, 0, 0,
-                      transfer->box.width, transfer->box.height,
-                      rgba);
-
-   debug_dump_float_rgba_bmp(filename,
-                             transfer->box.width, transfer->box.height,
-                             rgba, transfer->box.width);
-
-   FREE(rgba);
-error1:
-   ;
-}
-
-void
-debug_dump_float_rgba_bmp(const char *filename,
-                          unsigned width, unsigned height,
-                          float *rgba, unsigned stride)
-{
-   FILE *stream;
-   struct bmp_file_header bmfh;
-   struct bmp_info_header bmih;
-   unsigned x, y;
-
-   if (!rgba)
-      goto error1;
-
-   bmfh.bfType = 0x4d42;
-   bmfh.bfSize = 14 + 40 + height*width*4;
-   bmfh.bfReserved1 = 0;
-   bmfh.bfReserved2 = 0;
-   bmfh.bfOffBits = 14 + 40;
-
-   bmih.biSize = 40;
-   bmih.biWidth = width;
-   bmih.biHeight = height;
-   bmih.biPlanes = 1;
-   bmih.biBitCount = 32;
-   bmih.biCompression = 0;
-   bmih.biSizeImage = height*width*4;
-   bmih.biXPelsPerMeter = 0;
-   bmih.biYPelsPerMeter = 0;
-   bmih.biClrUsed = 0;
-   bmih.biClrImportant = 0;
-
-   stream = fopen(filename, "wb");
-   if (!stream)
-      goto error1;
-
-   fwrite(&bmfh, 14, 1, stream);
-   fwrite(&bmih, 40, 1, stream);
-
-   y = height;
-   while (y--) {
-      float *ptr = rgba + (stride * y * 4);
-      for (x = 0; x < width; ++x) {
-         struct bmp_rgb_quad pixel;
-         pixel.rgbRed   = float_to_ubyte(ptr[x*4 + 0]);
-         pixel.rgbGreen = float_to_ubyte(ptr[x*4 + 1]);
-         pixel.rgbBlue  = float_to_ubyte(ptr[x*4 + 2]);
-         pixel.rgbAlpha = float_to_ubyte(ptr[x*4 + 3]);
-         fwrite(&pixel, 1, 4, stream);
-      }
-   }
-
-   fclose(stream);
-error1:
-   ;
-}
-
-void
-debug_dump_ubyte_rgba_bmp(const char *filename,
-                          unsigned width, unsigned height,
-                          const ubyte *rgba, unsigned stride)
-{
-   FILE *stream;
-   struct bmp_file_header bmfh;
-   struct bmp_info_header bmih;
-   unsigned x, y;
-
-   assert(rgba);
-   if (!rgba)
-      goto error1;
-
-   bmfh.bfType = 0x4d42;
-   bmfh.bfSize = 14 + 40 + height*width*4;
-   bmfh.bfReserved1 = 0;
-   bmfh.bfReserved2 = 0;
-   bmfh.bfOffBits = 14 + 40;
-
-   bmih.biSize = 40;
-   bmih.biWidth = width;
-   bmih.biHeight = height;
-   bmih.biPlanes = 1;
-   bmih.biBitCount = 32;
-   bmih.biCompression = 0;
-   bmih.biSizeImage = height*width*4;
-   bmih.biXPelsPerMeter = 0;
-   bmih.biYPelsPerMeter = 0;
-   bmih.biClrUsed = 0;
-   bmih.biClrImportant = 0;
-
-   stream = fopen(filename, "wb");
-   assert(stream);
-   if (!stream)
-      goto error1;
-
-   fwrite(&bmfh, 14, 1, stream);
-   fwrite(&bmih, 40, 1, stream);
-
-   y = height;
-   while (y--) {
-      const ubyte *ptr = rgba + (stride * y * 4);
-      for (x = 0; x < width; ++x) {
-         struct bmp_rgb_quad pixel;
-         pixel.rgbRed   = ptr[x*4 + 0];
-         pixel.rgbGreen = ptr[x*4 + 1];
-         pixel.rgbBlue  = ptr[x*4 + 2];
-         pixel.rgbAlpha = ptr[x*4 + 3];
-         fwrite(&pixel, 1, 4, stream);
-      }
-   }
-
-   fclose(stream);
-error1:
-   ;
-}
-
-
 /**
  * Print PIPE_TRANSFER_x flags with a message.
  */
diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h
index 671bd37a085..c2707b402cb 100644
--- a/src/gallium/auxiliary/util/u_debug.h
+++ b/src/gallium/auxiliary/util/u_debug.h
@@ -464,45 +464,6 @@ void
 debug_memory_end(unsigned long beginning);
 
 
-#ifdef DEBUG
-struct pipe_context;
-struct pipe_surface;
-struct pipe_transfer;
-struct pipe_resource;
-
-void debug_dump_image(const char *prefix,
-                      enum pipe_format format, unsigned cpp,
-                      unsigned width, unsigned height,
-                      unsigned stride,
-                      const void *data);
-void debug_dump_surface(struct pipe_context *pipe,
-			const char *prefix,
-                        struct pipe_surface *surface);   
-void debug_dump_texture(struct pipe_context *pipe,
-			const char *prefix,
-                        struct pipe_resource *texture);
-void debug_dump_surface_bmp(struct pipe_context *pipe,
-                            const char *filename,
-                            struct pipe_surface *surface);
-void debug_dump_transfer_bmp(struct pipe_context *pipe,
-                             const char *filename,
-                             struct pipe_transfer *transfer, void *ptr);
-void debug_dump_float_rgba_bmp(const char *filename,
-                               unsigned width, unsigned height,
-                               float *rgba, unsigned stride);
-void debug_dump_ubyte_rgba_bmp(const char *filename,
-                               unsigned width, unsigned height,
-                               const ubyte *rgba, unsigned stride);
-#else
-#define debug_dump_image(prefix, format, cpp, width, height, stride, data) ((void)0)
-#define debug_dump_surface(pipe, prefix, surface) ((void)0)
-#define debug_dump_surface_bmp(pipe, filename, surface) ((void)0)
-#define debug_dump_transfer_bmp(filename, transfer, ptr) ((void)0)
-#define debug_dump_float_rgba_bmp(filename, width, height, rgba, stride) ((void)0)
-#define debug_dump_ubyte_rgba_bmp(filename, width, height, rgba, stride) ((void)0)
-#endif
-
-
 void
 debug_print_transfer_flags(const char *msg, unsigned usage);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c
index 268aab26c40..241c2ccafb7 100644
--- a/src/gallium/drivers/llvmpipe/lp_flush.c
+++ b/src/gallium/drivers/llvmpipe/lp_flush.c
@@ -32,6 +32,7 @@
 
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
+#include "util/u_debug_image.h"
 #include "util/u_string.h"
 #include "draw/draw_context.h"
 #include "lp_flush.h"
diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c
index 188347bb4ca..5a29e26517d 100644
--- a/src/gallium/drivers/softpipe/sp_flush.c
+++ b/src/gallium/drivers/softpipe/sp_flush.c
@@ -38,6 +38,7 @@
 #include "sp_state.h"
 #include "sp_tile_cache.h"
 #include "sp_tex_tile_cache.h"
+#include "util/u_debug_image.h"
 #include "util/u_memory.h"
 #include "util/u_string.h"
 
diff --git a/src/gallium/drivers/svga/svga_pipe_flush.c b/src/gallium/drivers/svga/svga_pipe_flush.c
index d593c781680..8e0af12d294 100644
--- a/src/gallium/drivers/svga/svga_pipe_flush.c
+++ b/src/gallium/drivers/svga/svga_pipe_flush.c
@@ -24,6 +24,7 @@
  **********************************************************/
 
 #include "pipe/p_defines.h"
+#include "util/u_debug_image.h"
 #include "util/u_string.h"
 #include "svga_screen.h"
 #include "svga_surface.h"
diff --git a/src/gallium/targets/graw-null/graw_util.c b/src/gallium/targets/graw-null/graw_util.c
index 07693e85f6a..03b45d99e9d 100644
--- a/src/gallium/targets/graw-null/graw_util.c
+++ b/src/gallium/targets/graw-null/graw_util.c
@@ -5,6 +5,7 @@
 #include "pipe/p_state.h"
 #include "tgsi/tgsi_text.h"
 #include "util/u_debug.h"
+#include "util/u_debug_image.h"
 #include "util/u_memory.h"
 #include "state_tracker/graw.h"
 
diff --git a/src/gallium/tests/graw/graw_util.h b/src/gallium/tests/graw/graw_util.h
index f09c1eadc9c..3c7dbd061cc 100644
--- a/src/gallium/tests/graw/graw_util.h
+++ b/src/gallium/tests/graw/graw_util.h
@@ -9,6 +9,7 @@
 
 #include "util/u_box.h"    
 #include "util/u_debug.h"
+#include "util/u_debug_image.h"
 #include "util/u_draw_quad.h"
 #include "util/u_format.h"
 #include "util/u_inlines.h"
diff --git a/src/gallium/tests/trivial/quad-tex.c b/src/gallium/tests/trivial/quad-tex.c
index 4c5a9200a52..ddee2942af9 100644
--- a/src/gallium/tests/trivial/quad-tex.c
+++ b/src/gallium/tests/trivial/quad-tex.c
@@ -50,7 +50,7 @@
 /* u_sampler_view_default_template */
 #include "util/u_sampler.h"
 /* debug_dump_surface_bmp */
-#include "util/u_debug.h"
+#include "util/u_debug_image.h"
 /* util_draw_vertex_buffer helper */
 #include "util/u_draw_quad.h"
 /* FREE & CALLOC_STRUCT */
diff --git a/src/gallium/tests/trivial/tri.c b/src/gallium/tests/trivial/tri.c
index c71a63f44e5..914f5e75fa9 100644
--- a/src/gallium/tests/trivial/tri.c
+++ b/src/gallium/tests/trivial/tri.c
@@ -48,7 +48,7 @@
 #include "cso_cache/cso_context.h"
 
 /* debug_dump_surface_bmp */
-#include "util/u_debug.h"
+#include "util/u_debug_image.h"
 /* util_draw_vertex_buffer helper */
 #include "util/u_draw_quad.h"
 /* FREE & CALLOC_STRUCT */

From 5d2539cb49c7f009f0e2a1646f1ea0156f1d0b36 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 8 Feb 2016 09:29:38 -0700
Subject: [PATCH 13/94] gallium/util: whitespace, formatting fixes in
 u_staging.[ch] files

Still some nonsensical comments.
---
 src/gallium/auxiliary/util/u_staging.c | 50 ++++++++++++++------------
 src/gallium/auxiliary/util/u_staging.h | 24 +++++++------
 2 files changed, 41 insertions(+), 33 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_staging.c b/src/gallium/auxiliary/util/u_staging.c
index b569c8f9907..caef2a8245c 100644
--- a/src/gallium/auxiliary/util/u_staging.c
+++ b/src/gallium/auxiliary/util/u_staging.c
@@ -29,11 +29,14 @@
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
 
+
 static void
-util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigned height, unsigned depth, struct pipe_resource *template)
+util_staging_resource_template(struct pipe_resource *pt, unsigned width,
+                               unsigned height, unsigned depth,
+                               struct pipe_resource *template)
 {
    memset(template, 0, sizeof(struct pipe_resource));
-   if(pt->target != PIPE_BUFFER && depth <= 1)
+   if (pt->target != PIPE_BUFFER && depth <= 1)
       template->target = PIPE_TEXTURE_RECT;
    else
       template->target = pt->target;
@@ -49,16 +52,15 @@ util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigne
    template->flags = 0;
 }
 
+
 struct util_staging_transfer *
 util_staging_transfer_init(struct pipe_context *pipe,
-           struct pipe_resource *pt,
-           unsigned level,
-           unsigned usage,
-           const struct pipe_box *box,
-           boolean direct, struct util_staging_transfer *tx)
+                           struct pipe_resource *pt,
+                           unsigned level, unsigned usage,
+                           const struct pipe_box *box,
+                           boolean direct, struct util_staging_transfer *tx)
 {
    struct pipe_screen *pscreen = pipe->screen;
-
    struct pipe_resource staging_resource_template;
 
    pipe_resource_reference(&tx->base.resource, pt);
@@ -66,23 +68,22 @@ util_staging_transfer_init(struct pipe_context *pipe,
    tx->base.usage = usage;
    tx->base.box = *box;
 
-   if (direct)
-   {
+   if (direct) {
       tx->staging_resource = pt;
       return tx;
    }
 
-   util_staging_resource_template(pt, box->width, box->height, box->depth, &staging_resource_template);
-   tx->staging_resource = pscreen->resource_create(pscreen, &staging_resource_template);
-   if (!tx->staging_resource)
-   {
+   util_staging_resource_template(pt, box->width, box->height,
+                                  box->depth, &staging_resource_template);
+   tx->staging_resource = pscreen->resource_create(pscreen,
+                                                   &staging_resource_template);
+   if (!tx->staging_resource) {
       pipe_resource_reference(&tx->base.resource, NULL);
       FREE(tx);
       return NULL;
    }
 
-   if (usage & PIPE_TRANSFER_READ)
-   {
+   if (usage & PIPE_TRANSFER_READ) {
       /* XXX this looks wrong dst is always the same but looping over src z? */
       int zi;
       struct pipe_box sbox;
@@ -92,7 +93,7 @@ util_staging_transfer_init(struct pipe_context *pipe,
       sbox.width = box->width;
       sbox.height = box->height;
       sbox.depth = 1;
-      for(zi = 0; zi < box->depth; ++zi) {
+      for (zi = 0; zi < box->depth; ++zi) {
          sbox.z = sbox.z + zi;
          pipe->resource_copy_region(pipe, tx->staging_resource, 0, 0, 0, 0,
                                     tx->base.resource, level, &sbox);
@@ -102,14 +103,15 @@ util_staging_transfer_init(struct pipe_context *pipe,
    return tx;
 }
 
+
 void
-util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx)
+util_staging_transfer_destroy(struct pipe_context *pipe,
+                              struct pipe_transfer *ptx)
 {
    struct util_staging_transfer *tx = (struct util_staging_transfer *)ptx;
 
-   if (tx->staging_resource != tx->base.resource)
-   {
-      if(tx->base.usage & PIPE_TRANSFER_WRITE) {
+   if (tx->staging_resource != tx->base.resource) {
+      if (tx->base.usage & PIPE_TRANSFER_WRITE) {
          /* XXX this looks wrong src is always the same but looping over dst z? */
          int zi;
          struct pipe_box sbox;
@@ -119,8 +121,10 @@ util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *p
          sbox.width = tx->base.box.width;
          sbox.height = tx->base.box.height;
          sbox.depth = 1;
-         for(zi = 0; zi < tx->base.box.depth; ++zi)
-            pipe->resource_copy_region(pipe, tx->base.resource, tx->base.level, tx->base.box.x, tx->base.box.y, tx->base.box.z + zi,
+         for (zi = 0; zi < tx->base.box.depth; ++zi)
+            pipe->resource_copy_region(pipe, tx->base.resource, tx->base.level,
+                                       tx->base.box.x, tx->base.box.y,
+                                       tx->base.box.z + zi,
                                        tx->staging_resource, 0, &sbox);
       }
 
diff --git a/src/gallium/auxiliary/util/u_staging.h b/src/gallium/auxiliary/util/u_staging.h
index ddbb33443e4..6c468aad161 100644
--- a/src/gallium/auxiliary/util/u_staging.h
+++ b/src/gallium/auxiliary/util/u_staging.h
@@ -42,22 +42,26 @@
 struct util_staging_transfer {
    struct pipe_transfer base;
 
-   /* if direct, same as base.resource, otherwise the temporary staging resource */
+   /* if direct, same as base.resource, otherwise the temporary staging
+    * resource
+    */
    struct pipe_resource *staging_resource;
 };
 
-/* user must be stride, slice_stride and offset */
-/* pt->usage == PIPE_USAGE_DYNAMIC || pt->usage == PIPE_USAGE_STAGING should be a good value to pass for direct */
-/* staging resource is currently created with PIPE_USAGE_STAGING */
+/* user must be stride, slice_stride and offset.
+ * pt->usage == PIPE_USAGE_DYNAMIC || pt->usage == PIPE_USAGE_STAGING
+ * should be a good value to pass for direct.  Staging resource is currently
+ * created with PIPE_USAGE_STAGING
+ */
 struct util_staging_transfer *
 util_staging_transfer_init(struct pipe_context *pipe,
-           struct pipe_resource *pt,
-           unsigned level,
-           unsigned usage,
-           const struct pipe_box *box,
-           boolean direct, struct util_staging_transfer *tx);
+                           struct pipe_resource *pt,
+                           unsigned level, unsigned usage,
+                           const struct pipe_box *box,
+                           boolean direct, struct util_staging_transfer *tx);
 
 void
-util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx);
+util_staging_transfer_destroy(struct pipe_context *pipe,
+                              struct pipe_transfer *ptx);
 
 #endif

From 6691ba1fe8e54c6ce5c6b4424c8096a351fda932 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 8 Feb 2016 09:29:38 -0700
Subject: [PATCH 14/94] gallium/util: whitespace, formatting fixes in
 u_debug_stack.c

---
 src/gallium/auxiliary/util/u_debug_stack.c | 61 ++++++++++++----------
 1 file changed, 32 insertions(+), 29 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_debug_stack.c b/src/gallium/auxiliary/util/u_debug_stack.c
index 68961d3510e..1faa1903a76 100644
--- a/src/gallium/auxiliary/util/u_debug_stack.c
+++ b/src/gallium/auxiliary/util/u_debug_stack.c
@@ -2,7 +2,7 @@
  * 
  * Copyright 2009 VMware, Inc.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,13 +22,13 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 /**
  * @file
  * Stack backtracing.
- * 
+ *
  * @author Jose Fonseca <jfonseca@vmware.com>
  */
 
@@ -44,12 +44,13 @@
 /**
  * Capture stack backtrace.
  *
- * NOTE: The implementation of this function is quite big, but it is important not to
- * break it down in smaller functions to avoid adding new frames to the calling stack.
+ * NOTE: The implementation of this function is quite big, but it is important
+ * not to break it down in smaller functions to avoid adding new frames to the
+ * calling stack.
  */
 void
 debug_backtrace_capture(struct debug_stack_frame *backtrace,
-                        unsigned start_frame, 
+                        unsigned start_frame,
                         unsigned nr_frames)
 {
    const void **frame_pointer = NULL;
@@ -66,7 +67,8 @@ debug_backtrace_capture(struct debug_stack_frame *backtrace,
     */
 #if defined(PIPE_OS_WINDOWS)
    {
-      typedef USHORT (WINAPI *PFNCAPTURESTACKBACKTRACE)(ULONG, ULONG, PVOID *, PULONG);
+      typedef USHORT (WINAPI *PFNCAPTURESTACKBACKTRACE)(ULONG, ULONG,
+                                                        PVOID *, PULONG);
       static PFNCAPTURESTACKBACKTRACE pfnCaptureStackBackTrace = NULL;
 
       if (!pfnCaptureStackBackTrace) {
@@ -76,8 +78,9 @@ debug_backtrace_capture(struct debug_stack_frame *backtrace,
             assert(hModule);
          }
          if (hModule) {
-            pfnCaptureStackBackTrace = (PFNCAPTURESTACKBACKTRACE)GetProcAddress(hModule,
-                                                                                "RtlCaptureStackBackTrace");
+            pfnCaptureStackBackTrace =
+               (PFNCAPTURESTACKBACKTRACE)GetProcAddress(hModule,
+                                                "RtlCaptureStackBackTrace");
          }
       }
       if (pfnCaptureStackBackTrace) {
@@ -88,7 +91,8 @@ debug_backtrace_capture(struct debug_stack_frame *backtrace,
          start_frame += 1;
 
          assert(start_frame + nr_frames < 63);
-         i = pfnCaptureStackBackTrace(start_frame, nr_frames, (PVOID *) &backtrace->function, NULL);
+         i = pfnCaptureStackBackTrace(start_frame, nr_frames,
+                                      (PVOID *) &backtrace->function, NULL);
 
          /* Pad remaing requested frames with NULL */
          while (i < nr_frames) {
@@ -110,50 +114,49 @@ debug_backtrace_capture(struct debug_stack_frame *backtrace,
 #else
    frame_pointer = NULL;
 #endif
-  
-   
+
 #ifdef PIPE_ARCH_X86
-   while(nr_frames) {
+   while (nr_frames) {
       const void **next_frame_pointer;
 
-      if(!frame_pointer)
+      if (!frame_pointer)
          break;
-      
-      if(start_frame)
+
+      if (start_frame)
          --start_frame;
       else {
          backtrace[i++].function = frame_pointer[1];
          --nr_frames;
       }
-      
+
       next_frame_pointer = (const void **)frame_pointer[0];
-      
+
       /* Limit the stack walk to avoid referencing undefined memory */
-      if((uintptr_t)next_frame_pointer <= (uintptr_t)frame_pointer ||
-         (uintptr_t)next_frame_pointer > (uintptr_t)frame_pointer + 64*1024)
+      if ((uintptr_t)next_frame_pointer <= (uintptr_t)frame_pointer ||
+          (uintptr_t)next_frame_pointer > (uintptr_t)frame_pointer + 64*1024)
          break;
-      
+
       frame_pointer = next_frame_pointer;
    }
 #else
    (void) frame_pointer;
 #endif
 
-   while(nr_frames) {
+   while (nr_frames) {
       backtrace[i++].function = NULL;
       --nr_frames;
    }
 }
-   
+
 
 void
-debug_backtrace_dump(const struct debug_stack_frame *backtrace, 
+debug_backtrace_dump(const struct debug_stack_frame *backtrace,
                      unsigned nr_frames)
 {
    unsigned i;
-   
-   for(i = 0; i < nr_frames; ++i) {
-      if(!backtrace[i].function)
+
+   for (i = 0; i < nr_frames; ++i) {
+      if (!backtrace[i].function)
          break;
       debug_symbol_print(backtrace[i].function);
    }

From 5fdbfb8d6fa2e1521cf2f60e80ac571a37ab22a2 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 8 Feb 2016 09:29:38 -0700
Subject: [PATCH 15/94] mesa: move GL_ARB_debug_output code into new
 debug_output.c file

The errors.c file had grown quite large so split off this extension
code into its own file.  This involved making a handful of functions
non-static.

Acked-by: Timothy Arceri <timothy.arceri@collabora.com>
---
 src/compiler/glsl/glsl_parser_extras.cpp |    1 +
 src/mapi/glapi/gen/gl_genexec.py         |    1 +
 src/mesa/Makefile.sources                |    2 +
 src/mesa/main/context.c                  |    3 +-
 src/mesa/main/debug_output.c             | 1301 ++++++++++++++++++++++
 src/mesa/main/debug_output.h             |  107 ++
 src/mesa/main/enable.c                   |    1 +
 src/mesa/main/errors.c                   | 1286 +--------------------
 src/mesa/main/errors.h                   |   48 +-
 src/mesa/main/get.c                      |    1 +
 src/mesa/main/getstring.c                |    1 +
 src/mesa/state_tracker/st_manager.c      |    1 +
 12 files changed, 1435 insertions(+), 1318 deletions(-)
 create mode 100644 src/mesa/main/debug_output.c
 create mode 100644 src/mesa/main/debug_output.h

diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp
index b635d99f61b..20ec89dd210 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -27,6 +27,7 @@
 
 #include "main/core.h" /* for struct gl_context */
 #include "main/context.h"
+#include "main/debug_output.h"
 #include "main/shaderobj.h"
 #include "util/u_atomic.h" /* for p_atomic_cmpxchg */
 #include "util/ralloc.h"
diff --git a/src/mapi/glapi/gen/gl_genexec.py b/src/mapi/glapi/gen/gl_genexec.py
index 6c66779c222..72d7b6fea1f 100644
--- a/src/mapi/glapi/gen/gl_genexec.py
+++ b/src/mapi/glapi/gen/gl_genexec.py
@@ -66,6 +66,7 @@ header = """/**
 #include "main/convolve.h"
 #include "main/copyimage.h"
 #include "main/depth.h"
+#include "main/debug_output.h"
 #include "main/dlist.h"
 #include "main/drawpix.h"
 #include "main/drawtex.h"
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index ffe560faa3d..35405e7d1e0 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -57,6 +57,8 @@ MAIN_FILES = \
 	main/dd.h \
 	main/debug.c \
 	main/debug.h \
+	main/debug_output.c \
+	main/debug_output.h \
 	main/depth.c \
 	main/depth.h \
 	main/dlist.c \
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index 8b415ed6019..9388a1ca51d 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -89,6 +89,7 @@
 #include "context.h"
 #include "cpuinfo.h"
 #include "debug.h"
+#include "debug_output.h"
 #include "depth.h"
 #include "dlist.h"
 #include "eval.h"
@@ -814,8 +815,8 @@ init_attrib_groups(struct gl_context *ctx)
    _mesa_init_current( ctx );
    _mesa_init_depth( ctx );
    _mesa_init_debug( ctx );
+   _mesa_init_debug_output( ctx );
    _mesa_init_display_list( ctx );
-   _mesa_init_errors( ctx );
    _mesa_init_eval( ctx );
    _mesa_init_fbobjects( ctx );
    _mesa_init_feedback( ctx );
diff --git a/src/mesa/main/debug_output.c b/src/mesa/main/debug_output.c
new file mode 100644
index 00000000000..10ee6757cc1
--- /dev/null
+++ b/src/mesa/main/debug_output.c
@@ -0,0 +1,1301 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2016  Brian Paul, et al   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "context.h"
+#include "debug_output.h"
+#include "dispatch.h"
+#include "enums.h"
+#include "imports.h"
+#include "hash.h"
+#include "mtypes.h"
+#include "version.h"
+#include "util/hash_table.h"
+#include "util/simple_list.h"
+
+
+static mtx_t DynamicIDMutex = _MTX_INITIALIZER_NP;
+static GLuint NextDynamicID = 1;
+
+
+/**
+ * A namespace element.
+ */
+struct gl_debug_element
+{
+   struct simple_node link;
+
+   GLuint ID;
+   /* at which severity levels (mesa_debug_severity) is the message enabled */
+   GLbitfield State;
+};
+
+
+struct gl_debug_namespace
+{
+   struct simple_node Elements;
+   GLbitfield DefaultState;
+};
+
+
+struct gl_debug_group {
+   struct gl_debug_namespace Namespaces[MESA_DEBUG_SOURCE_COUNT][MESA_DEBUG_TYPE_COUNT];
+};
+
+
+/**
+ * An error, warning, or other piece of debug information for an application
+ * to consume via GL_ARB_debug_output/GL_KHR_debug.
+ */
+struct gl_debug_message
+{
+   enum mesa_debug_source source;
+   enum mesa_debug_type type;
+   GLuint id;
+   enum mesa_debug_severity severity;
+   /* length as given by the user - if message was explicitly null terminated,
+    * length can be negative */
+   GLsizei length;
+   GLcharARB *message;
+};
+
+
+/**
+ * Debug message log.  It works like a ring buffer.
+ */
+struct gl_debug_log {
+   struct gl_debug_message Messages[MAX_DEBUG_LOGGED_MESSAGES];
+   GLint NextMessage;
+   GLint NumMessages;
+};
+
+
+struct gl_debug_state
+{
+   GLDEBUGPROC Callback;
+   const void *CallbackData;
+   GLboolean SyncOutput;
+   GLboolean DebugOutput;
+
+   struct gl_debug_group *Groups[MAX_DEBUG_GROUP_STACK_DEPTH];
+   struct gl_debug_message GroupMessages[MAX_DEBUG_GROUP_STACK_DEPTH];
+   GLint CurrentGroup; // GroupStackDepth - 1
+
+   struct gl_debug_log Log;
+};
+
+
+static char out_of_memory[] = "Debugging error: out of memory";
+
+static const GLenum debug_source_enums[] = {
+   GL_DEBUG_SOURCE_API,
+   GL_DEBUG_SOURCE_WINDOW_SYSTEM,
+   GL_DEBUG_SOURCE_SHADER_COMPILER,
+   GL_DEBUG_SOURCE_THIRD_PARTY,
+   GL_DEBUG_SOURCE_APPLICATION,
+   GL_DEBUG_SOURCE_OTHER,
+};
+
+static const GLenum debug_type_enums[] = {
+   GL_DEBUG_TYPE_ERROR,
+   GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR,
+   GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR,
+   GL_DEBUG_TYPE_PORTABILITY,
+   GL_DEBUG_TYPE_PERFORMANCE,
+   GL_DEBUG_TYPE_OTHER,
+   GL_DEBUG_TYPE_MARKER,
+   GL_DEBUG_TYPE_PUSH_GROUP,
+   GL_DEBUG_TYPE_POP_GROUP,
+};
+
+static const GLenum debug_severity_enums[] = {
+   GL_DEBUG_SEVERITY_LOW,
+   GL_DEBUG_SEVERITY_MEDIUM,
+   GL_DEBUG_SEVERITY_HIGH,
+   GL_DEBUG_SEVERITY_NOTIFICATION,
+};
+
+
+static enum mesa_debug_source
+gl_enum_to_debug_source(GLenum e)
+{
+   unsigned i;
+
+   for (i = 0; i < ARRAY_SIZE(debug_source_enums); i++) {
+      if (debug_source_enums[i] == e)
+         break;
+   }
+   return i;
+}
+
+static enum mesa_debug_type
+gl_enum_to_debug_type(GLenum e)
+{
+   unsigned i;
+
+   for (i = 0; i < ARRAY_SIZE(debug_type_enums); i++) {
+      if (debug_type_enums[i] == e)
+         break;
+   }
+   return i;
+}
+
+static enum mesa_debug_severity
+gl_enum_to_debug_severity(GLenum e)
+{
+   unsigned i;
+
+   for (i = 0; i < ARRAY_SIZE(debug_severity_enums); i++) {
+      if (debug_severity_enums[i] == e)
+         break;
+   }
+   return i;
+}
+
+
+/**
+ * Handles generating a GL_ARB_debug_output message ID generated by the GL or
+ * GLSL compiler.
+ *
+ * The GL API has this "ID" mechanism, where the intention is to allow a
+ * client to filter in/out messages based on source, type, and ID.  Of course,
+ * building a giant enum list of all debug output messages that Mesa might
+ * generate is ridiculous, so instead we have our caller pass us a pointer to
+ * static storage where the ID should get stored.  This ID will be shared
+ * across all contexts for that message (which seems like a desirable
+ * property, even if it's not expected by the spec), but note that it won't be
+ * the same between executions if messages aren't generated in the same order.
+ */
+void
+_mesa_debug_get_id(GLuint *id)
+{
+   if (!(*id)) {
+      mtx_lock(&DynamicIDMutex);
+      if (!(*id))
+         *id = NextDynamicID++;
+      mtx_unlock(&DynamicIDMutex);
+   }
+}
+
+static void
+debug_message_clear(struct gl_debug_message *msg)
+{
+   if (msg->message != (char*)out_of_memory)
+      free(msg->message);
+   msg->message = NULL;
+   msg->length = 0;
+}
+
+static void
+debug_message_store(struct gl_debug_message *msg,
+                    enum mesa_debug_source source,
+                    enum mesa_debug_type type, GLuint id,
+                    enum mesa_debug_severity severity,
+                    GLsizei len, const char *buf)
+{
+   GLsizei length = len;
+
+   assert(!msg->message && !msg->length);
+
+   if (length < 0)
+      length = strlen(buf);
+
+   msg->message = malloc(length+1);
+   if (msg->message) {
+      (void) strncpy(msg->message, buf, (size_t)length);
+      msg->message[length] = '\0';
+
+      msg->length = len;
+      msg->source = source;
+      msg->type = type;
+      msg->id = id;
+      msg->severity = severity;
+   } else {
+      static GLuint oom_msg_id = 0;
+      _mesa_debug_get_id(&oom_msg_id);
+
+      /* malloc failed! */
+      msg->message = out_of_memory;
+      msg->length = -1;
+      msg->source = MESA_DEBUG_SOURCE_OTHER;
+      msg->type = MESA_DEBUG_TYPE_ERROR;
+      msg->id = oom_msg_id;
+      msg->severity = MESA_DEBUG_SEVERITY_HIGH;
+   }
+}
+
+static void
+debug_namespace_init(struct gl_debug_namespace *ns)
+{
+   make_empty_list(&ns->Elements);
+
+   /* Enable messages with severity MEDIUM, HIGH or NOTIFICATION by default */
+   ns->DefaultState = (1 << MESA_DEBUG_SEVERITY_MEDIUM ) |
+                      (1 << MESA_DEBUG_SEVERITY_HIGH) |
+                      (1 << MESA_DEBUG_SEVERITY_NOTIFICATION);
+}
+
+static void
+debug_namespace_clear(struct gl_debug_namespace *ns)
+{
+   struct simple_node *node, *tmp;
+
+   foreach_s(node, tmp, &ns->Elements)
+      free(node);
+}
+
+static bool
+debug_namespace_copy(struct gl_debug_namespace *dst,
+                     const struct gl_debug_namespace *src)
+{
+   struct simple_node *node;
+
+   dst->DefaultState = src->DefaultState;
+
+   make_empty_list(&dst->Elements);
+   foreach(node, &src->Elements) {
+      const struct gl_debug_element *elem =
+         (const struct gl_debug_element *) node;
+      struct gl_debug_element *copy;
+
+      copy = malloc(sizeof(*copy));
+      if (!copy) {
+         debug_namespace_clear(dst);
+         return false;
+      }
+
+      copy->ID = elem->ID;
+      copy->State = elem->State;
+      insert_at_tail(&dst->Elements, &copy->link);
+   }
+
+   return true;
+}
+
+/**
+ * Set the state of \p id in the namespace.
+ */
+static bool
+debug_namespace_set(struct gl_debug_namespace *ns,
+                    GLuint id, bool enabled)
+{
+   const uint32_t state = (enabled) ?
+      ((1 << MESA_DEBUG_SEVERITY_COUNT) - 1) : 0;
+   struct gl_debug_element *elem = NULL;
+   struct simple_node *node;
+
+   /* find the element */
+   foreach(node, &ns->Elements) {
+      struct gl_debug_element *tmp = (struct gl_debug_element *) node;
+      if (tmp->ID == id) {
+         elem = tmp;
+         break;
+      }
+   }
+
+   /* we do not need the element if it has the default state */
+   if (ns->DefaultState == state) {
+      if (elem) {
+         remove_from_list(&elem->link);
+         free(elem);
+      }
+      return true;
+   }
+
+   if (!elem) {
+      elem = malloc(sizeof(*elem));
+      if (!elem)
+         return false;
+
+      elem->ID = id;
+      insert_at_tail(&ns->Elements, &elem->link);
+   }
+
+   elem->State = state;
+
+   return true;
+}
+
+/**
+ * Set the default state of the namespace for \p severity.  When \p severity
+ * is MESA_DEBUG_SEVERITY_COUNT, the default values for all severities are
+ * updated.
+ */
+static void
+debug_namespace_set_all(struct gl_debug_namespace *ns,
+                        enum mesa_debug_severity severity,
+                        bool enabled)
+{
+   struct simple_node *node, *tmp;
+   uint32_t mask, val;
+
+   /* set all elements to the same state */
+   if (severity == MESA_DEBUG_SEVERITY_COUNT) {
+      ns->DefaultState = (enabled) ? ((1 << severity) - 1) : 0;
+      debug_namespace_clear(ns);
+      make_empty_list(&ns->Elements);
+      return;
+   }
+
+   mask = 1 << severity;
+   val = (enabled) ? mask : 0;
+
+   ns->DefaultState = (ns->DefaultState & ~mask) | val;
+
+   foreach_s(node, tmp, &ns->Elements) {
+      struct gl_debug_element *elem = (struct gl_debug_element *) node;
+
+      elem->State = (elem->State & ~mask) | val;
+      if (elem->State == ns->DefaultState) {
+         remove_from_list(node);
+         free(node);
+      }
+   }
+}
+
+/**
+ * Get the state of \p id in the namespace.
+ */
+static bool
+debug_namespace_get(const struct gl_debug_namespace *ns, GLuint id,
+                    enum mesa_debug_severity severity)
+{
+   struct simple_node *node;
+   uint32_t state;
+
+   state = ns->DefaultState;
+   foreach(node, &ns->Elements) {
+      struct gl_debug_element *elem = (struct gl_debug_element *) node;
+
+      if (elem->ID == id) {
+         state = elem->State;
+         break;
+      }
+   }
+
+   return (state & (1 << severity));
+}
+
+/**
+ * Allocate and initialize context debug state.
+ */
+static struct gl_debug_state *
+debug_create(void)
+{
+   struct gl_debug_state *debug;
+   int s, t;
+
+   debug = CALLOC_STRUCT(gl_debug_state);
+   if (!debug)
+      return NULL;
+
+   debug->Groups[0] = malloc(sizeof(*debug->Groups[0]));
+   if (!debug->Groups[0]) {
+      free(debug);
+      return NULL;
+   }
+
+   /* Initialize state for filtering known debug messages. */
+   for (s = 0; s < MESA_DEBUG_SOURCE_COUNT; s++) {
+      for (t = 0; t < MESA_DEBUG_TYPE_COUNT; t++)
+         debug_namespace_init(&debug->Groups[0]->Namespaces[s][t]);
+   }
+
+   return debug;
+}
+
+/**
+ * Return true if the top debug group points to the group below it.
+ */
+static bool
+debug_is_group_read_only(const struct gl_debug_state *debug)
+{
+   const GLint gstack = debug->CurrentGroup;
+   return (gstack > 0 && debug->Groups[gstack] == debug->Groups[gstack - 1]);
+}
+
+/**
+ * Make the top debug group writable.
+ */
+static bool
+debug_make_group_writable(struct gl_debug_state *debug)
+{
+   const GLint gstack = debug->CurrentGroup;
+   const struct gl_debug_group *src = debug->Groups[gstack];
+   struct gl_debug_group *dst;
+   int s, t;
+
+   if (!debug_is_group_read_only(debug))
+      return true;
+
+   dst = malloc(sizeof(*dst));
+   if (!dst)
+      return false;
+
+   for (s = 0; s < MESA_DEBUG_SOURCE_COUNT; s++) {
+      for (t = 0; t < MESA_DEBUG_TYPE_COUNT; t++) {
+         if (!debug_namespace_copy(&dst->Namespaces[s][t],
+                                   &src->Namespaces[s][t])) {
+            /* error path! */
+            for (t = t - 1; t >= 0; t--)
+               debug_namespace_clear(&dst->Namespaces[s][t]);
+            for (s = s - 1; s >= 0; s--) {
+               for (t = 0; t < MESA_DEBUG_TYPE_COUNT; t++)
+                  debug_namespace_clear(&dst->Namespaces[s][t]);
+            }
+            free(dst);
+            return false;
+         }
+      }
+   }
+
+   debug->Groups[gstack] = dst;
+
+   return true;
+}
+
+/**
+ * Free the top debug group.
+ */
+static void
+debug_clear_group(struct gl_debug_state *debug)
+{
+   const GLint gstack = debug->CurrentGroup;
+
+   if (!debug_is_group_read_only(debug)) {
+      struct gl_debug_group *grp = debug->Groups[gstack];
+      int s, t;
+
+      for (s = 0; s < MESA_DEBUG_SOURCE_COUNT; s++) {
+         for (t = 0; t < MESA_DEBUG_TYPE_COUNT; t++)
+            debug_namespace_clear(&grp->Namespaces[s][t]);
+      }
+
+      free(grp);
+   }
+
+   debug->Groups[gstack] = NULL;
+}
+
+/**
+ * Loop through debug group stack tearing down states for
+ * filtering debug messages.  Then free debug output state.
+ */
+static void
+debug_destroy(struct gl_debug_state *debug)
+{
+   while (debug->CurrentGroup > 0) {
+      debug_clear_group(debug);
+      debug->CurrentGroup--;
+   }
+
+   debug_clear_group(debug);
+   free(debug);
+}
+
+/**
+ * Sets the state of the given message source/type/ID tuple.
+ */
+static void
+debug_set_message_enable(struct gl_debug_state *debug,
+                         enum mesa_debug_source source,
+                         enum mesa_debug_type type,
+                         GLuint id, GLboolean enabled)
+{
+   const GLint gstack = debug->CurrentGroup;
+
+   /* On OOM the group still aliases its parent; leave it untouched. */
+   if (!debug_make_group_writable(debug))
+      return;
+   debug_namespace_set(&debug->Groups[gstack]->Namespaces[source][type],
+                       id, enabled);
+}
+
+/*
+ * Set the state of all message IDs found in the given intersection of
+ * 'source', 'type', and 'severity'.  The _COUNT enum can be used for
+ * GL_DONT_CARE (include all messages in the class).
+ *
+ * This requires both setting the state of all previously seen message
+ * IDs in the hash table, and setting the default state for all
+ * applicable combinations of source/type/severity, so that all the
+ * yet-unknown message IDs that may be used in the future will be
+ * impacted as if they were already known.
+ */
+static void
+debug_set_message_enable_all(struct gl_debug_state *debug,
+                             enum mesa_debug_source source,
+                             enum mesa_debug_type type,
+                             enum mesa_debug_severity severity,
+                             GLboolean enabled)
+{
+   const GLint gstack = debug->CurrentGroup;
+   int s, t, smax, tmax;
+
+   if (source == MESA_DEBUG_SOURCE_COUNT) {
+      source = 0;
+      smax = MESA_DEBUG_SOURCE_COUNT;
+   } else {
+      smax = source+1;
+   }
+
+   if (type == MESA_DEBUG_TYPE_COUNT) {
+      type = 0;
+      tmax = MESA_DEBUG_TYPE_COUNT;
+   } else {
+      tmax = type+1;
+   }
+
+   /* On OOM the group still aliases its parent; leave it untouched. */
+   if (!debug_make_group_writable(debug))
+      return;
+
+   for (s = source; s < smax; s++) {
+      for (t = type; t < tmax; t++)
+         debug_namespace_set_all(&debug->Groups[gstack]->Namespaces[s][t],
+                                 severity, enabled);
+   }
+}
+
+/**
+ * Returns if the given message source/type/ID tuple is enabled.
+ */
+bool
+_mesa_debug_is_message_enabled(const struct gl_debug_state *debug,
+                               enum mesa_debug_source source,
+                               enum mesa_debug_type type,
+                               GLuint id,
+                               enum mesa_debug_severity severity)
+{
+   const GLint gstack = debug->CurrentGroup;
+   struct gl_debug_group *grp = debug->Groups[gstack];
+   struct gl_debug_namespace *nspace = &grp->Namespaces[source][type];
+
+   if (!debug->DebugOutput)
+      return false;
+
+   return debug_namespace_get(nspace, id, severity);
+}
+
+/**
+ * 'buf' is not necessarily a null-terminated string. When logging, copy
+ * 'len' characters from it, store them in a new, null-terminated string,
+ * and remember the number of bytes used by that string, *including*
+ * the null terminator this time.
+ */
+static void
+debug_log_message(struct gl_debug_state *debug,
+                  enum mesa_debug_source source,
+                  enum mesa_debug_type type, GLuint id,
+                  enum mesa_debug_severity severity,
+                  GLsizei len, const char *buf)
+{
+   struct gl_debug_log *log = &debug->Log;
+   GLint nextEmpty;
+   struct gl_debug_message *emptySlot;
+
+   assert(len < MAX_DEBUG_MESSAGE_LENGTH);
+
+   if (log->NumMessages == MAX_DEBUG_LOGGED_MESSAGES)
+      return;
+
+   nextEmpty = (log->NextMessage + log->NumMessages)
+      % MAX_DEBUG_LOGGED_MESSAGES;
+   emptySlot = &log->Messages[nextEmpty];
+
+   debug_message_store(emptySlot, source, type,
+                       id, severity, len, buf);
+
+   log->NumMessages++;
+}
+
+/**
+ * Return the oldest debug message out of the log.
+ */
+static const struct gl_debug_message *
+debug_fetch_message(const struct gl_debug_state *debug)
+{
+   const struct gl_debug_log *log = &debug->Log;
+
+   return (log->NumMessages) ? &log->Messages[log->NextMessage] : NULL;
+}
+
+/**
+ * Delete the oldest debug messages out of the log.
+ */
+static void
+debug_delete_messages(struct gl_debug_state *debug, int count)
+{
+   struct gl_debug_log *log = &debug->Log;
+
+   if (count > log->NumMessages)
+      count = log->NumMessages;
+
+   while (count--) {
+      struct gl_debug_message *msg = &log->Messages[log->NextMessage];
+
+      debug_message_clear(msg);
+
+      log->NumMessages--;
+      log->NextMessage++;
+      log->NextMessage %= MAX_DEBUG_LOGGED_MESSAGES;
+   }
+}
+
+static struct gl_debug_message *
+debug_get_group_message(struct gl_debug_state *debug)
+{
+   return &debug->GroupMessages[debug->CurrentGroup];
+}
+
+static void
+debug_push_group(struct gl_debug_state *debug)
+{
+   const GLint gstack = debug->CurrentGroup;
+
+   /* just point to the previous stack */
+   debug->Groups[gstack + 1] = debug->Groups[gstack];
+   debug->CurrentGroup++;
+}
+
+static void
+debug_pop_group(struct gl_debug_state *debug)
+{
+   debug_clear_group(debug);
+   debug->CurrentGroup--;
+}
+
+
+/**
+ * Lock and return debug state for the context.  The debug state will be
+ * allocated and initialized upon the first call.  When NULL is returned, the
+ * debug state is not locked.
+ */
+static struct gl_debug_state *
+_mesa_lock_debug_state(struct gl_context *ctx)
+{
+   mtx_lock(&ctx->DebugMutex);
+
+   if (!ctx->Debug) {
+      ctx->Debug = debug_create();
+      if (!ctx->Debug) {
+         GET_CURRENT_CONTEXT(cur);
+         mtx_unlock(&ctx->DebugMutex);
+
+         /*
+          * This function may be called from other threads.  When that is the
+          * case, we cannot record this OOM error.
+          */
+         if (ctx == cur)
+            _mesa_error(ctx, GL_OUT_OF_MEMORY, "allocating debug state");
+
+         return NULL;
+      }
+   }
+
+   return ctx->Debug;
+}
+
+static void
+_mesa_unlock_debug_state(struct gl_context *ctx)
+{
+   mtx_unlock(&ctx->DebugMutex);
+}
+
+/**
+ * Set the integer debug state specified by \p pname.  This can be called from
+ * _mesa_set_enable for example.
+ */
+bool
+_mesa_set_debug_state_int(struct gl_context *ctx, GLenum pname, GLint val)
+{
+   struct gl_debug_state *debug = _mesa_lock_debug_state(ctx);
+
+   if (!debug)
+      return false;
+
+   switch (pname) {
+   case GL_DEBUG_OUTPUT:
+      debug->DebugOutput = (val != 0);
+      break;
+   case GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB:
+      debug->SyncOutput = (val != 0);
+      break;
+   default:
+      assert(!"unknown debug output param");
+      break;
+   }
+
+   _mesa_unlock_debug_state(ctx);
+
+   return true;
+}
+
+/**
+ * Query the integer debug state \p pname, e.g. for _mesa_GetIntegerv.
+ */
+GLint
+_mesa_get_debug_state_int(struct gl_context *ctx, GLenum pname)
+{
+   struct gl_debug_state *debug;
+   const struct gl_debug_message *msg;
+   GLint val = 0;
+
+   mtx_lock(&ctx->DebugMutex);
+   debug = ctx->Debug;
+   if (!debug) {
+      mtx_unlock(&ctx->DebugMutex);
+      return 0;
+   }
+
+   switch (pname) {
+   case GL_DEBUG_OUTPUT:
+      val = debug->DebugOutput;
+      break;
+   case GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB:
+      val = debug->SyncOutput;
+      break;
+   case GL_DEBUG_LOGGED_MESSAGES:
+      val = debug->Log.NumMessages;
+      break;
+   case GL_DEBUG_NEXT_LOGGED_MESSAGE_LENGTH:
+      /* Oldest message's length *including* its null terminator; 0 if none. */
+      msg = debug_fetch_message(debug);
+      if (msg)
+         val = (msg->length < 0 ? strlen(msg->message) : msg->length) + 1;
+      break;
+   case GL_DEBUG_GROUP_STACK_DEPTH:
+      val = debug->CurrentGroup + 1;
+      break;
+   default:
+      assert(!"unknown debug output param");
+      break;
+   }
+
+   mtx_unlock(&ctx->DebugMutex);
+   return val;
+}
+
+/**
+ * Query the pointer debug state specified by \p pname.  This can be called
+ * from _mesa_GetPointerv, for example.
+ */
+void *
+_mesa_get_debug_state_ptr(struct gl_context *ctx, GLenum pname)
+{
+   struct gl_debug_state *debug;
+   void *val;
+
+   mtx_lock(&ctx->DebugMutex);
+   debug = ctx->Debug;
+   if (!debug) {
+      mtx_unlock(&ctx->DebugMutex);
+      return NULL;
+   }
+
+   switch (pname) {
+   case GL_DEBUG_CALLBACK_FUNCTION_ARB:
+      val = (void *) debug->Callback;
+      break;
+   case GL_DEBUG_CALLBACK_USER_PARAM_ARB:
+      val = (void *) debug->CallbackData;
+      break;
+   default:
+      assert(!"unknown debug output param");
+      val = NULL;
+      break;
+   }
+
+   mtx_unlock(&ctx->DebugMutex);
+
+   return val;
+}
+
+/**
+ * Insert a debug message.  The mutex is assumed to be locked, and will be
+ * unlocked by this call.  A negative \p len means \p buf is null terminated.
+ */
+static void
+log_msg_locked_and_unlock(struct gl_context *ctx,
+                          enum mesa_debug_source source,
+                          enum mesa_debug_type type, GLuint id,
+                          enum mesa_debug_severity severity,
+                          GLint len, const char *buf)
+{
+   struct gl_debug_state *debug = ctx->Debug;
+
+   if (!_mesa_debug_is_message_enabled(debug, source, type, id, severity)) {
+      _mesa_unlock_debug_state(ctx);
+      return;
+   }
+
+   /* The callback must see the real string length, never the -1 sentinel. */
+   if (len < 0)
+      len = strlen(buf);
+
+   if (debug->Callback) {
+      GLDEBUGPROC callback = debug->Callback;
+      const void *data = debug->CallbackData;
+
+      /*
+       * SyncOutput off: the client is prepared for unsynchronous calls.
+       * SyncOutput on: we never spawn threads.  Either way, call unlocked.
+       */
+      _mesa_unlock_debug_state(ctx);
+      callback(debug_source_enums[source], debug_type_enums[type], id,
+               debug_severity_enums[severity], len, buf, data);
+   } else {
+      debug_log_message(debug, source, type, id, severity, len, buf);
+      _mesa_unlock_debug_state(ctx);
+   }
+}
+
+/**
+ * Log a client or driver debug message.
+ */
+void
+_mesa_log_msg(struct gl_context *ctx, enum mesa_debug_source source,
+              enum mesa_debug_type type, GLuint id,
+              enum mesa_debug_severity severity, GLint len, const char *buf)
+{
+   struct gl_debug_state *debug = _mesa_lock_debug_state(ctx);
+
+   if (!debug)
+      return;
+
+   log_msg_locked_and_unlock(ctx, source, type, id, severity, len, buf);
+}
+
+
+/**
+ * Verify that source, type, and severity are valid enums.
+ *
+ * The 'caller' param is used for handling values available
+ * only in glDebugMessageInsert or glDebugMessageControl
+ */
+static GLboolean
+validate_params(struct gl_context *ctx, unsigned caller,
+                const char *callerstr, GLenum source, GLenum type,
+                GLenum severity)
+{
+#define INSERT 1
+#define CONTROL 2
+   switch(source) {
+   case GL_DEBUG_SOURCE_APPLICATION_ARB:
+   case GL_DEBUG_SOURCE_THIRD_PARTY_ARB:
+      break;
+   case GL_DEBUG_SOURCE_API_ARB:
+   case GL_DEBUG_SOURCE_SHADER_COMPILER_ARB:
+   case GL_DEBUG_SOURCE_WINDOW_SYSTEM_ARB:
+   case GL_DEBUG_SOURCE_OTHER_ARB:
+      if (caller != INSERT)
+         break;
+      else
+         goto error;
+   case GL_DONT_CARE:
+      if (caller == CONTROL)
+         break;
+      else
+         goto error;
+   default:
+      goto error;
+   }
+
+   switch(type) {
+   case GL_DEBUG_TYPE_ERROR_ARB:
+   case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR_ARB:
+   case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR_ARB:
+   case GL_DEBUG_TYPE_PERFORMANCE_ARB:
+   case GL_DEBUG_TYPE_PORTABILITY_ARB:
+   case GL_DEBUG_TYPE_OTHER_ARB:
+   case GL_DEBUG_TYPE_MARKER:
+   case GL_DEBUG_TYPE_PUSH_GROUP:
+   case GL_DEBUG_TYPE_POP_GROUP:
+      break;
+   case GL_DONT_CARE:
+      if (caller == CONTROL)
+         break;
+      else
+         goto error;
+   default:
+      goto error;
+   }
+
+   switch(severity) {
+   case GL_DEBUG_SEVERITY_HIGH_ARB:
+   case GL_DEBUG_SEVERITY_MEDIUM_ARB:
+   case GL_DEBUG_SEVERITY_LOW_ARB:
+   case GL_DEBUG_SEVERITY_NOTIFICATION:
+      break;
+   case GL_DONT_CARE:
+      if (caller == CONTROL)
+         break;
+      else
+         goto error;
+   default:
+      goto error;
+   }
+   return GL_TRUE;
+
+error:
+   _mesa_error(ctx, GL_INVALID_ENUM, "bad values passed to %s"
+               "(source=0x%x, type=0x%x, severity=0x%x)", callerstr,
+               source, type, severity);
+
+   return GL_FALSE;
+}
+
+
+static GLboolean
+validate_length(struct gl_context *ctx, const char *callerstr, GLsizei length,
+                const GLchar *buf)
+{
+
+   if (length < 0) {
+      GLsizei len = strlen(buf);
+
+      if (len >= MAX_DEBUG_MESSAGE_LENGTH) {
+         _mesa_error(ctx, GL_INVALID_VALUE,
+                    "%s(null terminated string length=%d, is not less than "
+                    "GL_MAX_DEBUG_MESSAGE_LENGTH=%d)", callerstr, len,
+                    MAX_DEBUG_MESSAGE_LENGTH);
+         return GL_FALSE;
+      }
+   }
+
+   if (length >= MAX_DEBUG_MESSAGE_LENGTH) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                 "%s(length=%d, which is not less than "
+                 "GL_MAX_DEBUG_MESSAGE_LENGTH=%d)", callerstr, length,
+                 MAX_DEBUG_MESSAGE_LENGTH);
+      return GL_FALSE;
+   }
+
+   return GL_TRUE;
+}
+
+
+void GLAPIENTRY
+_mesa_DebugMessageInsert(GLenum source, GLenum type, GLuint id,
+                         GLenum severity, GLint length,
+                         const GLchar *buf)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   const char *callerstr;
+
+   if (_mesa_is_desktop_gl(ctx))
+      callerstr = "glDebugMessageInsert";
+   else
+      callerstr = "glDebugMessageInsertKHR";
+
+   if (!validate_params(ctx, INSERT, callerstr, source, type, severity))
+      return; /* GL_INVALID_ENUM */
+
+   if (!validate_length(ctx, callerstr, length, buf))
+      return; /* GL_INVALID_VALUE */
+
+   _mesa_log_msg(ctx, gl_enum_to_debug_source(source),
+                 gl_enum_to_debug_type(type), id,
+                 gl_enum_to_debug_severity(severity),
+                 length, buf);
+
+   if (type == GL_DEBUG_TYPE_MARKER && ctx->Driver.EmitStringMarker) {
+      /* if length not specified, string will be null terminated: */
+      if (length < 0)
+         length = strlen(buf);
+      ctx->Driver.EmitStringMarker(ctx, buf, length);
+   }
+}
+
+
+GLuint GLAPIENTRY
+_mesa_GetDebugMessageLog(GLuint count, GLsizei logSize, GLenum *sources,
+                         GLenum *types, GLenum *ids, GLenum *severities,
+                         GLsizei *lengths, GLchar *messageLog)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_debug_state *debug;
+   const char *callerstr;
+   GLuint ret;
+
+   if (_mesa_is_desktop_gl(ctx))
+      callerstr = "glGetDebugMessageLog";
+   else
+      callerstr = "glGetDebugMessageLogKHR";
+
+   if (!messageLog)
+      logSize = 0;
+
+   if (logSize < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "%s(logSize=%d : logSize must not be negative)",
+                  callerstr, logSize);
+      return 0;
+   }
+
+   debug = _mesa_lock_debug_state(ctx);
+   if (!debug)
+      return 0;
+
+   for (ret = 0; ret < count; ret++) {
+      const struct gl_debug_message *msg = debug_fetch_message(debug);
+      GLsizei len;
+
+      if (!msg)
+         break;
+
+      len = msg->length;
+      if (len < 0)
+         len = strlen(msg->message);
+
+      if (logSize < len+1 && messageLog != NULL)
+         break;
+
+      if (messageLog) {
+         assert(msg->message[len] == '\0');
+         (void) strncpy(messageLog, msg->message, (size_t)len+1);
+
+         messageLog += len+1;
+         logSize -= len+1;
+      }
+
+      if (lengths)
+         *lengths++ = len+1;
+      if (severities)
+         *severities++ = debug_severity_enums[msg->severity];
+      if (sources)
+         *sources++ = debug_source_enums[msg->source];
+      if (types)
+         *types++ = debug_type_enums[msg->type];
+      if (ids)
+         *ids++ = msg->id;
+
+      debug_delete_messages(debug, 1);
+   }
+
+   _mesa_unlock_debug_state(ctx);
+
+   return ret;
+}
+
+
+/** glDebugMessageControl: enable/disable messages by ID list or by class. */
+void GLAPIENTRY
+_mesa_DebugMessageControl(GLenum gl_source, GLenum gl_type,
+                          GLenum gl_severity, GLsizei count,
+                          const GLuint *ids, GLboolean enabled)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   enum mesa_debug_source source = gl_enum_to_debug_source(gl_source);
+   enum mesa_debug_type type = gl_enum_to_debug_type(gl_type);
+   enum mesa_debug_severity severity = gl_enum_to_debug_severity(gl_severity);
+   const char *callerstr;
+   struct gl_debug_state *debug;
+
+   if (_mesa_is_desktop_gl(ctx))
+      callerstr = "glDebugMessageControl";
+   else
+      callerstr = "glDebugMessageControlKHR";
+
+   if (count < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "%s(count=%d : count must not be negative)", callerstr,
+                  count);
+      return;
+   }
+
+   if (!validate_params(ctx, CONTROL, callerstr, gl_source, gl_type,
+                        gl_severity))
+      return; /* GL_INVALID_ENUM */
+
+   if (count && (gl_severity != GL_DONT_CARE || gl_type == GL_DONT_CARE
+                 || gl_source == GL_DONT_CARE)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(When passing an array of ids, severity must be"
+                  " GL_DONT_CARE, and source and type must not be"
+                  " GL_DONT_CARE.)", callerstr);
+      return;
+   }
+
+   debug = _mesa_lock_debug_state(ctx);
+   if (!debug)
+      return;
+
+   if (count) {
+      GLsizei i;
+      for (i = 0; i < count; i++)
+         debug_set_message_enable(debug, source, type, ids[i], enabled);
+   } else {
+      debug_set_message_enable_all(debug, source, type, severity, enabled);
+   }
+
+   _mesa_unlock_debug_state(ctx);
+}
+
+
+void GLAPIENTRY
+_mesa_DebugMessageCallback(GLDEBUGPROC callback, const void *userParam)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_debug_state *debug = _mesa_lock_debug_state(ctx);
+   if (debug) {
+      debug->Callback = callback;
+      debug->CallbackData = userParam;
+      _mesa_unlock_debug_state(ctx);
+   }
+}
+
+
+void GLAPIENTRY
+_mesa_PushDebugGroup(GLenum source, GLuint id, GLsizei length,
+                     const GLchar *message)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   const char *callerstr;
+   struct gl_debug_state *debug;
+   struct gl_debug_message *emptySlot;
+
+   if (_mesa_is_desktop_gl(ctx))
+      callerstr = "glPushDebugGroup";
+   else
+      callerstr = "glPushDebugGroupKHR";
+
+   switch(source) {
+   case GL_DEBUG_SOURCE_APPLICATION:
+   case GL_DEBUG_SOURCE_THIRD_PARTY:
+      break;
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM, "bad value passed to %s"
+                  "(source=0x%x)", callerstr, source);
+      return;
+   }
+
+   if (!validate_length(ctx, callerstr, length, message))
+      return; /* GL_INVALID_VALUE */
+
+   debug = _mesa_lock_debug_state(ctx);
+   if (!debug)
+      return;
+
+   if (debug->CurrentGroup >= MAX_DEBUG_GROUP_STACK_DEPTH-1) {
+      _mesa_unlock_debug_state(ctx);
+      _mesa_error(ctx, GL_STACK_OVERFLOW, "%s", callerstr);
+      return;
+   }
+
+   /* pop reuses the message details from push so we store this */
+   emptySlot = debug_get_group_message(debug);
+   debug_message_store(emptySlot,
+                       gl_enum_to_debug_source(source),
+                       gl_enum_to_debug_type(GL_DEBUG_TYPE_PUSH_GROUP),
+                       id,
+                       gl_enum_to_debug_severity(GL_DEBUG_SEVERITY_NOTIFICATION),
+                       length, message);
+
+   debug_push_group(debug);
+
+   log_msg_locked_and_unlock(ctx,
+         gl_enum_to_debug_source(source),
+         MESA_DEBUG_TYPE_PUSH_GROUP, id,
+         MESA_DEBUG_SEVERITY_NOTIFICATION, length,
+         message);
+}
+
+
+void GLAPIENTRY
+_mesa_PopDebugGroup(void)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   const char *callerstr;
+   struct gl_debug_state *debug;
+   struct gl_debug_message *gdmessage, msg;
+
+   if (_mesa_is_desktop_gl(ctx))
+      callerstr = "glPopDebugGroup";
+   else
+      callerstr = "glPopDebugGroupKHR";
+
+   debug = _mesa_lock_debug_state(ctx);
+   if (!debug)
+      return;
+
+   if (debug->CurrentGroup <= 0) {
+      _mesa_unlock_debug_state(ctx);
+      _mesa_error(ctx, GL_STACK_UNDERFLOW, "%s", callerstr);
+      return;
+   }
+
+   debug_pop_group(debug);
+
+   /* make a shallow copy */
+   gdmessage = debug_get_group_message(debug);
+   msg = *gdmessage;
+   gdmessage->message = NULL;
+   gdmessage->length = 0;
+
+   log_msg_locked_and_unlock(ctx,
+         msg.source,
+         gl_enum_to_debug_type(GL_DEBUG_TYPE_POP_GROUP),
+         msg.id,
+         gl_enum_to_debug_severity(GL_DEBUG_SEVERITY_NOTIFICATION),
+         msg.length, msg.message);
+
+   debug_message_clear(&msg);
+}
+
+
+void
+_mesa_init_debug_output(struct gl_context *ctx)
+{
+   mtx_init(&ctx->DebugMutex, mtx_plain);
+}
+
+
+void
+_mesa_free_errors_data(struct gl_context *ctx)
+{
+   if (ctx->Debug) {
+      debug_destroy(ctx->Debug);
+      /* set to NULL just in case it is used before context is completely gone. */
+      ctx->Debug = NULL;
+   }
+
+   mtx_destroy(&ctx->DebugMutex);
+}
+
+void GLAPIENTRY
+_mesa_StringMarkerGREMEDY(GLsizei len, const GLvoid *string)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   if (ctx->Extensions.GREMEDY_string_marker) {
+      /* if length not specified, string will be null terminated: */
+      if (len <= 0)
+         len = strlen(string);
+      ctx->Driver.EmitStringMarker(ctx, string, len);
+   } else {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "StringMarkerGREMEDY");
+   }
+}
diff --git a/src/mesa/main/debug_output.h b/src/mesa/main/debug_output.h
new file mode 100644
index 00000000000..9d8be4f2273
--- /dev/null
+++ b/src/mesa/main/debug_output.h
@@ -0,0 +1,107 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2016  Brian Paul, et al   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef DEBUG_OUTPUT_H
+#define DEBUG_OUTPUT_H
+
+
+#include <stdio.h>
+#include <stdarg.h>
+#include "compiler.h"
+#include "glheader.h"
+#include "mtypes.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+void
+_mesa_init_debug_output(struct gl_context *ctx);
+
+void
+_mesa_free_errors_data(struct gl_context *ctx);
+
+void
+_mesa_debug_get_id(GLuint *id);
+
+bool
+_mesa_set_debug_state_int(struct gl_context *ctx, GLenum pname, GLint val);
+
+GLint
+_mesa_get_debug_state_int(struct gl_context *ctx, GLenum pname);
+
+void *
+_mesa_get_debug_state_ptr(struct gl_context *ctx, GLenum pname);
+
+void
+_mesa_log_msg(struct gl_context *ctx, enum mesa_debug_source source,
+              enum mesa_debug_type type, GLuint id,
+              enum mesa_debug_severity severity, GLint len, const char *buf);
+
+bool
+_mesa_debug_is_message_enabled(const struct gl_debug_state *debug,
+                               enum mesa_debug_source source,
+                               enum mesa_debug_type type,
+                               GLuint id,
+                               enum mesa_debug_severity severity);
+
+void GLAPIENTRY
+_mesa_DebugMessageInsert(GLenum source, GLenum type, GLuint id,
+                         GLenum severity, GLint length,
+                         const GLchar* buf);
+
+GLuint GLAPIENTRY
+_mesa_GetDebugMessageLog(GLuint count, GLsizei logSize, GLenum* sources,
+                         GLenum* types, GLenum* ids, GLenum* severities,
+                         GLsizei* lengths, GLchar* messageLog);
+
+void GLAPIENTRY
+_mesa_DebugMessageControl(GLenum source, GLenum type, GLenum severity,
+                          GLsizei count, const GLuint *ids,
+                          GLboolean enabled);
+
+void GLAPIENTRY
+_mesa_DebugMessageCallback(GLDEBUGPROC callback,
+                           const void *userParam);
+
+void GLAPIENTRY
+_mesa_PushDebugGroup(GLenum source, GLuint id, GLsizei length,
+                     const GLchar *message);
+
+void GLAPIENTRY
+_mesa_PopDebugGroup(void);
+
+void GLAPIENTRY
+_mesa_StringMarkerGREMEDY(GLsizei len, const GLvoid *string);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* DEBUG_OUTPUT_H */
diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
index f7941817845..3fd3c2747ea 100644
--- a/src/mesa/main/enable.c
+++ b/src/mesa/main/enable.c
@@ -31,6 +31,7 @@
 #include "glheader.h"
 #include "clip.h"
 #include "context.h"
+#include "debug_output.h"
 #include "enable.h"
 #include "errors.h"
 #include "light.h"
diff --git a/src/mesa/main/errors.c b/src/mesa/main/errors.c
index 674364c7b0c..9932b4a5a89 100644
--- a/src/mesa/main/errors.c
+++ b/src/mesa/main/errors.c
@@ -34,6 +34,7 @@
 #include "enums.h"
 #include "imports.h"
 #include "context.h"
+#include "debug_output.h"
 #include "dispatch.h"
 #include "hash.h"
 #include "mtypes.h"
@@ -41,1265 +42,6 @@
 #include "util/hash_table.h"
 #include "util/simple_list.h"
 
-static mtx_t DynamicIDMutex = _MTX_INITIALIZER_NP;
-static GLuint NextDynamicID = 1;
-
-/**
- * A namespace element.
- */
-struct gl_debug_element
-{
-   struct simple_node link;
-
-   GLuint ID;
-   /* at which severity levels (mesa_debug_severity) is the message enabled */
-   GLbitfield State;
-};
-
-struct gl_debug_namespace
-{
-   struct simple_node Elements;
-   GLbitfield DefaultState;
-};
-
-struct gl_debug_group {
-   struct gl_debug_namespace Namespaces[MESA_DEBUG_SOURCE_COUNT][MESA_DEBUG_TYPE_COUNT];
-};
-
-/**
- * An error, warning, or other piece of debug information for an application
- * to consume via GL_ARB_debug_output/GL_KHR_debug.
- */
-struct gl_debug_message
-{
-   enum mesa_debug_source source;
-   enum mesa_debug_type type;
-   GLuint id;
-   enum mesa_debug_severity severity;
-   /* length as given by the user - if message was explicitly null terminated,
-    * length can be negative */
-   GLsizei length;
-   GLcharARB *message;
-};
-
-/**
- * Debug message log.  It works like a ring buffer.
- */
-struct gl_debug_log {
-   struct gl_debug_message Messages[MAX_DEBUG_LOGGED_MESSAGES];
-   GLint NextMessage;
-   GLint NumMessages;
-};
-
-struct gl_debug_state
-{
-   GLDEBUGPROC Callback;
-   const void *CallbackData;
-   GLboolean SyncOutput;
-   GLboolean DebugOutput;
-
-   struct gl_debug_group *Groups[MAX_DEBUG_GROUP_STACK_DEPTH];
-   struct gl_debug_message GroupMessages[MAX_DEBUG_GROUP_STACK_DEPTH];
-   GLint CurrentGroup; // GroupStackDepth - 1
-
-   struct gl_debug_log Log;
-};
-
-static char out_of_memory[] = "Debugging error: out of memory";
-
-static const GLenum debug_source_enums[] = {
-   GL_DEBUG_SOURCE_API,
-   GL_DEBUG_SOURCE_WINDOW_SYSTEM,
-   GL_DEBUG_SOURCE_SHADER_COMPILER,
-   GL_DEBUG_SOURCE_THIRD_PARTY,
-   GL_DEBUG_SOURCE_APPLICATION,
-   GL_DEBUG_SOURCE_OTHER,
-};
-
-static const GLenum debug_type_enums[] = {
-   GL_DEBUG_TYPE_ERROR,
-   GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR,
-   GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR,
-   GL_DEBUG_TYPE_PORTABILITY,
-   GL_DEBUG_TYPE_PERFORMANCE,
-   GL_DEBUG_TYPE_OTHER,
-   GL_DEBUG_TYPE_MARKER,
-   GL_DEBUG_TYPE_PUSH_GROUP,
-   GL_DEBUG_TYPE_POP_GROUP,
-};
-
-static const GLenum debug_severity_enums[] = {
-   GL_DEBUG_SEVERITY_LOW,
-   GL_DEBUG_SEVERITY_MEDIUM,
-   GL_DEBUG_SEVERITY_HIGH,
-   GL_DEBUG_SEVERITY_NOTIFICATION,
-};
-
-
-static enum mesa_debug_source
-gl_enum_to_debug_source(GLenum e)
-{
-   unsigned i;
-
-   for (i = 0; i < ARRAY_SIZE(debug_source_enums); i++) {
-      if (debug_source_enums[i] == e)
-         break;
-   }
-   return i;
-}
-
-static enum mesa_debug_type
-gl_enum_to_debug_type(GLenum e)
-{
-   unsigned i;
-
-   for (i = 0; i < ARRAY_SIZE(debug_type_enums); i++) {
-      if (debug_type_enums[i] == e)
-         break;
-   }
-   return i;
-}
-
-static enum mesa_debug_severity
-gl_enum_to_debug_severity(GLenum e)
-{
-   unsigned i;
-
-   for (i = 0; i < ARRAY_SIZE(debug_severity_enums); i++) {
-      if (debug_severity_enums[i] == e)
-         break;
-   }
-   return i;
-}
-
-
-/**
- * Handles generating a GL_ARB_debug_output message ID generated by the GL or
- * GLSL compiler.
- *
- * The GL API has this "ID" mechanism, where the intention is to allow a
- * client to filter in/out messages based on source, type, and ID.  Of course,
- * building a giant enum list of all debug output messages that Mesa might
- * generate is ridiculous, so instead we have our caller pass us a pointer to
- * static storage where the ID should get stored.  This ID will be shared
- * across all contexts for that message (which seems like a desirable
- * property, even if it's not expected by the spec), but note that it won't be
- * the same between executions if messages aren't generated in the same order.
- */
-static void
-debug_get_id(GLuint *id)
-{
-   if (!(*id)) {
-      mtx_lock(&DynamicIDMutex);
-      if (!(*id))
-         *id = NextDynamicID++;
-      mtx_unlock(&DynamicIDMutex);
-   }
-}
-
-static void
-debug_message_clear(struct gl_debug_message *msg)
-{
-   if (msg->message != (char*)out_of_memory)
-      free(msg->message);
-   msg->message = NULL;
-   msg->length = 0;
-}
-
-static void
-debug_message_store(struct gl_debug_message *msg,
-                    enum mesa_debug_source source,
-                    enum mesa_debug_type type, GLuint id,
-                    enum mesa_debug_severity severity,
-                    GLsizei len, const char *buf)
-{
-   GLsizei length = len;
-
-   assert(!msg->message && !msg->length);
-
-   if (length < 0)
-      length = strlen(buf);
-
-   msg->message = malloc(length+1);
-   if (msg->message) {
-      (void) strncpy(msg->message, buf, (size_t)length);
-      msg->message[length] = '\0';
-
-      msg->length = len;
-      msg->source = source;
-      msg->type = type;
-      msg->id = id;
-      msg->severity = severity;
-   } else {
-      static GLuint oom_msg_id = 0;
-      debug_get_id(&oom_msg_id);
-
-      /* malloc failed! */
-      msg->message = out_of_memory;
-      msg->length = -1;
-      msg->source = MESA_DEBUG_SOURCE_OTHER;
-      msg->type = MESA_DEBUG_TYPE_ERROR;
-      msg->id = oom_msg_id;
-      msg->severity = MESA_DEBUG_SEVERITY_HIGH;
-   }
-}
-
-static void
-debug_namespace_init(struct gl_debug_namespace *ns)
-{
-   make_empty_list(&ns->Elements);
-
-   /* Enable all the messages with severity HIGH or MEDIUM by default */
-   ns->DefaultState = (1 << MESA_DEBUG_SEVERITY_MEDIUM ) |
-                      (1 << MESA_DEBUG_SEVERITY_HIGH) |
-                      (1 << MESA_DEBUG_SEVERITY_NOTIFICATION);
-}
-
-static void
-debug_namespace_clear(struct gl_debug_namespace *ns)
-{
-   struct simple_node *node, *tmp;
-
-   foreach_s(node, tmp, &ns->Elements)
-      free(node);
-}
-
-static bool
-debug_namespace_copy(struct gl_debug_namespace *dst,
-                     const struct gl_debug_namespace *src)
-{
-   struct simple_node *node;
-
-   dst->DefaultState = src->DefaultState;
-
-   make_empty_list(&dst->Elements);
-   foreach(node, &src->Elements) {
-      const struct gl_debug_element *elem =
-         (const struct gl_debug_element *) node;
-      struct gl_debug_element *copy;
-
-      copy = malloc(sizeof(*copy));
-      if (!copy) {
-         debug_namespace_clear(dst);
-         return false;
-      }
-
-      copy->ID = elem->ID;
-      copy->State = elem->State;
-      insert_at_tail(&dst->Elements, &copy->link);
-   }
-
-   return true;
-}
-
-/**
- * Set the state of \p id in the namespace.
- */
-static bool
-debug_namespace_set(struct gl_debug_namespace *ns,
-                    GLuint id, bool enabled)
-{
-   const uint32_t state = (enabled) ?
-      ((1 << MESA_DEBUG_SEVERITY_COUNT) - 1) : 0;
-   struct gl_debug_element *elem = NULL;
-   struct simple_node *node;
-
-   /* find the element */
-   foreach(node, &ns->Elements) {
-      struct gl_debug_element *tmp = (struct gl_debug_element *) node;
-      if (tmp->ID == id) {
-         elem = tmp;
-         break;
-      }
-   }
-
-   /* we do not need the element if it has the default state */
-   if (ns->DefaultState == state) {
-      if (elem) {
-         remove_from_list(&elem->link);
-         free(elem);
-      }
-      return true;
-   }
-
-   if (!elem) {
-      elem = malloc(sizeof(*elem));
-      if (!elem)
-         return false;
-
-      elem->ID = id;
-      insert_at_tail(&ns->Elements, &elem->link);
-   }
-
-   elem->State = state;
-
-   return true;
-}
-
-/**
- * Set the default state of the namespace for \p severity.  When \p severity
- * is MESA_DEBUG_SEVERITY_COUNT, the default values for all severities are
- * updated.
- */
-static void
-debug_namespace_set_all(struct gl_debug_namespace *ns,
-                        enum mesa_debug_severity severity,
-                        bool enabled)
-{
-   struct simple_node *node, *tmp;
-   uint32_t mask, val;
-
-   /* set all elements to the same state */
-   if (severity == MESA_DEBUG_SEVERITY_COUNT) {
-      ns->DefaultState = (enabled) ? ((1 << severity) - 1) : 0;
-      debug_namespace_clear(ns);
-      make_empty_list(&ns->Elements);
-      return;
-   }
-
-   mask = 1 << severity;
-   val = (enabled) ? mask : 0;
-
-   ns->DefaultState = (ns->DefaultState & ~mask) | val;
-
-   foreach_s(node, tmp, &ns->Elements) {
-      struct gl_debug_element *elem = (struct gl_debug_element *) node;
-
-      elem->State = (elem->State & ~mask) | val;
-      if (elem->State == ns->DefaultState) {
-         remove_from_list(node);
-         free(node);
-      }
-   }
-}
-
-/**
- * Get the state of \p id in the namespace.
- */
-static bool
-debug_namespace_get(const struct gl_debug_namespace *ns, GLuint id,
-                    enum mesa_debug_severity severity)
-{
-   struct simple_node *node;
-   uint32_t state;
-
-   state = ns->DefaultState;
-   foreach(node, &ns->Elements) {
-      struct gl_debug_element *elem = (struct gl_debug_element *) node;
-
-      if (elem->ID == id) {
-         state = elem->State;
-         break;
-      }
-   }
-
-   return (state & (1 << severity));
-}
-
-/**
- * Allocate and initialize context debug state.
- */
-static struct gl_debug_state *
-debug_create(void)
-{
-   struct gl_debug_state *debug;
-   int s, t;
-
-   debug = CALLOC_STRUCT(gl_debug_state);
-   if (!debug)
-      return NULL;
-
-   debug->Groups[0] = malloc(sizeof(*debug->Groups[0]));
-   if (!debug->Groups[0]) {
-      free(debug);
-      return NULL;
-   }
-
-   /* Initialize state for filtering known debug messages. */
-   for (s = 0; s < MESA_DEBUG_SOURCE_COUNT; s++) {
-      for (t = 0; t < MESA_DEBUG_TYPE_COUNT; t++)
-         debug_namespace_init(&debug->Groups[0]->Namespaces[s][t]);
-   }
-
-   return debug;
-}
-
-/**
- * Return true if the top debug group points to the group below it.
- */
-static bool
-debug_is_group_read_only(const struct gl_debug_state *debug)
-{
-   const GLint gstack = debug->CurrentGroup;
-   return (gstack > 0 && debug->Groups[gstack] == debug->Groups[gstack - 1]);
-}
-
-/**
- * Make the top debug group writable.
- */
-static bool
-debug_make_group_writable(struct gl_debug_state *debug)
-{
-   const GLint gstack = debug->CurrentGroup;
-   const struct gl_debug_group *src = debug->Groups[gstack];
-   struct gl_debug_group *dst;
-   int s, t;
-
-   if (!debug_is_group_read_only(debug))
-      return true;
-
-   dst = malloc(sizeof(*dst));
-   if (!dst)
-      return false;
-
-   for (s = 0; s < MESA_DEBUG_SOURCE_COUNT; s++) {
-      for (t = 0; t < MESA_DEBUG_TYPE_COUNT; t++) {
-         if (!debug_namespace_copy(&dst->Namespaces[s][t],
-                                   &src->Namespaces[s][t])) {
-            /* error path! */
-            for (t = t - 1; t >= 0; t--)
-               debug_namespace_clear(&dst->Namespaces[s][t]);
-            for (s = s - 1; s >= 0; s--) {
-               for (t = 0; t < MESA_DEBUG_TYPE_COUNT; t++)
-                  debug_namespace_clear(&dst->Namespaces[s][t]);
-            }
-            free(dst);
-            return false;
-         }
-      }
-   }
-
-   debug->Groups[gstack] = dst;
-
-   return true;
-}
-
-/**
- * Free the top debug group.
- */
-static void
-debug_clear_group(struct gl_debug_state *debug)
-{
-   const GLint gstack = debug->CurrentGroup;
-
-   if (!debug_is_group_read_only(debug)) {
-      struct gl_debug_group *grp = debug->Groups[gstack];
-      int s, t;
-
-      for (s = 0; s < MESA_DEBUG_SOURCE_COUNT; s++) {
-         for (t = 0; t < MESA_DEBUG_TYPE_COUNT; t++)
-            debug_namespace_clear(&grp->Namespaces[s][t]);
-      }
-
-      free(grp);
-   }
-
-   debug->Groups[gstack] = NULL;
-}
-
-/**
- * Loop through debug group stack tearing down states for
- * filtering debug messages.  Then free debug output state.
- */
-static void
-debug_destroy(struct gl_debug_state *debug)
-{
-   while (debug->CurrentGroup > 0) {
-      debug_clear_group(debug);
-      debug->CurrentGroup--;
-   }
-
-   debug_clear_group(debug);
-   free(debug);
-}
-
-/**
- * Sets the state of the given message source/type/ID tuple.
- */
-static void
-debug_set_message_enable(struct gl_debug_state *debug,
-                         enum mesa_debug_source source,
-                         enum mesa_debug_type type,
-                         GLuint id, GLboolean enabled)
-{
-   const GLint gstack = debug->CurrentGroup;
-   struct gl_debug_namespace *ns;
-
-   debug_make_group_writable(debug);
-   ns = &debug->Groups[gstack]->Namespaces[source][type];
-
-   debug_namespace_set(ns, id, enabled);
-}
-
-/*
- * Set the state of all message IDs found in the given intersection of
- * 'source', 'type', and 'severity'.  The _COUNT enum can be used for
- * GL_DONT_CARE (include all messages in the class).
- *
- * This requires both setting the state of all previously seen message
- * IDs in the hash table, and setting the default state for all
- * applicable combinations of source/type/severity, so that all the
- * yet-unknown message IDs that may be used in the future will be
- * impacted as if they were already known.
- */
-static void
-debug_set_message_enable_all(struct gl_debug_state *debug,
-                             enum mesa_debug_source source,
-                             enum mesa_debug_type type,
-                             enum mesa_debug_severity severity,
-                             GLboolean enabled)
-{
-   const GLint gstack = debug->CurrentGroup;
-   int s, t, smax, tmax;
-
-   if (source == MESA_DEBUG_SOURCE_COUNT) {
-      source = 0;
-      smax = MESA_DEBUG_SOURCE_COUNT;
-   } else {
-      smax = source+1;
-   }
-
-   if (type == MESA_DEBUG_TYPE_COUNT) {
-      type = 0;
-      tmax = MESA_DEBUG_TYPE_COUNT;
-   } else {
-      tmax = type+1;
-   }
-
-   debug_make_group_writable(debug);
-
-   for (s = source; s < smax; s++) {
-      for (t = type; t < tmax; t++) {
-         struct gl_debug_namespace *nspace =
-            &debug->Groups[gstack]->Namespaces[s][t];
-         debug_namespace_set_all(nspace, severity, enabled);
-      }
-   }
-}
-
-/**
- * Returns if the given message source/type/ID tuple is enabled.
- */
-static bool
-debug_is_message_enabled(const struct gl_debug_state *debug,
-                         enum mesa_debug_source source,
-                         enum mesa_debug_type type,
-                         GLuint id,
-                         enum mesa_debug_severity severity)
-{
-   const GLint gstack = debug->CurrentGroup;
-   struct gl_debug_group *grp = debug->Groups[gstack];
-   struct gl_debug_namespace *nspace = &grp->Namespaces[source][type];
-
-   if (!debug->DebugOutput)
-      return false;
-
-   return debug_namespace_get(nspace, id, severity);
-}
-
-/**
- * 'buf' is not necessarily a null-terminated string. When logging, copy
- * 'len' characters from it, store them in a new, null-terminated string,
- * and remember the number of bytes used by that string, *including*
- * the null terminator this time.
- */
-static void
-debug_log_message(struct gl_debug_state *debug,
-                  enum mesa_debug_source source,
-                  enum mesa_debug_type type, GLuint id,
-                  enum mesa_debug_severity severity,
-                  GLsizei len, const char *buf)
-{
-   struct gl_debug_log *log = &debug->Log;
-   GLint nextEmpty;
-   struct gl_debug_message *emptySlot;
-
-   assert(len < MAX_DEBUG_MESSAGE_LENGTH);
-
-   if (log->NumMessages == MAX_DEBUG_LOGGED_MESSAGES)
-      return;
-
-   nextEmpty = (log->NextMessage + log->NumMessages)
-      % MAX_DEBUG_LOGGED_MESSAGES;
-   emptySlot = &log->Messages[nextEmpty];
-
-   debug_message_store(emptySlot, source, type,
-                       id, severity, len, buf);
-
-   log->NumMessages++;
-}
-
-/**
- * Return the oldest debug message out of the log.
- */
-static const struct gl_debug_message *
-debug_fetch_message(const struct gl_debug_state *debug)
-{
-   const struct gl_debug_log *log = &debug->Log;
-
-   return (log->NumMessages) ? &log->Messages[log->NextMessage] : NULL;
-}
-
-/**
- * Delete the oldest debug messages out of the log.
- */
-static void
-debug_delete_messages(struct gl_debug_state *debug, int count)
-{
-   struct gl_debug_log *log = &debug->Log;
-
-   if (count > log->NumMessages)
-      count = log->NumMessages;
-
-   while (count--) {
-      struct gl_debug_message *msg = &log->Messages[log->NextMessage];
-
-      debug_message_clear(msg);
-
-      log->NumMessages--;
-      log->NextMessage++;
-      log->NextMessage %= MAX_DEBUG_LOGGED_MESSAGES;
-   }
-}
-
-static struct gl_debug_message *
-debug_get_group_message(struct gl_debug_state *debug)
-{
-   return &debug->GroupMessages[debug->CurrentGroup];
-}
-
-static void
-debug_push_group(struct gl_debug_state *debug)
-{
-   const GLint gstack = debug->CurrentGroup;
-
-   /* just point to the previous stack */
-   debug->Groups[gstack + 1] = debug->Groups[gstack];
-   debug->CurrentGroup++;
-}
-
-static void
-debug_pop_group(struct gl_debug_state *debug)
-{
-   debug_clear_group(debug);
-   debug->CurrentGroup--;
-}
-
-
-/**
- * Lock and return debug state for the context.  The debug state will be
- * allocated and initialized upon the first call.  When NULL is returned, the
- * debug state is not locked.
- */
-static struct gl_debug_state *
-_mesa_lock_debug_state(struct gl_context *ctx)
-{
-   mtx_lock(&ctx->DebugMutex);
-
-   if (!ctx->Debug) {
-      ctx->Debug = debug_create();
-      if (!ctx->Debug) {
-         GET_CURRENT_CONTEXT(cur);
-         mtx_unlock(&ctx->DebugMutex);
-
-         /*
-          * This function may be called from other threads.  When that is the
-          * case, we cannot record this OOM error.
-          */
-         if (ctx == cur)
-            _mesa_error(ctx, GL_OUT_OF_MEMORY, "allocating debug state");
-
-         return NULL;
-      }
-   }
-
-   return ctx->Debug;
-}
-
-static void
-_mesa_unlock_debug_state(struct gl_context *ctx)
-{
-   mtx_unlock(&ctx->DebugMutex);
-}
-
-/**
- * Set the integer debug state specified by \p pname.  This can be called from
- * _mesa_set_enable for example.
- */
-bool
-_mesa_set_debug_state_int(struct gl_context *ctx, GLenum pname, GLint val)
-{
-   struct gl_debug_state *debug = _mesa_lock_debug_state(ctx);
-
-   if (!debug)
-      return false;
-
-   switch (pname) {
-   case GL_DEBUG_OUTPUT:
-      debug->DebugOutput = (val != 0);
-      break;
-   case GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB:
-      debug->SyncOutput = (val != 0);
-      break;
-   default:
-      assert(!"unknown debug output param");
-      break;
-   }
-
-   _mesa_unlock_debug_state(ctx);
-
-   return true;
-}
-
-/**
- * Query the integer debug state specified by \p pname.  This can be called
- * _mesa_GetIntegerv for example.
- */
-GLint
-_mesa_get_debug_state_int(struct gl_context *ctx, GLenum pname)
-{
-   struct gl_debug_state *debug;
-   GLint val;
-
-   mtx_lock(&ctx->DebugMutex);
-   debug = ctx->Debug;
-   if (!debug) {
-      mtx_unlock(&ctx->DebugMutex);
-      return 0;
-   }
-
-   switch (pname) {
-   case GL_DEBUG_OUTPUT:
-      val = debug->DebugOutput;
-      break;
-   case GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB:
-      val = debug->SyncOutput;
-      break;
-   case GL_DEBUG_LOGGED_MESSAGES:
-      val = debug->Log.NumMessages;
-      break;
-   case GL_DEBUG_NEXT_LOGGED_MESSAGE_LENGTH:
-      val = (debug->Log.NumMessages) ?
-         debug->Log.Messages[debug->Log.NextMessage].length : 0;
-      break;
-   case GL_DEBUG_GROUP_STACK_DEPTH:
-      val = debug->CurrentGroup + 1;
-      break;
-   default:
-      assert(!"unknown debug output param");
-      val = 0;
-      break;
-   }
-
-   mtx_unlock(&ctx->DebugMutex);
-
-   return val;
-}
-
-/**
- * Query the pointer debug state specified by \p pname.  This can be called
- * _mesa_GetPointerv for example.
- */
-void *
-_mesa_get_debug_state_ptr(struct gl_context *ctx, GLenum pname)
-{
-   struct gl_debug_state *debug;
-   void *val;
-
-   mtx_lock(&ctx->DebugMutex);
-   debug = ctx->Debug;
-   if (!debug) {
-      mtx_unlock(&ctx->DebugMutex);
-      return NULL;
-   }
-
-   switch (pname) {
-   case GL_DEBUG_CALLBACK_FUNCTION_ARB:
-      val = (void *) debug->Callback;
-      break;
-   case GL_DEBUG_CALLBACK_USER_PARAM_ARB:
-      val = (void *) debug->CallbackData;
-      break;
-   default:
-      assert(!"unknown debug output param");
-      val = NULL;
-      break;
-   }
-
-   mtx_unlock(&ctx->DebugMutex);
-
-   return val;
-}
-
-/**
- * Insert a debug message.  The mutex is assumed to be locked, and will be
- * unlocked by this call.
- */
-static void
-log_msg_locked_and_unlock(struct gl_context *ctx,
-                          enum mesa_debug_source source,
-                          enum mesa_debug_type type, GLuint id,
-                          enum mesa_debug_severity severity,
-                          GLint len, const char *buf)
-{
-   struct gl_debug_state *debug = ctx->Debug;
-
-   if (!debug_is_message_enabled(debug, source, type, id, severity)) {
-      _mesa_unlock_debug_state(ctx);
-      return;
-   }
-
-   if (ctx->Debug->Callback) {
-      GLenum gl_source = debug_source_enums[source];
-      GLenum gl_type = debug_type_enums[type];
-      GLenum gl_severity = debug_severity_enums[severity];
-      GLDEBUGPROC callback = ctx->Debug->Callback;
-      const void *data = ctx->Debug->CallbackData;
-
-      /*
-       * When ctx->Debug->SyncOutput is GL_FALSE, the client is prepared for
-       * unsynchronous calls.  When it is GL_TRUE, we will not spawn threads.
-       * In either case, we can call the callback unlocked.
-       */
-      _mesa_unlock_debug_state(ctx);
-      callback(gl_source, gl_type, id, gl_severity, len, buf, data);
-   }
-   else {
-      debug_log_message(ctx->Debug, source, type, id, severity, len, buf);
-      _mesa_unlock_debug_state(ctx);
-   }
-}
-
-/**
- * Log a client or driver debug message.
- */
-static void
-log_msg(struct gl_context *ctx, enum mesa_debug_source source,
-        enum mesa_debug_type type, GLuint id,
-        enum mesa_debug_severity severity, GLint len, const char *buf)
-{
-   struct gl_debug_state *debug = _mesa_lock_debug_state(ctx);
-
-   if (!debug)
-      return;
-
-   log_msg_locked_and_unlock(ctx, source, type, id, severity, len, buf);
-}
-
-
-/**
- * Verify that source, type, and severity are valid enums.
- *
- * The 'caller' param is used for handling values available
- * only in glDebugMessageInsert or glDebugMessageControl
- */
-static GLboolean
-validate_params(struct gl_context *ctx, unsigned caller,
-                const char *callerstr, GLenum source, GLenum type,
-                GLenum severity)
-{
-#define INSERT 1
-#define CONTROL 2
-   switch(source) {
-   case GL_DEBUG_SOURCE_APPLICATION_ARB:
-   case GL_DEBUG_SOURCE_THIRD_PARTY_ARB:
-      break;
-   case GL_DEBUG_SOURCE_API_ARB:
-   case GL_DEBUG_SOURCE_SHADER_COMPILER_ARB:
-   case GL_DEBUG_SOURCE_WINDOW_SYSTEM_ARB:
-   case GL_DEBUG_SOURCE_OTHER_ARB:
-      if (caller != INSERT)
-         break;
-      else
-         goto error;
-   case GL_DONT_CARE:
-      if (caller == CONTROL)
-         break;
-      else
-         goto error;
-   default:
-      goto error;
-   }
-
-   switch(type) {
-   case GL_DEBUG_TYPE_ERROR_ARB:
-   case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR_ARB:
-   case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR_ARB:
-   case GL_DEBUG_TYPE_PERFORMANCE_ARB:
-   case GL_DEBUG_TYPE_PORTABILITY_ARB:
-   case GL_DEBUG_TYPE_OTHER_ARB:
-   case GL_DEBUG_TYPE_MARKER:
-   case GL_DEBUG_TYPE_PUSH_GROUP:
-   case GL_DEBUG_TYPE_POP_GROUP:
-      break;
-   case GL_DONT_CARE:
-      if (caller == CONTROL)
-         break;
-      else
-         goto error;
-   default:
-      goto error;
-   }
-
-   switch(severity) {
-   case GL_DEBUG_SEVERITY_HIGH_ARB:
-   case GL_DEBUG_SEVERITY_MEDIUM_ARB:
-   case GL_DEBUG_SEVERITY_LOW_ARB:
-   case GL_DEBUG_SEVERITY_NOTIFICATION:
-      break;
-   case GL_DONT_CARE:
-      if (caller == CONTROL)
-         break;
-      else
-         goto error;
-   default:
-      goto error;
-   }
-   return GL_TRUE;
-
-error:
-   _mesa_error(ctx, GL_INVALID_ENUM, "bad values passed to %s"
-               "(source=0x%x, type=0x%x, severity=0x%x)", callerstr,
-               source, type, severity);
-
-   return GL_FALSE;
-}
-
-
-static GLboolean
-validate_length(struct gl_context *ctx, const char *callerstr, GLsizei length,
-                const GLchar *buf)
-{
-
-   if (length < 0) {
-      GLsizei len = strlen(buf);
-
-      if (len >= MAX_DEBUG_MESSAGE_LENGTH) {
-         _mesa_error(ctx, GL_INVALID_VALUE,
-                    "%s(null terminated string length=%d, is not less than "
-                    "GL_MAX_DEBUG_MESSAGE_LENGTH=%d)", callerstr, len,
-                    MAX_DEBUG_MESSAGE_LENGTH);
-         return GL_FALSE;
-      }
-   }
-
-   if (length >= MAX_DEBUG_MESSAGE_LENGTH) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                 "%s(length=%d, which is not less than "
-                 "GL_MAX_DEBUG_MESSAGE_LENGTH=%d)", callerstr, length,
-                 MAX_DEBUG_MESSAGE_LENGTH);
-      return GL_FALSE;
-   }
-
-   return GL_TRUE;
-}
-
-
-void GLAPIENTRY
-_mesa_DebugMessageInsert(GLenum source, GLenum type, GLuint id,
-                         GLenum severity, GLint length,
-                         const GLchar *buf)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   const char *callerstr;
-
-   if (_mesa_is_desktop_gl(ctx))
-      callerstr = "glDebugMessageInsert";
-   else
-      callerstr = "glDebugMessageInsertKHR";
-
-   if (!validate_params(ctx, INSERT, callerstr, source, type, severity))
-      return; /* GL_INVALID_ENUM */
-
-   if (!validate_length(ctx, callerstr, length, buf))
-      return; /* GL_INVALID_VALUE */
-
-   log_msg(ctx, gl_enum_to_debug_source(source),
-           gl_enum_to_debug_type(type), id,
-           gl_enum_to_debug_severity(severity),
-           length, buf);
-
-   if (type == GL_DEBUG_TYPE_MARKER && ctx->Driver.EmitStringMarker) {
-      /* if length not specified, string will be null terminated: */
-      if (length < 0)
-         length = strlen(buf);
-      ctx->Driver.EmitStringMarker(ctx, buf, length);
-   }
-}
-
-
-GLuint GLAPIENTRY
-_mesa_GetDebugMessageLog(GLuint count, GLsizei logSize, GLenum *sources,
-                         GLenum *types, GLenum *ids, GLenum *severities,
-                         GLsizei *lengths, GLchar *messageLog)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   struct gl_debug_state *debug;
-   const char *callerstr;
-   GLuint ret;
-
-   if (_mesa_is_desktop_gl(ctx))
-      callerstr = "glGetDebugMessageLog";
-   else
-      callerstr = "glGetDebugMessageLogKHR";
-
-   if (!messageLog)
-      logSize = 0;
-
-   if (logSize < 0) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "%s(logSize=%d : logSize must not be negative)",
-                  callerstr, logSize);
-      return 0;
-   }
-
-   debug = _mesa_lock_debug_state(ctx);
-   if (!debug)
-      return 0;
-
-   for (ret = 0; ret < count; ret++) {
-      const struct gl_debug_message *msg = debug_fetch_message(debug);
-      GLsizei len;
-
-      if (!msg)
-         break;
-
-      len = msg->length;
-      if (len < 0)
-         len = strlen(msg->message);
-
-      if (logSize < len+1 && messageLog != NULL)
-         break;
-
-      if (messageLog) {
-         assert(msg->message[len] == '\0');
-         (void) strncpy(messageLog, msg->message, (size_t)len+1);
-
-         messageLog += len+1;
-         logSize -= len+1;
-      }
-
-      if (lengths)
-         *lengths++ = len+1;
-      if (severities)
-         *severities++ = debug_severity_enums[msg->severity];
-      if (sources)
-         *sources++ = debug_source_enums[msg->source];
-      if (types)
-         *types++ = debug_type_enums[msg->type];
-      if (ids)
-         *ids++ = msg->id;
-
-      debug_delete_messages(debug, 1);
-   }
-
-   _mesa_unlock_debug_state(ctx);
-
-   return ret;
-}
-
-
-void GLAPIENTRY
-_mesa_DebugMessageControl(GLenum gl_source, GLenum gl_type,
-                          GLenum gl_severity, GLsizei count,
-                          const GLuint *ids, GLboolean enabled)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   enum mesa_debug_source source = gl_enum_to_debug_source(gl_source);
-   enum mesa_debug_type type = gl_enum_to_debug_type(gl_type);
-   enum mesa_debug_severity severity = gl_enum_to_debug_severity(gl_severity);
-   const char *callerstr;
-   struct gl_debug_state *debug;
-
-   if (_mesa_is_desktop_gl(ctx))
-      callerstr = "glDebugMessageControl";
-   else
-      callerstr = "glDebugMessageControlKHR";
-
-   if (count < 0) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "%s(count=%d : count must not be negative)", callerstr,
-                  count);
-      return;
-   }
-
-   if (!validate_params(ctx, CONTROL, callerstr, gl_source, gl_type,
-                        gl_severity))
-      return; /* GL_INVALID_ENUM */
-
-   if (count && (gl_severity != GL_DONT_CARE || gl_type == GL_DONT_CARE
-                 || gl_source == GL_DONT_CARE)) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "%s(When passing an array of ids, severity must be"
-         " GL_DONT_CARE, and source and type must not be GL_DONT_CARE.",
-                  callerstr);
-      return;
-   }
-
-   debug = _mesa_lock_debug_state(ctx);
-   if (!debug)
-      return;
-
-   if (count) {
-      GLsizei i;
-      for (i = 0; i < count; i++)
-         debug_set_message_enable(debug, source, type, ids[i], enabled);
-   }
-   else {
-      debug_set_message_enable_all(debug, source, type, severity, enabled);
-   }
-
-   _mesa_unlock_debug_state(ctx);
-}
-
-
-void GLAPIENTRY
-_mesa_DebugMessageCallback(GLDEBUGPROC callback, const void *userParam)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   struct gl_debug_state *debug = _mesa_lock_debug_state(ctx);
-   if (debug) {
-      debug->Callback = callback;
-      debug->CallbackData = userParam;
-      _mesa_unlock_debug_state(ctx);
-   }
-}
-
-
-void GLAPIENTRY
-_mesa_PushDebugGroup(GLenum source, GLuint id, GLsizei length,
-                     const GLchar *message)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   const char *callerstr;
-   struct gl_debug_state *debug;
-   struct gl_debug_message *emptySlot;
-
-   if (_mesa_is_desktop_gl(ctx))
-      callerstr = "glPushDebugGroup";
-   else
-      callerstr = "glPushDebugGroupKHR";
-
-   switch(source) {
-   case GL_DEBUG_SOURCE_APPLICATION:
-   case GL_DEBUG_SOURCE_THIRD_PARTY:
-      break;
-   default:
-      _mesa_error(ctx, GL_INVALID_ENUM, "bad value passed to %s"
-                  "(source=0x%x)", callerstr, source);
-      return;
-   }
-
-   if (!validate_length(ctx, callerstr, length, message))
-      return; /* GL_INVALID_VALUE */
-
-   debug = _mesa_lock_debug_state(ctx);
-   if (!debug)
-      return;
-
-   if (debug->CurrentGroup >= MAX_DEBUG_GROUP_STACK_DEPTH-1) {
-      _mesa_unlock_debug_state(ctx);
-      _mesa_error(ctx, GL_STACK_OVERFLOW, "%s", callerstr);
-      return;
-   }
-
-   /* pop reuses the message details from push so we store this */
-   emptySlot = debug_get_group_message(debug);
-   debug_message_store(emptySlot,
-                       gl_enum_to_debug_source(source),
-                       gl_enum_to_debug_type(GL_DEBUG_TYPE_PUSH_GROUP),
-                       id,
-                       gl_enum_to_debug_severity(GL_DEBUG_SEVERITY_NOTIFICATION),
-                       length, message);
-
-   debug_push_group(debug);
-
-   log_msg_locked_and_unlock(ctx,
-         gl_enum_to_debug_source(source),
-         MESA_DEBUG_TYPE_PUSH_GROUP, id,
-         MESA_DEBUG_SEVERITY_NOTIFICATION, length,
-         message);
-}
-
-
-void GLAPIENTRY
-_mesa_PopDebugGroup(void)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   const char *callerstr;
-   struct gl_debug_state *debug;
-   struct gl_debug_message *gdmessage, msg;
-
-   if (_mesa_is_desktop_gl(ctx))
-      callerstr = "glPopDebugGroup";
-   else
-      callerstr = "glPopDebugGroupKHR";
-
-   debug = _mesa_lock_debug_state(ctx);
-   if (!debug)
-      return;
-
-   if (debug->CurrentGroup <= 0) {
-      _mesa_unlock_debug_state(ctx);
-      _mesa_error(ctx, GL_STACK_UNDERFLOW, "%s", callerstr);
-      return;
-   }
-
-   debug_pop_group(debug);
-
-   /* make a shallow copy */
-   gdmessage = debug_get_group_message(debug);
-   msg = *gdmessage;
-   gdmessage->message = NULL;
-   gdmessage->length = 0;
-
-   log_msg_locked_and_unlock(ctx,
-         msg.source,
-         gl_enum_to_debug_type(GL_DEBUG_TYPE_POP_GROUP),
-         msg.id,
-         gl_enum_to_debug_severity(GL_DEBUG_SEVERITY_NOTIFICATION),
-         msg.length, msg.message);
-
-   debug_message_clear(&msg);
-}
-
-
-void
-_mesa_init_errors(struct gl_context *ctx)
-{
-   mtx_init(&ctx->DebugMutex, mtx_plain);
-}
-
-
-void
-_mesa_free_errors_data(struct gl_context *ctx)
-{
-   if (ctx->Debug) {
-      debug_destroy(ctx->Debug);
-      /* set to NULL just in case it is used before context is completely gone. */
-      ctx->Debug = NULL;
-   }
-
-   mtx_destroy(&ctx->DebugMutex);
-}
-
-void GLAPIENTRY
-_mesa_StringMarkerGREMEDY(GLsizei len, const GLvoid *string)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   if (ctx->Extensions.GREMEDY_string_marker) {
-      /* if length not specified, string will be null terminated: */
-      if (len <= 0)
-         len = strlen(string);
-      ctx->Driver.EmitStringMarker(ctx, string, len);
-   } else {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "StringMarkerGREMEDY");
-   }
-}
-
-/**********************************************************************/
-/** \name Diagnostics */
-/*@{*/
 
 static FILE *LogFile = NULL;
 
@@ -1492,11 +234,11 @@ _mesa_gl_vdebug(struct gl_context *ctx,
    char s[MAX_DEBUG_MESSAGE_LENGTH];
    int len;
 
-   debug_get_id(id);
+   _mesa_debug_get_id(id);
 
    len = _mesa_vsnprintf(s, MAX_DEBUG_MESSAGE_LENGTH, fmtString, args);
 
-   log_msg(ctx, source, type, *id, severity, len, s);
+   _mesa_log_msg(ctx, source, type, *id, severity, len, s);
 }
 
 
@@ -1536,17 +278,17 @@ _mesa_error( struct gl_context *ctx, GLenum error, const char *fmtString, ... )
     */
    static GLuint error_msg_id = 0;
 
-   debug_get_id(&error_msg_id);
+   _mesa_debug_get_id(&error_msg_id);
 
    do_output = should_output(ctx, error, fmtString);
 
    mtx_lock(&ctx->DebugMutex);
    if (ctx->Debug) {
-      do_log = debug_is_message_enabled(ctx->Debug,
-                                        MESA_DEBUG_SOURCE_API,
-                                        MESA_DEBUG_TYPE_ERROR,
-                                        error_msg_id,
-                                        MESA_DEBUG_SEVERITY_HIGH);
+      do_log = _mesa_debug_is_message_enabled(ctx->Debug,
+                                              MESA_DEBUG_SOURCE_API,
+                                              MESA_DEBUG_TYPE_ERROR,
+                                              error_msg_id,
+                                              MESA_DEBUG_SEVERITY_HIGH);
    }
    else {
       do_log = GL_FALSE;
@@ -1585,8 +327,8 @@ _mesa_error( struct gl_context *ctx, GLenum error, const char *fmtString, ... )
 
       /* Log the error via ARB_debug_output if needed.*/
       if (do_log) {
-         log_msg(ctx, MESA_DEBUG_SOURCE_API, MESA_DEBUG_TYPE_ERROR,
-                 error_msg_id, MESA_DEBUG_SEVERITY_HIGH, len, s2);
+         _mesa_log_msg(ctx, MESA_DEBUG_SOURCE_API, MESA_DEBUG_TYPE_ERROR,
+                       error_msg_id, MESA_DEBUG_SEVERITY_HIGH, len, s2);
       }
    }
 
@@ -1652,7 +394,7 @@ _mesa_shader_debug(struct gl_context *ctx, GLenum type, GLuint *id,
    enum mesa_debug_severity severity = MESA_DEBUG_SEVERITY_HIGH;
    int len;
 
-   debug_get_id(id);
+   _mesa_debug_get_id(id);
 
    len = strlen(msg);
 
@@ -1660,7 +402,5 @@ _mesa_shader_debug(struct gl_context *ctx, GLenum type, GLuint *id,
    if (len >= MAX_DEBUG_MESSAGE_LENGTH)
       len = MAX_DEBUG_MESSAGE_LENGTH - 1;
 
-   log_msg(ctx, source, type, *id, severity, len, msg);
+   _mesa_log_msg(ctx, source, type, *id, severity, len, msg);
 }
-
-/*@}*/
diff --git a/src/mesa/main/errors.h b/src/mesa/main/errors.h
index 92df2ac868a..5ad5254ff1b 100644
--- a/src/mesa/main/errors.h
+++ b/src/mesa/main/errors.h
@@ -47,14 +47,6 @@
 extern "C" {
 #endif
 
-struct _glapi_table;
-
-extern void
-_mesa_init_errors( struct gl_context *ctx );
-
-extern void
-_mesa_free_errors_data( struct gl_context *ctx );
-
 extern void
 _mesa_warning( struct gl_context *gc, const char *fmtString, ... ) PRINTFLIKE(2, 3);
 
@@ -76,6 +68,10 @@ _mesa_log(const char *fmtString, ...) PRINTFLIKE(1, 2);
 extern FILE *
 _mesa_get_log_file(void);
 
+void
+_mesa_shader_debug(struct gl_context *ctx, GLenum type, GLuint *id,
+                   const char *msg);
+
 extern void
 _mesa_gl_vdebug(struct gl_context *ctx,
                 GLuint *id,
@@ -104,42 +100,6 @@ _mesa_gl_debug(struct gl_context *ctx,
    }                                                                      \
 } while (0)
 
-bool
-_mesa_set_debug_state_int(struct gl_context *ctx, GLenum pname, GLint val);
-
-GLint
-_mesa_get_debug_state_int(struct gl_context *ctx, GLenum pname);
-
-void *
-_mesa_get_debug_state_ptr(struct gl_context *ctx, GLenum pname);
-
-extern void
-_mesa_shader_debug(struct gl_context *ctx, GLenum type, GLuint *id,
-                   const char *msg);
-
-void GLAPIENTRY
-_mesa_DebugMessageInsert(GLenum source, GLenum type, GLuint id,
-                         GLenum severity, GLint length,
-                         const GLchar* buf);
-GLuint GLAPIENTRY
-_mesa_GetDebugMessageLog(GLuint count, GLsizei logSize, GLenum* sources,
-                         GLenum* types, GLenum* ids, GLenum* severities,
-                         GLsizei* lengths, GLchar* messageLog);
-void GLAPIENTRY
-_mesa_DebugMessageControl(GLenum source, GLenum type, GLenum severity,
-                          GLsizei count, const GLuint *ids,
-                          GLboolean enabled);
-void GLAPIENTRY
-_mesa_DebugMessageCallback(GLDEBUGPROC callback,
-                           const void *userParam);
-void GLAPIENTRY
-_mesa_PushDebugGroup(GLenum source, GLuint id, GLsizei length,
-                     const GLchar *message);
-void GLAPIENTRY
-_mesa_PopDebugGroup(void);
-
-void GLAPIENTRY
-_mesa_StringMarkerGREMEDY(GLsizei len, const GLvoid *string);
 
 #ifdef __cplusplus
 }
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 8453a922549..9005dc5897d 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -26,6 +26,7 @@
 #include "glheader.h"
 #include "context.h"
 #include "blend.h"
+#include "debug_output.h"
 #include "enable.h"
 #include "enums.h"
 #include "errors.h"
diff --git a/src/mesa/main/getstring.c b/src/mesa/main/getstring.c
index 87c5a3a194f..92f8a389cd9 100644
--- a/src/mesa/main/getstring.c
+++ b/src/mesa/main/getstring.c
@@ -26,6 +26,7 @@
 #include <stdbool.h>
 #include "glheader.h"
 #include "context.h"
+#include "debug_output.h"
 #include "get.h"
 #include "enums.h"
 #include "extensions.h"
diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c
index 385e26b946e..c16fa0b0c72 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -28,6 +28,7 @@
 #include "main/mtypes.h"
 #include "main/extensions.h"
 #include "main/context.h"
+#include "main/debug_output.h"
 #include "main/texobj.h"
 #include "main/teximage.h"
 #include "main/texstate.h"

From 3c3ef696961343d0078679672d3265dc6638f18e Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 8 Feb 2016 09:29:38 -0700
Subject: [PATCH 16/94] st/mesa: minor formatting fixes in st_cb_bitmap.c

---
 src/mesa/state_tracker/st_cb_bitmap.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index d8c3dbdd793..87c606af896 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -198,8 +198,7 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized,
    GLuint i;
    float (*vertices)[3][4];  /**< vertex pos + color + texcoord */
 
-   if(!normalized)
-   {
+   if (!normalized) {
       sRight = (GLfloat) width;
       tBot = (GLfloat) height;
    }
@@ -488,7 +487,6 @@ st_flush_bitmap_cache(struct st_context *st)
 {
    if (!st->bitmap.cache->empty) {
       struct bitmap_cache *cache = st->bitmap.cache;
-
       struct pipe_context *pipe = st->pipe;
       struct pipe_sampler_view *sv;
 

From 37eb3f040059888a857a24315c74d905c5ac7b5d Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 8 Feb 2016 09:29:38 -0700
Subject: [PATCH 17/94] tgsi: break gigantic tgsi_scan_shader() function into
 pieces
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New functions for examining instructions, declarations, etc.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
---
 src/gallium/auxiliary/tgsi/tgsi_scan.c | 739 +++++++++++++------------
 1 file changed, 375 insertions(+), 364 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 687fb54830d..4199dbe54de 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -44,6 +44,375 @@
 
 
 
+static void
+scan_instruction(struct tgsi_shader_info *info,
+                 const struct tgsi_full_instruction *fullinst,
+                 unsigned *current_depth)
+{
+   unsigned i;
+
+   assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
+   info->opcode_count[fullinst->Instruction.Opcode]++;
+
+   switch (fullinst->Instruction.Opcode) {
+   case TGSI_OPCODE_IF:
+   case TGSI_OPCODE_UIF:
+   case TGSI_OPCODE_BGNLOOP:
+      (*current_depth)++;
+      info->max_depth = MAX2(info->max_depth, *current_depth);
+      break;
+   case TGSI_OPCODE_ENDIF:
+   case TGSI_OPCODE_ENDLOOP:
+      (*current_depth)--;
+      break;
+   default:
+      break;
+   }
+
+   if (fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID ||
+       fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
+       fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+      const struct tgsi_full_src_register *src0 = &fullinst->Src[0];
+      unsigned input;
+
+      if (src0->Register.Indirect && src0->Indirect.ArrayID)
+         input = info->input_array_first[src0->Indirect.ArrayID];
+      else
+         input = src0->Register.Index;
+
+      /* For the INTERP opcodes, the interpolation is always
+       * PERSPECTIVE unless LINEAR is specified.
+       */
+      switch (info->input_interpolate[input]) {
+      case TGSI_INTERPOLATE_COLOR:
+      case TGSI_INTERPOLATE_CONSTANT:
+      case TGSI_INTERPOLATE_PERSPECTIVE:
+         switch (fullinst->Instruction.Opcode) {
+         case TGSI_OPCODE_INTERP_CENTROID:
+            info->uses_persp_opcode_interp_centroid = true;
+            break;
+         case TGSI_OPCODE_INTERP_OFFSET:
+            info->uses_persp_opcode_interp_offset = true;
+            break;
+         case TGSI_OPCODE_INTERP_SAMPLE:
+            info->uses_persp_opcode_interp_sample = true;
+            break;
+         }
+         break;
+
+      case TGSI_INTERPOLATE_LINEAR:
+         switch (fullinst->Instruction.Opcode) {
+         case TGSI_OPCODE_INTERP_CENTROID:
+            info->uses_linear_opcode_interp_centroid = true;
+            break;
+         case TGSI_OPCODE_INTERP_OFFSET:
+            info->uses_linear_opcode_interp_offset = true;
+            break;
+         case TGSI_OPCODE_INTERP_SAMPLE:
+            info->uses_linear_opcode_interp_sample = true;
+            break;
+         }
+         break;
+      }
+   }
+
+   if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D &&
+       fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG)
+      info->uses_doubles = true;
+
+   for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
+      const struct tgsi_full_src_register *src = &fullinst->Src[i];
+      int ind = src->Register.Index;
+
+      /* Mark which inputs are effectively used */
+      if (src->Register.File == TGSI_FILE_INPUT) {
+         unsigned usage_mask;
+         usage_mask = tgsi_util_get_inst_usage_mask(fullinst, i);
+         if (src->Register.Indirect) {
+            for (ind = 0; ind < info->num_inputs; ++ind) {
+               info->input_usage_mask[ind] |= usage_mask;
+            }
+         } else {
+            assert(ind >= 0);
+            assert(ind < PIPE_MAX_SHADER_INPUTS);
+            info->input_usage_mask[ind] |= usage_mask;
+         }
+
+         if (info->processor == TGSI_PROCESSOR_FRAGMENT &&
+             !src->Register.Indirect) {
+            unsigned name =
+               info->input_semantic_name[src->Register.Index];
+            unsigned index =
+               info->input_semantic_index[src->Register.Index];
+
+            if (name == TGSI_SEMANTIC_POSITION &&
+                (src->Register.SwizzleX == TGSI_SWIZZLE_Z ||
+                 src->Register.SwizzleY == TGSI_SWIZZLE_Z ||
+                 src->Register.SwizzleZ == TGSI_SWIZZLE_Z ||
+                 src->Register.SwizzleW == TGSI_SWIZZLE_Z))
+               info->reads_z = TRUE;
+
+            if (name == TGSI_SEMANTIC_COLOR) {
+               unsigned mask =
+                  (1 << src->Register.SwizzleX) |
+                  (1 << src->Register.SwizzleY) |
+                  (1 << src->Register.SwizzleZ) |
+                  (1 << src->Register.SwizzleW);
+
+               info->colors_read |= mask << (index * 4);
+            }
+         }
+      }
+
+      /* check for indirect register reads */
+      if (src->Register.Indirect) {
+         info->indirect_files |= (1 << src->Register.File);
+         info->indirect_files_read |= (1 << src->Register.File);
+      }
+
+      /* MSAA samplers */
+      if (src->Register.File == TGSI_FILE_SAMPLER) {
+         assert(fullinst->Instruction.Texture);
+         assert(src->Register.Index < Elements(info->is_msaa_sampler));
+
+         if (fullinst->Instruction.Texture &&
+             (fullinst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
+              fullinst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) {
+            info->is_msaa_sampler[src->Register.Index] = TRUE;
+         }
+      }
+   }
+
+   /* check for indirect register writes */
+   for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) {
+      const struct tgsi_full_dst_register *dst = &fullinst->Dst[i];
+      if (dst->Register.Indirect) {
+         info->indirect_files |= (1 << dst->Register.File);
+         info->indirect_files_written |= (1 << dst->Register.File);
+      }
+   }
+
+   info->num_instructions++;
+}
+     
+
+static void
+scan_declaration(struct tgsi_shader_info *info,
+                 const struct tgsi_full_declaration *fulldecl)
+{
+   const uint file = fulldecl->Declaration.File;
+   const unsigned procType = info->processor;
+   uint reg;
+
+   if (fulldecl->Declaration.Array) {
+      unsigned array_id = fulldecl->Array.ArrayID;
+
+      switch (file) {
+      case TGSI_FILE_INPUT:
+         assert(array_id < ARRAY_SIZE(info->input_array_first));
+         info->input_array_first[array_id] = fulldecl->Range.First;
+         info->input_array_last[array_id] = fulldecl->Range.Last;
+         break;
+      case TGSI_FILE_OUTPUT:
+         assert(array_id < ARRAY_SIZE(info->output_array_first));
+         info->output_array_first[array_id] = fulldecl->Range.First;
+         info->output_array_last[array_id] = fulldecl->Range.Last;
+         break;
+      }
+      info->array_max[file] = MAX2(info->array_max[file], array_id);
+   }
+
+   for (reg = fulldecl->Range.First; reg <= fulldecl->Range.Last; reg++) {
+      unsigned semName = fulldecl->Semantic.Name;
+      unsigned semIndex = fulldecl->Semantic.Index +
+         (reg - fulldecl->Range.First);
+
+      /* only first 32 regs will appear in this bitfield */
+      info->file_mask[file] |= (1 << reg);
+      info->file_count[file]++;
+      info->file_max[file] = MAX2(info->file_max[file], (int)reg);
+
+      if (file == TGSI_FILE_CONSTANT) {
+         int buffer = 0;
+
+         if (fulldecl->Declaration.Dimension)
+            buffer = fulldecl->Dim.Index2D;
+
+         info->const_file_max[buffer] =
+            MAX2(info->const_file_max[buffer], (int)reg);
+      }
+      else if (file == TGSI_FILE_INPUT) {
+         info->input_semantic_name[reg] = (ubyte) semName;
+         info->input_semantic_index[reg] = (ubyte) semIndex;
+         info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate;
+         info->input_interpolate_loc[reg] = (ubyte)fulldecl->Interp.Location;
+         info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap;
+         info->num_inputs++;
+
+         /* Only interpolated varyings. Don't include POSITION.
+          * Don't include integer varyings, because they are not
+          * interpolated.
+          */
+         if (semName == TGSI_SEMANTIC_GENERIC ||
+             semName == TGSI_SEMANTIC_TEXCOORD ||
+             semName == TGSI_SEMANTIC_COLOR ||
+             semName == TGSI_SEMANTIC_BCOLOR ||
+             semName == TGSI_SEMANTIC_FOG ||
+             semName == TGSI_SEMANTIC_CLIPDIST ||
+             semName == TGSI_SEMANTIC_CULLDIST) {
+            switch (fulldecl->Interp.Interpolate) {
+            case TGSI_INTERPOLATE_COLOR:
+            case TGSI_INTERPOLATE_PERSPECTIVE:
+               switch (fulldecl->Interp.Location) {
+               case TGSI_INTERPOLATE_LOC_CENTER:
+                  info->uses_persp_center = true;
+                  break;
+               case TGSI_INTERPOLATE_LOC_CENTROID:
+                  info->uses_persp_centroid = true;
+                  break;
+               case TGSI_INTERPOLATE_LOC_SAMPLE:
+                  info->uses_persp_sample = true;
+                  break;
+               }
+               break;
+            case TGSI_INTERPOLATE_LINEAR:
+               switch (fulldecl->Interp.Location) {
+               case TGSI_INTERPOLATE_LOC_CENTER:
+                  info->uses_linear_center = true;
+                  break;
+               case TGSI_INTERPOLATE_LOC_CENTROID:
+                  info->uses_linear_centroid = true;
+                  break;
+               case TGSI_INTERPOLATE_LOC_SAMPLE:
+                  info->uses_linear_sample = true;
+                  break;
+               }
+               break;
+               /* TGSI_INTERPOLATE_CONSTANT doesn't do any interpolation. */
+            }
+         }
+
+         if (semName == TGSI_SEMANTIC_PRIMID)
+            info->uses_primid = TRUE;
+         else if (procType == TGSI_PROCESSOR_FRAGMENT) {
+            if (semName == TGSI_SEMANTIC_POSITION)
+               info->reads_position = TRUE;
+            else if (semName == TGSI_SEMANTIC_FACE)
+               info->uses_frontface = TRUE;
+         }
+      }
+      else if (file == TGSI_FILE_SYSTEM_VALUE) {
+         unsigned index = fulldecl->Range.First;
+
+         info->system_value_semantic_name[index] = semName;
+         info->num_system_values = MAX2(info->num_system_values, index + 1);
+
+         if (semName == TGSI_SEMANTIC_INSTANCEID) {
+            info->uses_instanceid = TRUE;
+         }
+         else if (semName == TGSI_SEMANTIC_VERTEXID) {
+            info->uses_vertexid = TRUE;
+         }
+         else if (semName == TGSI_SEMANTIC_VERTEXID_NOBASE) {
+            info->uses_vertexid_nobase = TRUE;
+         }
+         else if (semName == TGSI_SEMANTIC_BASEVERTEX) {
+            info->uses_basevertex = TRUE;
+         }
+         else if (semName == TGSI_SEMANTIC_PRIMID) {
+            info->uses_primid = TRUE;
+         } else if (semName == TGSI_SEMANTIC_INVOCATIONID) {
+            info->uses_invocationid = TRUE;
+         } else if (semName == TGSI_SEMANTIC_POSITION)
+            info->reads_position = TRUE;
+         else if (semName == TGSI_SEMANTIC_FACE)
+            info->uses_frontface = TRUE;
+         else if (semName == TGSI_SEMANTIC_SAMPLEMASK)
+            info->reads_samplemask = TRUE;
+      }
+      else if (file == TGSI_FILE_OUTPUT) {
+         info->output_semantic_name[reg] = (ubyte) semName;
+         info->output_semantic_index[reg] = (ubyte) semIndex;
+         info->num_outputs++;
+
+         if (semName == TGSI_SEMANTIC_COLOR)
+            info->colors_written |= 1 << semIndex;
+
+         if (procType == TGSI_PROCESSOR_VERTEX ||
+             procType == TGSI_PROCESSOR_GEOMETRY ||
+             procType == TGSI_PROCESSOR_TESS_CTRL ||
+             procType == TGSI_PROCESSOR_TESS_EVAL) {
+            if (semName == TGSI_SEMANTIC_VIEWPORT_INDEX) {
+               info->writes_viewport_index = TRUE;
+            }
+            else if (semName == TGSI_SEMANTIC_LAYER) {
+               info->writes_layer = TRUE;
+            }
+            else if (semName == TGSI_SEMANTIC_PSIZE) {
+               info->writes_psize = TRUE;
+            }
+            else if (semName == TGSI_SEMANTIC_CLIPVERTEX) {
+               info->writes_clipvertex = TRUE;
+            }
+         }
+
+         if (procType == TGSI_PROCESSOR_FRAGMENT) {
+            if (semName == TGSI_SEMANTIC_POSITION) {
+               info->writes_z = TRUE;
+            }
+            else if (semName == TGSI_SEMANTIC_STENCIL) {
+               info->writes_stencil = TRUE;
+            } else if (semName == TGSI_SEMANTIC_SAMPLEMASK) {
+               info->writes_samplemask = TRUE;
+            }
+         }
+
+         if (procType == TGSI_PROCESSOR_VERTEX) {
+            if (semName == TGSI_SEMANTIC_EDGEFLAG) {
+               info->writes_edgeflag = TRUE;
+            }
+         }
+      } else if (file == TGSI_FILE_SAMPLER) {
+         info->samplers_declared |= 1 << reg;
+      }
+   }
+}
+
+
+static void
+scan_immediate(struct tgsi_shader_info *info)
+{
+   uint reg = info->immediate_count++;
+   uint file = TGSI_FILE_IMMEDIATE;
+
+   info->file_mask[file] |= (1 << reg);
+   info->file_count[file]++;
+   info->file_max[file] = MAX2(info->file_max[file], (int)reg);
+}
+
+
+static void
+scan_property(struct tgsi_shader_info *info,
+              const struct tgsi_full_property *fullprop)
+{
+   unsigned name = fullprop->Property.PropertyName;
+   unsigned value = fullprop->u[0].Data;
+
+   assert(name < Elements(info->properties));
+   info->properties[name] = value;
+
+   switch (name) {
+   case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
+      info->num_written_clipdistance = value;
+      info->clipdist_writemask |= (1 << value) - 1;
+      break;
+   case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
+      info->num_written_culldistance = value;
+      info->culldist_writemask |= (1 << value) - 1;
+      break;
+   }
+}
+
 
 /**
  * Scan the given TGSI shader to collect information such as number of
@@ -93,378 +462,20 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
 
       switch( parse.FullToken.Token.Type ) {
       case TGSI_TOKEN_TYPE_INSTRUCTION:
-         {
-            const struct tgsi_full_instruction *fullinst
-               = &parse.FullToken.FullInstruction;
-            uint i;
-
-            assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
-            info->opcode_count[fullinst->Instruction.Opcode]++;
-
-            switch (fullinst->Instruction.Opcode) {
-            case TGSI_OPCODE_IF:
-            case TGSI_OPCODE_UIF:
-            case TGSI_OPCODE_BGNLOOP:
-               current_depth++;
-               info->max_depth = MAX2(info->max_depth, current_depth);
-               break;
-            case TGSI_OPCODE_ENDIF:
-            case TGSI_OPCODE_ENDLOOP:
-               current_depth--;
-               break;
-            default:
-               break;
-            }
-
-            if (fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID ||
-                fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
-                fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
-               const struct tgsi_full_src_register *src0 = &fullinst->Src[0];
-               unsigned input;
-
-               if (src0->Register.Indirect && src0->Indirect.ArrayID)
-                  input = info->input_array_first[src0->Indirect.ArrayID];
-               else
-                  input = src0->Register.Index;
-
-               /* For the INTERP opcodes, the interpolation is always
-                * PERSPECTIVE unless LINEAR is specified.
-                */
-               switch (info->input_interpolate[input]) {
-               case TGSI_INTERPOLATE_COLOR:
-               case TGSI_INTERPOLATE_CONSTANT:
-               case TGSI_INTERPOLATE_PERSPECTIVE:
-                  switch (fullinst->Instruction.Opcode) {
-                  case TGSI_OPCODE_INTERP_CENTROID:
-                     info->uses_persp_opcode_interp_centroid = true;
-                     break;
-                  case TGSI_OPCODE_INTERP_OFFSET:
-                     info->uses_persp_opcode_interp_offset = true;
-                     break;
-                  case TGSI_OPCODE_INTERP_SAMPLE:
-                     info->uses_persp_opcode_interp_sample = true;
-                     break;
-                  }
-                  break;
-
-               case TGSI_INTERPOLATE_LINEAR:
-                  switch (fullinst->Instruction.Opcode) {
-                  case TGSI_OPCODE_INTERP_CENTROID:
-                     info->uses_linear_opcode_interp_centroid = true;
-                     break;
-                  case TGSI_OPCODE_INTERP_OFFSET:
-                     info->uses_linear_opcode_interp_offset = true;
-                     break;
-                  case TGSI_OPCODE_INTERP_SAMPLE:
-                     info->uses_linear_opcode_interp_sample = true;
-                     break;
-                  }
-                  break;
-               }
-            }
-
-            if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D &&
-                fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG)
-               info->uses_doubles = true;
-
-            for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
-               const struct tgsi_full_src_register *src =
-                  &fullinst->Src[i];
-               int ind = src->Register.Index;
-
-               /* Mark which inputs are effectively used */
-               if (src->Register.File == TGSI_FILE_INPUT) {
-                  unsigned usage_mask;
-                  usage_mask = tgsi_util_get_inst_usage_mask(fullinst, i);
-                  if (src->Register.Indirect) {
-                     for (ind = 0; ind < info->num_inputs; ++ind) {
-                        info->input_usage_mask[ind] |= usage_mask;
-                     }
-                  } else {
-                     assert(ind >= 0);
-                     assert(ind < PIPE_MAX_SHADER_INPUTS);
-                     info->input_usage_mask[ind] |= usage_mask;
-                  }
-
-                  if (procType == TGSI_PROCESSOR_FRAGMENT &&
-                      !src->Register.Indirect) {
-                     unsigned name =
-                        info->input_semantic_name[src->Register.Index];
-                     unsigned index =
-                        info->input_semantic_index[src->Register.Index];
-
-                     if (name == TGSI_SEMANTIC_POSITION &&
-                         (src->Register.SwizzleX == TGSI_SWIZZLE_Z ||
-                          src->Register.SwizzleY == TGSI_SWIZZLE_Z ||
-                          src->Register.SwizzleZ == TGSI_SWIZZLE_Z ||
-                          src->Register.SwizzleW == TGSI_SWIZZLE_Z))
-                        info->reads_z = TRUE;
-
-                     if (name == TGSI_SEMANTIC_COLOR) {
-                        unsigned mask =
-                              (1 << src->Register.SwizzleX) |
-                              (1 << src->Register.SwizzleY) |
-                              (1 << src->Register.SwizzleZ) |
-                              (1 << src->Register.SwizzleW);
-
-                        info->colors_read |= mask << (index * 4);
-                     }
-                  }
-               }
-
-               /* check for indirect register reads */
-               if (src->Register.Indirect) {
-                  info->indirect_files |= (1 << src->Register.File);
-                  info->indirect_files_read |= (1 << src->Register.File);
-               }
-
-               /* MSAA samplers */
-               if (src->Register.File == TGSI_FILE_SAMPLER) {
-                  assert(fullinst->Instruction.Texture);
-                  assert(src->Register.Index < Elements(info->is_msaa_sampler));
-
-                  if (fullinst->Instruction.Texture &&
-                      (fullinst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
-                       fullinst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) {
-                     info->is_msaa_sampler[src->Register.Index] = TRUE;
-                  }
-               }
-            }
-
-            /* check for indirect register writes */
-            for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) {
-               const struct tgsi_full_dst_register *dst = &fullinst->Dst[i];
-               if (dst->Register.Indirect) {
-                  info->indirect_files |= (1 << dst->Register.File);
-                  info->indirect_files_written |= (1 << dst->Register.File);
-               }
-            }
-
-            info->num_instructions++;
-         }
+         scan_instruction(info, &parse.FullToken.FullInstruction,
+                          &current_depth);
          break;
-
       case TGSI_TOKEN_TYPE_DECLARATION:
-         {
-            const struct tgsi_full_declaration *fulldecl
-               = &parse.FullToken.FullDeclaration;
-            const uint file = fulldecl->Declaration.File;
-            uint reg;
-
-            if (fulldecl->Declaration.Array) {
-               unsigned array_id = fulldecl->Array.ArrayID;
-
-               switch (file) {
-               case TGSI_FILE_INPUT:
-                  assert(array_id < ARRAY_SIZE(info->input_array_first));
-                  info->input_array_first[array_id] = fulldecl->Range.First;
-                  info->input_array_last[array_id] = fulldecl->Range.Last;
-                  break;
-               case TGSI_FILE_OUTPUT:
-                  assert(array_id < ARRAY_SIZE(info->output_array_first));
-                  info->output_array_first[array_id] = fulldecl->Range.First;
-                  info->output_array_last[array_id] = fulldecl->Range.Last;
-                  break;
-               }
-               info->array_max[file] = MAX2(info->array_max[file], array_id);
-            }
-
-            for (reg = fulldecl->Range.First;
-                 reg <= fulldecl->Range.Last;
-                 reg++) {
-               unsigned semName = fulldecl->Semantic.Name;
-               unsigned semIndex =
-                  fulldecl->Semantic.Index + (reg - fulldecl->Range.First);
-
-               /* only first 32 regs will appear in this bitfield */
-               info->file_mask[file] |= (1 << reg);
-               info->file_count[file]++;
-               info->file_max[file] = MAX2(info->file_max[file], (int)reg);
-
-               if (file == TGSI_FILE_CONSTANT) {
-                  int buffer = 0;
-
-                  if (fulldecl->Declaration.Dimension)
-                     buffer = fulldecl->Dim.Index2D;
-
-                  info->const_file_max[buffer] =
-                        MAX2(info->const_file_max[buffer], (int)reg);
-               }
-               else if (file == TGSI_FILE_INPUT) {
-                  info->input_semantic_name[reg] = (ubyte) semName;
-                  info->input_semantic_index[reg] = (ubyte) semIndex;
-                  info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate;
-                  info->input_interpolate_loc[reg] = (ubyte)fulldecl->Interp.Location;
-                  info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap;
-                  info->num_inputs++;
-
-                  /* Only interpolated varyings. Don't include POSITION.
-                   * Don't include integer varyings, because they are not
-                   * interpolated.
-                   */
-                  if (semName == TGSI_SEMANTIC_GENERIC ||
-                      semName == TGSI_SEMANTIC_TEXCOORD ||
-                      semName == TGSI_SEMANTIC_COLOR ||
-                      semName == TGSI_SEMANTIC_BCOLOR ||
-                      semName == TGSI_SEMANTIC_FOG ||
-                      semName == TGSI_SEMANTIC_CLIPDIST ||
-                      semName == TGSI_SEMANTIC_CULLDIST) {
-                     switch (fulldecl->Interp.Interpolate) {
-                     case TGSI_INTERPOLATE_COLOR:
-                     case TGSI_INTERPOLATE_PERSPECTIVE:
-                        switch (fulldecl->Interp.Location) {
-                        case TGSI_INTERPOLATE_LOC_CENTER:
-                           info->uses_persp_center = true;
-                           break;
-                        case TGSI_INTERPOLATE_LOC_CENTROID:
-                           info->uses_persp_centroid = true;
-                           break;
-                        case TGSI_INTERPOLATE_LOC_SAMPLE:
-                           info->uses_persp_sample = true;
-                           break;
-                        }
-                        break;
-                     case TGSI_INTERPOLATE_LINEAR:
-                        switch (fulldecl->Interp.Location) {
-                        case TGSI_INTERPOLATE_LOC_CENTER:
-                           info->uses_linear_center = true;
-                           break;
-                        case TGSI_INTERPOLATE_LOC_CENTROID:
-                           info->uses_linear_centroid = true;
-                           break;
-                        case TGSI_INTERPOLATE_LOC_SAMPLE:
-                           info->uses_linear_sample = true;
-                           break;
-                        }
-                        break;
-                     /* TGSI_INTERPOLATE_CONSTANT doesn't do any interpolation. */
-                     }
-                  }
-
-                  if (semName == TGSI_SEMANTIC_PRIMID)
-                     info->uses_primid = TRUE;
-                  else if (procType == TGSI_PROCESSOR_FRAGMENT) {
-                     if (semName == TGSI_SEMANTIC_POSITION)
-                        info->reads_position = TRUE;
-                     else if (semName == TGSI_SEMANTIC_FACE)
-                        info->uses_frontface = TRUE;
-                  }
-               }
-               else if (file == TGSI_FILE_SYSTEM_VALUE) {
-                  unsigned index = fulldecl->Range.First;
-
-                  info->system_value_semantic_name[index] = semName;
-                  info->num_system_values = MAX2(info->num_system_values,
-                                                 index + 1);
-
-                  if (semName == TGSI_SEMANTIC_INSTANCEID) {
-                     info->uses_instanceid = TRUE;
-                  }
-                  else if (semName == TGSI_SEMANTIC_VERTEXID) {
-                     info->uses_vertexid = TRUE;
-                  }
-                  else if (semName == TGSI_SEMANTIC_VERTEXID_NOBASE) {
-                     info->uses_vertexid_nobase = TRUE;
-                  }
-                  else if (semName == TGSI_SEMANTIC_BASEVERTEX) {
-                     info->uses_basevertex = TRUE;
-                  }
-                  else if (semName == TGSI_SEMANTIC_PRIMID) {
-                     info->uses_primid = TRUE;
-                  } else if (semName == TGSI_SEMANTIC_INVOCATIONID) {
-                     info->uses_invocationid = TRUE;
-                  } else if (semName == TGSI_SEMANTIC_POSITION)
-                     info->reads_position = TRUE;
-                  else if (semName == TGSI_SEMANTIC_FACE)
-                     info->uses_frontface = TRUE;
-                  else if (semName == TGSI_SEMANTIC_SAMPLEMASK)
-                     info->reads_samplemask = TRUE;
-               }
-               else if (file == TGSI_FILE_OUTPUT) {
-                  info->output_semantic_name[reg] = (ubyte) semName;
-                  info->output_semantic_index[reg] = (ubyte) semIndex;
-                  info->num_outputs++;
-
-                  if (semName == TGSI_SEMANTIC_COLOR)
-                     info->colors_written |= 1 << semIndex;
-
-                  if (procType == TGSI_PROCESSOR_VERTEX ||
-                      procType == TGSI_PROCESSOR_GEOMETRY ||
-                      procType == TGSI_PROCESSOR_TESS_CTRL ||
-                      procType == TGSI_PROCESSOR_TESS_EVAL) {
-                     if (semName == TGSI_SEMANTIC_VIEWPORT_INDEX) {
-                        info->writes_viewport_index = TRUE;
-                     }
-                     else if (semName == TGSI_SEMANTIC_LAYER) {
-                        info->writes_layer = TRUE;
-                     }
-                     else if (semName == TGSI_SEMANTIC_PSIZE) {
-                        info->writes_psize = TRUE;
-                     }
-                     else if (semName == TGSI_SEMANTIC_CLIPVERTEX) {
-                        info->writes_clipvertex = TRUE;
-                     }
-                  }
-
-                  if (procType == TGSI_PROCESSOR_FRAGMENT) {
-                     if (semName == TGSI_SEMANTIC_POSITION) {
-                        info->writes_z = TRUE;
-                     }
-                     else if (semName == TGSI_SEMANTIC_STENCIL) {
-                        info->writes_stencil = TRUE;
-                     } else if (semName == TGSI_SEMANTIC_SAMPLEMASK) {
-                        info->writes_samplemask = TRUE;
-                     }
-                  }
-
-                  if (procType == TGSI_PROCESSOR_VERTEX) {
-                     if (semName == TGSI_SEMANTIC_EDGEFLAG) {
-                        info->writes_edgeflag = TRUE;
-                     }
-                  }
-               } else if (file == TGSI_FILE_SAMPLER) {
-                  info->samplers_declared |= 1 << reg;
-               }
-            }
-         }
+         scan_declaration(info, &parse.FullToken.FullDeclaration);
          break;
-
       case TGSI_TOKEN_TYPE_IMMEDIATE:
-         {
-            uint reg = info->immediate_count++;
-            uint file = TGSI_FILE_IMMEDIATE;
-
-            info->file_mask[file] |= (1 << reg);
-            info->file_count[file]++;
-            info->file_max[file] = MAX2(info->file_max[file], (int)reg);
-         }
+         scan_immediate(info);
          break;
-
       case TGSI_TOKEN_TYPE_PROPERTY:
-         {
-            const struct tgsi_full_property *fullprop
-               = &parse.FullToken.FullProperty;
-            unsigned name = fullprop->Property.PropertyName;
-            unsigned value = fullprop->u[0].Data;
-
-            assert(name < Elements(info->properties));
-            info->properties[name] = value;
-
-            switch (name) {
-            case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
-               info->num_written_clipdistance = value;
-               info->clipdist_writemask |= (1 << value) - 1;
-               break;
-            case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
-               info->num_written_culldistance = value;
-               info->culldist_writemask |= (1 << value) - 1;
-               break;
-            }
-         }
+         scan_property(info, &parse.FullToken.FullProperty);
          break;
-
       default:
-         assert( 0 );
+         assert(!"Unexpected TGSI token type");
       }
    }
 

From da6e879a6c6ea2ff13188fa9a179c7abcbf1e8c2 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 8 Feb 2016 09:29:38 -0700
Subject: [PATCH 18/94] tgsi: use switches instead of big if/else ifs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
---
 src/gallium/auxiliary/tgsi/tgsi_scan.c | 60 +++++++++++++++-----------
 1 file changed, 36 insertions(+), 24 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 4199dbe54de..a7d4b0c8476 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -307,28 +307,35 @@ scan_declaration(struct tgsi_shader_info *info,
          info->system_value_semantic_name[index] = semName;
          info->num_system_values = MAX2(info->num_system_values, index + 1);
 
-         if (semName == TGSI_SEMANTIC_INSTANCEID) {
+         switch (semName) {
+         case TGSI_SEMANTIC_INSTANCEID:
             info->uses_instanceid = TRUE;
-         }
-         else if (semName == TGSI_SEMANTIC_VERTEXID) {
+            break;
+         case TGSI_SEMANTIC_VERTEXID:
             info->uses_vertexid = TRUE;
-         }
-         else if (semName == TGSI_SEMANTIC_VERTEXID_NOBASE) {
+            break;
+         case TGSI_SEMANTIC_VERTEXID_NOBASE:
             info->uses_vertexid_nobase = TRUE;
-         }
-         else if (semName == TGSI_SEMANTIC_BASEVERTEX) {
+            break;
+         case TGSI_SEMANTIC_BASEVERTEX:
             info->uses_basevertex = TRUE;
-         }
-         else if (semName == TGSI_SEMANTIC_PRIMID) {
+            break;
+         case TGSI_SEMANTIC_PRIMID:
             info->uses_primid = TRUE;
-         } else if (semName == TGSI_SEMANTIC_INVOCATIONID) {
+            break;
+         case TGSI_SEMANTIC_INVOCATIONID:
             info->uses_invocationid = TRUE;
-         } else if (semName == TGSI_SEMANTIC_POSITION)
+            break;
+         case TGSI_SEMANTIC_POSITION:
             info->reads_position = TRUE;
-         else if (semName == TGSI_SEMANTIC_FACE)
+            break;
+         case TGSI_SEMANTIC_FACE:
             info->uses_frontface = TRUE;
-         else if (semName == TGSI_SEMANTIC_SAMPLEMASK)
+            break;
+         case TGSI_SEMANTIC_SAMPLEMASK:
             info->reads_samplemask = TRUE;
+            break;
+         }
       }
       else if (file == TGSI_FILE_OUTPUT) {
          info->output_semantic_name[reg] = (ubyte) semName;
@@ -342,28 +349,33 @@ scan_declaration(struct tgsi_shader_info *info,
              procType == TGSI_PROCESSOR_GEOMETRY ||
              procType == TGSI_PROCESSOR_TESS_CTRL ||
              procType == TGSI_PROCESSOR_TESS_EVAL) {
-            if (semName == TGSI_SEMANTIC_VIEWPORT_INDEX) {
+            switch (semName) {
+            case TGSI_SEMANTIC_VIEWPORT_INDEX:
                info->writes_viewport_index = TRUE;
-            }
-            else if (semName == TGSI_SEMANTIC_LAYER) {
+               break;
+            case TGSI_SEMANTIC_LAYER:
                info->writes_layer = TRUE;
-            }
-            else if (semName == TGSI_SEMANTIC_PSIZE) {
+               break;
+            case TGSI_SEMANTIC_PSIZE:
                info->writes_psize = TRUE;
-            }
-            else if (semName == TGSI_SEMANTIC_CLIPVERTEX) {
+               break;
+            case TGSI_SEMANTIC_CLIPVERTEX:
                info->writes_clipvertex = TRUE;
+               break;
             }
          }
 
          if (procType == TGSI_PROCESSOR_FRAGMENT) {
-            if (semName == TGSI_SEMANTIC_POSITION) {
+            switch (semName) {
+            case TGSI_SEMANTIC_POSITION:
                info->writes_z = TRUE;
-            }
-            else if (semName == TGSI_SEMANTIC_STENCIL) {
+               break;
+            case TGSI_SEMANTIC_STENCIL:
                info->writes_stencil = TRUE;
-            } else if (semName == TGSI_SEMANTIC_SAMPLEMASK) {
+               break;
+            case TGSI_SEMANTIC_SAMPLEMASK:
                info->writes_samplemask = TRUE;
+               break;
             }
          }
 

From 42246ab1f5f985aba97d3d7ebc178b02ec6c316e Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 8 Feb 2016 09:29:38 -0700
Subject: [PATCH 19/94] tgsi: s/true/TRUE/ in tgsi_scan.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Just to be consistent.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
---
 src/gallium/auxiliary/tgsi/tgsi_scan.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index a7d4b0c8476..42b62aa172c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -89,13 +89,13 @@ scan_instruction(struct tgsi_shader_info *info,
       case TGSI_INTERPOLATE_PERSPECTIVE:
          switch (fullinst->Instruction.Opcode) {
          case TGSI_OPCODE_INTERP_CENTROID:
-            info->uses_persp_opcode_interp_centroid = true;
+            info->uses_persp_opcode_interp_centroid = TRUE;
             break;
          case TGSI_OPCODE_INTERP_OFFSET:
-            info->uses_persp_opcode_interp_offset = true;
+            info->uses_persp_opcode_interp_offset = TRUE;
             break;
          case TGSI_OPCODE_INTERP_SAMPLE:
-            info->uses_persp_opcode_interp_sample = true;
+            info->uses_persp_opcode_interp_sample = TRUE;
             break;
          }
          break;
@@ -103,13 +103,13 @@ scan_instruction(struct tgsi_shader_info *info,
       case TGSI_INTERPOLATE_LINEAR:
          switch (fullinst->Instruction.Opcode) {
          case TGSI_OPCODE_INTERP_CENTROID:
-            info->uses_linear_opcode_interp_centroid = true;
+            info->uses_linear_opcode_interp_centroid = TRUE;
             break;
          case TGSI_OPCODE_INTERP_OFFSET:
-            info->uses_linear_opcode_interp_offset = true;
+            info->uses_linear_opcode_interp_offset = TRUE;
             break;
          case TGSI_OPCODE_INTERP_SAMPLE:
-            info->uses_linear_opcode_interp_sample = true;
+            info->uses_linear_opcode_interp_sample = TRUE;
             break;
          }
          break;
@@ -118,7 +118,7 @@ scan_instruction(struct tgsi_shader_info *info,
 
    if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D &&
        fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG)
-      info->uses_doubles = true;
+      info->uses_doubles = TRUE;
 
    for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
       const struct tgsi_full_src_register *src = &fullinst->Src[i];
@@ -265,26 +265,26 @@ scan_declaration(struct tgsi_shader_info *info,
             case TGSI_INTERPOLATE_PERSPECTIVE:
                switch (fulldecl->Interp.Location) {
                case TGSI_INTERPOLATE_LOC_CENTER:
-                  info->uses_persp_center = true;
+                  info->uses_persp_center = TRUE;
                   break;
                case TGSI_INTERPOLATE_LOC_CENTROID:
-                  info->uses_persp_centroid = true;
+                  info->uses_persp_centroid = TRUE;
                   break;
                case TGSI_INTERPOLATE_LOC_SAMPLE:
-                  info->uses_persp_sample = true;
+                  info->uses_persp_sample = TRUE;
                   break;
                }
                break;
             case TGSI_INTERPOLATE_LINEAR:
                switch (fulldecl->Interp.Location) {
                case TGSI_INTERPOLATE_LOC_CENTER:
-                  info->uses_linear_center = true;
+                  info->uses_linear_center = TRUE;
                   break;
                case TGSI_INTERPOLATE_LOC_CENTROID:
-                  info->uses_linear_centroid = true;
+                  info->uses_linear_centroid = TRUE;
                   break;
                case TGSI_INTERPOLATE_LOC_SAMPLE:
-                  info->uses_linear_sample = true;
+                  info->uses_linear_sample = TRUE;
                   break;
                }
                break;

From 59251610ed3a0239ec9b400e97e9b5598fe80f70 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 8 Feb 2016 09:29:38 -0700
Subject: [PATCH 20/94] tgsi: minor whitespace fixes in tgsi_scan.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
---
 src/gallium/auxiliary/tgsi/tgsi_scan.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 42b62aa172c..489423d7f12 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -462,12 +462,10 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
           procType == TGSI_PROCESSOR_COMPUTE);
    info->processor = procType;
 
-
    /**
     ** Loop over incoming program tokens/instructions
     */
-   while( !tgsi_parse_end_of_tokens( &parse ) ) {
-
+   while (!tgsi_parse_end_of_tokens(&parse)) {
       info->num_tokens++;
 
       tgsi_parse_token( &parse );
@@ -510,7 +508,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
       }
    }
 
-   tgsi_parse_free (&parse);
+   tgsi_parse_free(&parse);
 }
 
 

From 01dacc83ff43a054513277e3e1296c3fc8cd750a Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 8 Feb 2016 10:52:02 -0700
Subject: [PATCH 21/94] dri/common: include debug_output.h to silence warning

---
 src/mesa/drivers/dri/common/dri_util.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c
index 5cfa2f8ca4f..a6545084e31 100644
--- a/src/mesa/drivers/dri/common/dri_util.c
+++ b/src/mesa/drivers/dri/common/dri_util.c
@@ -46,6 +46,7 @@
 #include "main/mtypes.h"
 #include "main/framebuffer.h"
 #include "main/version.h"
+#include "main/debug_output.h"
 #include "main/errors.h"
 #include "main/macros.h"
 

From c300559fbfa6127320b78c130061fdb6a454658d Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 8 Feb 2016 15:32:12 -0800
Subject: [PATCH 22/94] i965/vec4: Update vec4 unit tests for commit
 01dacc83ff.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94050
---
 .../dri/i965/test_vec4_cmod_propagation.cpp        | 14 ++++++++++----
 .../dri/i965/test_vec4_copy_propagation.cpp        | 10 +++++++---
 .../dri/i965/test_vec4_register_coalesce.cpp       | 10 +++++++---
 3 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
index e5e566c60bc..8d4a447a88b 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
@@ -40,6 +40,7 @@ public:
    struct gl_context *ctx;
    struct gl_shader_program *shader_prog;
    struct brw_vertex_program *vp;
+   struct brw_vue_prog_data *prog_data;
    vec4_visitor *v;
 };
 
@@ -47,9 +48,13 @@ class cmod_propagation_vec4_visitor : public vec4_visitor
 {
 public:
    cmod_propagation_vec4_visitor(struct brw_compiler *compiler,
-                                 nir_shader *shader)
-      : vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL,
-                     false, -1) {}
+                                 nir_shader *shader,
+                                 struct brw_vue_prog_data *prog_data)
+      : vec4_visitor(compiler, NULL, NULL, prog_data, shader, NULL,
+                     false, -1)
+      {
+         prog_data->dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
+      }
 
 protected:
    /* Dummy implementation for pure virtual methods */
@@ -96,13 +101,14 @@ void cmod_propagation_test::SetUp()
    ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
    compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
    devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
+   prog_data = (struct brw_vue_prog_data *)calloc(1, sizeof(*prog_data));
    compiler->devinfo = devinfo;
 
    vp = ralloc(NULL, struct brw_vertex_program);
 
    nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL);
 
-   v = new cmod_propagation_vec4_visitor(compiler, shader);
+   v = new cmod_propagation_vec4_visitor(compiler, shader, prog_data);
 
    _mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0);
 
diff --git a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
index 12667ffd23c..311f07a7cca 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
@@ -39,6 +39,7 @@ public:
    struct gl_context *ctx;
    struct gl_shader_program *shader_prog;
    struct brw_vertex_program *vp;
+   struct brw_vue_prog_data *prog_data;
    vec4_visitor *v;
 };
 
@@ -46,10 +47,12 @@ class copy_propagation_vec4_visitor : public vec4_visitor
 {
 public:
    copy_propagation_vec4_visitor(struct brw_compiler *compiler,
-                                 nir_shader *shader)
-      : vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL,
+                                 nir_shader *shader,
+                                 struct brw_vue_prog_data *prog_data)
+      : vec4_visitor(compiler, NULL, NULL, prog_data, shader, NULL,
                      false /* no_spills */, -1)
    {
+      prog_data->dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
    }
 
 protected:
@@ -91,13 +94,14 @@ void copy_propagation_test::SetUp()
    ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
    compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
    devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
+   prog_data = (struct brw_vue_prog_data *)calloc(1, sizeof(*prog_data));
    compiler->devinfo = devinfo;
 
    vp = ralloc(NULL, struct brw_vertex_program);
 
    nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL);
 
-   v = new copy_propagation_vec4_visitor(compiler, shader);
+   v = new copy_propagation_vec4_visitor(compiler, shader, prog_data);
 
    _mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0);
 
diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
index 34dcf95dc48..cc4a2de89d5 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
@@ -41,6 +41,7 @@ public:
    struct gl_context *ctx;
    struct gl_shader_program *shader_prog;
    struct brw_vertex_program *vp;
+   struct brw_vue_prog_data *prog_data;
    vec4_visitor *v;
 };
 
@@ -49,10 +50,12 @@ class register_coalesce_vec4_visitor : public vec4_visitor
 {
 public:
    register_coalesce_vec4_visitor(struct brw_compiler *compiler,
-                                  nir_shader *shader)
-      : vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL,
+                                  nir_shader *shader,
+                                  struct brw_vue_prog_data *prog_data)
+      : vec4_visitor(compiler, NULL, NULL, prog_data, shader, NULL,
                      false /* no_spills */, -1)
    {
+      prog_data->dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
    }
 
 protected:
@@ -94,13 +97,14 @@ void register_coalesce_test::SetUp()
    ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
    compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
    devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
+   prog_data = (struct brw_vue_prog_data *)calloc(1, sizeof(*prog_data));
    compiler->devinfo = devinfo;
 
    vp = ralloc(NULL, struct brw_vertex_program);
 
    nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL);
 
-   v = new register_coalesce_vec4_visitor(compiler, shader);
+   v = new register_coalesce_vec4_visitor(compiler, shader, prog_data);
 
    _mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0);
 

From 1817e3c07a63c6404d6df323fbd443ccd9304b02 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Sat, 6 Feb 2016 18:43:45 -0800
Subject: [PATCH 23/94] i965/fs: Don't emit unnecessary SEL instruction from
 emit_image_atomic().

The SEL instruction with predication mode NONE emitted when the atomic
operation doesn't need to be predicated is a no-op and might rely on
undocumented hardware behaviour.  Noticed by chance while looking at
the assembly output.

Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
index 45694ec0894..081dbadadfe 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
@@ -1122,7 +1122,7 @@ namespace brw {
                                               dims, rsize, op, pred);
 
          /* An unbound surface access should give zero as result. */
-         if (rsize)
+         if (rsize && pred)
             set_predicate(pred, bld.SEL(tmp, tmp, brw_imm_d(0)));
 
          return tmp;

From 0aa4f99f562a05880a779707cbcd46be459863bf Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Thu, 14 Jan 2016 11:22:46 -0800
Subject: [PATCH 24/94] i965: Fix cache pollution race during L3 partitioning
 set-up.

We need to split the stalling flush from the RO cache invalidation
into a different PIPE_CONTROL command to make sure that the top of the
pipe invalidation happens after any previous rendering is complete.
Otherwise it's possible for previous rendering to pollute the L3 cache
in the short window of time between RO invalidation and the completion
of the stalling flush.  Fixes rendering artifacts on Unigine Heaven,
Metro Last Light Redux and Metro 2033 Redux.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93540
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93599
Tested-by: Darius Spitznagel <d.spitznagel@goodbytez.de>
Tested-by: Martin Peres <martin.peres@linux.intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/gen7_l3_state.c | 31 +++++++++++++++++------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c
index b63e61ca8f0..85f18d0e012 100644
--- a/src/mesa/drivers/dri/i965/gen7_l3_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c
@@ -330,20 +330,35 @@ setup_l3_config(struct brw_context *brw, const struct brw_l3_config *cfg)
 
    /* According to the hardware docs, the L3 partitioning can only be changed
     * while the pipeline is completely drained and the caches are flushed,
-    * which involves a first PIPE_CONTROL flush which stalls the pipeline and
-    * initiates invalidation of the relevant caches...
+    * which involves a first PIPE_CONTROL flush which stalls the pipeline...
+    */
+   brw_emit_pipe_control_flush(brw,
+                               PIPE_CONTROL_DATA_CACHE_INVALIDATE |
+                               PIPE_CONTROL_NO_WRITE |
+                               PIPE_CONTROL_CS_STALL);
+
+   /* ...followed by a second pipelined PIPE_CONTROL that initiates
+    * invalidation of the relevant caches.  Note that because RO invalidation
+    * happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL
+    * command is processed by the CS) we cannot combine it with the previous
+    * stalling flush as the hardware documentation suggests, because that
+    * would cause the CS to stall on previous rendering *after* RO
+    * invalidation and wouldn't prevent the RO caches from being polluted by
+    * concurrent rendering before the stall completes.  This intentionally
+    * doesn't implement the SKL+ hardware workaround suggesting to enable CS
+    * stall on PIPE_CONTROLs with the texture cache invalidation bit set for
+    * GPGPU workloads because the previous and subsequent PIPE_CONTROLs
+    * already guarantee that there is no concurrent GPGPU kernel execution
+    * (see SKL HSD 2132585).
     */
    brw_emit_pipe_control_flush(brw,
                                PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                                PIPE_CONTROL_CONST_CACHE_INVALIDATE |
                                PIPE_CONTROL_INSTRUCTION_INVALIDATE |
-                               PIPE_CONTROL_DATA_CACHE_INVALIDATE |
-                               PIPE_CONTROL_NO_WRITE |
-                               PIPE_CONTROL_CS_STALL);
+                               PIPE_CONTROL_NO_WRITE);
 
-   /* ...followed by a second stalling flush which guarantees that
-    * invalidation is complete when the L3 configuration registers are
-    * modified.
+   /* Now send a third stalling flush to make sure that invalidation is
+    * complete when the L3 configuration registers are modified.
     */
    brw_emit_pipe_control_flush(brw,
                                PIPE_CONTROL_DATA_CACHE_INVALIDATE |

From 10d84ba9f084174a1e8e7639dfb05dd855ba86e8 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Thu, 14 Jan 2016 12:17:01 -0800
Subject: [PATCH 25/94] i965: Invalidate state cache before L3 partitioning
 set-up.

The state cache is also L3-backed so it seems sensible to make sure
it's clean as we do for other RO caches before repartitioning the L3.
This wasn't part of my original L3 partitioning code because I was
able to reproduce hangs on Gen7 hardware when the state cache
invalidation happened asynchronously with previous 3D rendering, which
should no longer be possible after the previous change.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/gen7_l3_state.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c
index 85f18d0e012..ff67c90fe87 100644
--- a/src/mesa/drivers/dri/i965/gen7_l3_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c
@@ -355,6 +355,7 @@ setup_l3_config(struct brw_context *brw, const struct brw_l3_config *cfg)
                                PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                                PIPE_CONTROL_CONST_CACHE_INVALIDATE |
                                PIPE_CONTROL_INSTRUCTION_INVALIDATE |
+                               PIPE_CONTROL_STATE_CACHE_INVALIDATE |
                                PIPE_CONTROL_NO_WRITE);
 
    /* Now send a third stalling flush to make sure that invalidation is

From 53739fddc65a4cb34a2da14b873e95a451916267 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Thu, 14 Jan 2016 12:20:46 -0800
Subject: [PATCH 26/94] i965: Rename define for the PIPE_CONTROL DC flush bit.

Its previous name was somewhat misleading: this bit really behaves like
a RW cache flush rather than an invalidation.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_misc_state.c   | 2 +-
 src/mesa/drivers/dri/i965/brw_pipe_control.c | 2 +-
 src/mesa/drivers/dri/i965/brw_program.c      | 2 +-
 src/mesa/drivers/dri/i965/gen7_l3_state.c    | 4 ++--
 src/mesa/drivers/dri/i965/intel_reg.h        | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 319c2a5669f..ab1a0d7255f 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -919,7 +919,7 @@ brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
        *   MI_PIPELINE_SELECT command to change the Pipeline Select Mode.
        */
       const unsigned dc_flush =
-         brw->gen >= 7 ? PIPE_CONTROL_DATA_CACHE_INVALIDATE : 0;
+         brw->gen >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;
 
       if (brw->gen == 6) {
          /* Hardware workaround: SNB B-Spec says:
diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index 6c636d26139..b41e28e1ec8 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -51,7 +51,7 @@ gen8_add_cs_stall_workaround_bits(uint32_t *flags)
                       PIPE_CONTROL_WRITE_TIMESTAMP |
                       PIPE_CONTROL_STALL_AT_SCOREBOARD |
                       PIPE_CONTROL_DEPTH_STALL |
-                      PIPE_CONTROL_DATA_CACHE_INVALIDATE;
+                      PIPE_CONTROL_DATA_CACHE_FLUSH;
 
    /* If we're doing a CS stall, and don't already have one of the
     * workaround bits set, add "Stall at Pixel Scoreboard."
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 368efeecb2d..3112c0c4014 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -209,7 +209,7 @@ static void
 brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
 {
    struct brw_context *brw = brw_context(ctx);
-   unsigned bits = (PIPE_CONTROL_DATA_CACHE_INVALIDATE |
+   unsigned bits = (PIPE_CONTROL_DATA_CACHE_FLUSH |
                     PIPE_CONTROL_NO_WRITE |
                     PIPE_CONTROL_CS_STALL);
    assert(brw->gen >= 7 && brw->gen <= 9);
diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c
index ff67c90fe87..0c1813f9048 100644
--- a/src/mesa/drivers/dri/i965/gen7_l3_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c
@@ -333,7 +333,7 @@ setup_l3_config(struct brw_context *brw, const struct brw_l3_config *cfg)
     * which involves a first PIPE_CONTROL flush which stalls the pipeline...
     */
    brw_emit_pipe_control_flush(brw,
-                               PIPE_CONTROL_DATA_CACHE_INVALIDATE |
+                               PIPE_CONTROL_DATA_CACHE_FLUSH |
                                PIPE_CONTROL_NO_WRITE |
                                PIPE_CONTROL_CS_STALL);
 
@@ -362,7 +362,7 @@ setup_l3_config(struct brw_context *brw, const struct brw_l3_config *cfg)
     * complete when the L3 configuration registers are modified.
     */
    brw_emit_pipe_control_flush(brw,
-                               PIPE_CONTROL_DATA_CACHE_INVALIDATE |
+                               PIPE_CONTROL_DATA_CACHE_FLUSH |
                                PIPE_CONTROL_NO_WRITE |
                                PIPE_CONTROL_CS_STALL);
 
diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h
index 8888d6f776c..365c045b8b0 100644
--- a/src/mesa/drivers/dri/i965/intel_reg.h
+++ b/src/mesa/drivers/dri/i965/intel_reg.h
@@ -86,7 +86,7 @@
 #define PIPE_CONTROL_INTERRUPT_ENABLE	(1 << 8)
 #define PIPE_CONTROL_FLUSH_ENABLE	(1 << 7) /* Gen7+ only */
 /* GT */
-#define PIPE_CONTROL_DATA_CACHE_INVALIDATE	(1 << 5)
+#define PIPE_CONTROL_DATA_CACHE_FLUSH   	(1 << 5)
 #define PIPE_CONTROL_VF_CACHE_INVALIDATE	(1 << 4)
 #define PIPE_CONTROL_CONST_CACHE_INVALIDATE	(1 << 3)
 #define PIPE_CONTROL_STATE_CACHE_INVALIDATE	(1 << 2)

From 52801766a00ba3b2a93e14d1ac80afd8980d15d8 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Mon, 8 Feb 2016 11:09:29 +1000
Subject: [PATCH 27/94] glsl/ir: add param index to variable.

We have a requirement to store the index into the mesa parameter list
for uniforms. Up until now we've overwritten var->data.location with
this info. However, this then stops us from accessing UniformStorage,
which is needed to do proper dereferencing.

Add a new variable to ir_variable to store this value in, and change
the two uses to use it correctly.

Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/compiler/glsl/ir.h                     | 8 ++++++++
 src/mesa/program/ir_to_mesa.cpp            | 5 ++---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +-
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h
index 09e21b22188..bf9b7caffae 100644
--- a/src/compiler/glsl/ir.h
+++ b/src/compiler/glsl/ir.h
@@ -863,6 +863,14 @@ public:
        */
       int location;
 
+      /**
+       * for glsl->tgsi/mesa IR we need to store the index into the
+       * parameters for uniforms, initially the code overloaded location
+       * but this causes problems with indirect samplers and AoA.
+       * This is assigned in _mesa_generate_parameters_list_for_uniforms.
+       */
+      int param_index;
+
       /**
        * Vertex stream output identifier.
        */
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 768d9216483..68cc4a5e0cd 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1389,7 +1389,7 @@ ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
       switch (var->data.mode) {
       case ir_var_uniform:
 	 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
-					       var->data.location);
+					       var->data.param_index);
 	 this->variables.push_tail(entry);
 	 break;
       case ir_var_shader_in:
@@ -2268,8 +2268,7 @@ public:
    {
       this->idx = -1;
       this->program_resource_visitor::process(var);
-
-      var->data.location = this->idx;
+      var->data.param_index = this->idx;
    }
 
 private:
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index b8182de0be8..4b5f2a3fc57 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2350,7 +2350,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
       switch (var->data.mode) {
       case ir_var_uniform:
          entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
-                                               var->data.location);
+                                               var->data.param_index);
          this->variables.push_tail(entry);
          break;
       case ir_var_shader_in:

From bb8bbe34e3d355c457578060c5926dfc23abecfd Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 4 Feb 2016 16:48:18 +1000
Subject: [PATCH 28/94] st/mesa: handle indirect samplers in arrays/structs
 properly (v4.1)

The state tracker never handled this properly, and it finally
annoyed me for the second time so I decided to fix it properly.

This is inspired by the NIR sampler lowering code. I only realised at
the last minute that NIR seems to do its deref ordering differently
from GLSL; once I got that, things got much easier.

It fixes a bunch of tests in
tests/spec/arb_gpu_shader5/execution/sampler_array_indexing/

v2: fix AoA tests when forced on.
I was right that I didn't need all that code; fixing the AoA code
meant cleaning up a chunk of code I didn't like in the array
handling.

v3: start generalising the code a bit more for atomics.
v3.1: use UniformRemapTable

v4: handle uniforms differently using the param_index,
and go back to UniformStorage
fix issues identified by Timothy with deref handling.
v4.1: squash const fix and move handling 1D const out
of recursive function.

Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
Acked-by: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 141 ++++++++++++++++++---
 1 file changed, 123 insertions(+), 18 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 4b5f2a3fc57..ea59d78e154 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -40,7 +40,6 @@
 #include "main/uniforms.h"
 #include "main/shaderapi.h"
 #include "program/prog_instruction.h"
-#include "program/sampler.h"
 
 #include "pipe/p_context.h"
 #include "pipe/p_screen.h"
@@ -257,6 +256,7 @@ public:
    GLboolean cond_update;
    bool saturate;
    st_src_reg sampler; /**< sampler register */
+   int sampler_base;
    int sampler_array_size; /**< 1-based size of sampler array, 1 if not array */
    int tex_target; /**< One of TEXTURE_*_INDEX */
    glsl_base_type tex_type;
@@ -502,6 +502,19 @@ public:
 
    void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
 
+   void get_deref_offsets(ir_dereference *ir,
+                          unsigned *array_size,
+                          unsigned *base,
+                          unsigned *index,
+                          st_src_reg *reladdr);
+  void calc_deref_offsets(ir_dereference *head,
+                          ir_dereference *tail,
+                          unsigned *array_elements,
+                          unsigned *base,
+                          unsigned *index,
+                          st_src_reg *indirect,
+                          unsigned *location);
+
    bool try_emit_mad(ir_expression *ir,
               int mul_operand);
    bool try_emit_mad_for_and_not(ir_expression *ir,
@@ -3436,18 +3449,113 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
    this->result = entry->return_reg;
 }
 
+void
+glsl_to_tgsi_visitor::calc_deref_offsets(ir_dereference *head,
+                                         ir_dereference *tail,
+                                         unsigned *array_elements,
+                                         unsigned *base,
+                                         unsigned *index,
+                                         st_src_reg *indirect,
+                                         unsigned *location)
+{
+   switch (tail->ir_type) {
+   case ir_type_dereference_record: {
+      ir_dereference_record *deref_record = tail->as_dereference_record();
+      const glsl_type *struct_type = deref_record->record->type;
+      int field_index = deref_record->record->type->field_index(deref_record->field);
+
+      calc_deref_offsets(head, deref_record->record->as_dereference(), array_elements, base, index, indirect, location);
+
+      assert(field_index >= 0);
+      *location += struct_type->record_location_offset(field_index);
+      break;
+   }
+
+   case ir_type_dereference_array: {
+      ir_dereference_array *deref_arr = tail->as_dereference_array();
+      ir_constant *array_index = deref_arr->array_index->constant_expression_value();
+
+      if (!array_index) {
+         st_src_reg temp_reg;
+         st_dst_reg temp_dst;
+
+         temp_reg = get_temp(glsl_type::uint_type);
+         temp_dst = st_dst_reg(temp_reg);
+         temp_dst.writemask = 1;
+
+         deref_arr->array_index->accept(this);
+         if (*array_elements != 1)
+            emit_asm(NULL, TGSI_OPCODE_MUL, temp_dst, this->result, st_src_reg_for_int(*array_elements));
+         else
+            emit_asm(NULL, TGSI_OPCODE_MOV, temp_dst, this->result);
+
+         if (indirect->file == PROGRAM_UNDEFINED)
+            *indirect = temp_reg;
+         else {
+            temp_dst = st_dst_reg(*indirect);
+            temp_dst.writemask = 1;
+            emit_asm(NULL, TGSI_OPCODE_ADD, temp_dst, *indirect, temp_reg);
+         }
+      } else
+         *index += array_index->value.u[0] * *array_elements;
+
+      *array_elements *= deref_arr->array->type->length;
+
+      calc_deref_offsets(head, deref_arr->array->as_dereference(), array_elements, base, index, indirect, location);
+      break;
+   }
+   default:
+      break;
+   }
+}
+
+void
+glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir,
+                                        unsigned *array_size,
+                                        unsigned *base,
+                                        unsigned *index,
+                                        st_src_reg *reladdr)
+{
+   GLuint shader = _mesa_program_enum_to_shader_stage(this->prog->Target);
+   unsigned location = 0;
+   ir_variable *var = ir->variable_referenced();
+
+   memset(reladdr, 0, sizeof(*reladdr));
+   reladdr->file = PROGRAM_UNDEFINED;
+
+   *base = 0;
+   *array_size = 1;
+
+   assert(var);
+   location = var->data.location;
+   calc_deref_offsets(ir, ir, array_size, base, index, reladdr, &location);
+
+   /*
+    * If we end up with no indirect then adjust the base to the index,
+    * and set the array size to 1.
+    */
+   if (reladdr->file == PROGRAM_UNDEFINED) {
+      *base = *index;
+      *array_size = 1;
+   }
+
+   if (location != 0xffffffff) {
+      *base += this->shader_program->UniformStorage[location].opaque[shader].index;
+      *index += this->shader_program->UniformStorage[location].opaque[shader].index;
+   }
+}
+
 void
 glsl_to_tgsi_visitor::visit(ir_texture *ir)
 {
    st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy;
    st_src_reg offset[MAX_GLSL_TEXTURE_OFFSET], sample_index, component;
-   st_src_reg levels_src;
+   st_src_reg levels_src, reladdr;
    st_dst_reg result_dst, coord_dst, cube_sc_dst;
    glsl_to_tgsi_instruction *inst = NULL;
    unsigned opcode = TGSI_OPCODE_NOP;
    const glsl_type *sampler_type = ir->sampler->type;
-   ir_rvalue *sampler_index =
-      _mesa_get_sampler_array_nonconst_index(ir->sampler);
+   unsigned sampler_array_size = 1, sampler_index = 0, sampler_base = 0;
    bool is_cube_array = false;
    unsigned i;
 
@@ -3669,10 +3777,10 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
       coord_dst.writemask = WRITEMASK_XYZW;
    }
 
-   if (sampler_index) {
-      sampler_index->accept(this);
-      emit_arl(ir, sampler_reladdr, this->result);
-   }
+   get_deref_offsets(ir->sampler, &sampler_array_size, &sampler_base,
+                     &sampler_index, &reladdr);
+   if (reladdr.file != PROGRAM_UNDEFINED)
+      emit_arl(ir, sampler_reladdr, reladdr);
 
    if (opcode == TGSI_OPCODE_TXD)
       inst = emit_asm(ir, opcode, result_dst, coord, dx, dy);
@@ -3705,16 +3813,13 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
    if (ir->shadow_comparitor)
       inst->tex_shadow = GL_TRUE;
 
-   inst->sampler.index = _mesa_get_sampler_uniform_value(ir->sampler,
-                                                         this->shader_program,
-                                                         this->prog);
-   if (sampler_index) {
+   inst->sampler.index = sampler_index;
+   inst->sampler_array_size = sampler_array_size;
+   inst->sampler_base = sampler_base;
+
+   if (reladdr.file != PROGRAM_UNDEFINED) {
       inst->sampler.reladdr = ralloc(mem_ctx, st_src_reg);
-      memcpy(inst->sampler.reladdr, &sampler_reladdr, sizeof(sampler_reladdr));
-      inst->sampler_array_size =
-         ir->sampler->as_dereference_array()->array->type->array_size();
-   } else {
-      inst->sampler_array_size = 1;
+      memcpy(inst->sampler.reladdr, &reladdr, sizeof(reladdr));
    }
 
    if (ir->offset) {
@@ -3915,7 +4020,7 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
    foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) {
       if (inst->info->is_tex) {
          for (int i = 0; i < inst->sampler_array_size; i++) {
-            unsigned idx = inst->sampler.index + i;
+            unsigned idx = inst->sampler_base + i;
             v->samplers_used |= 1 << idx;
 
             debug_assert(idx < (int)ARRAY_SIZE(v->sampler_types));

From 90bbe3d781ce15ecd6316ee63f431a82c311878e Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@gmail.com>
Date: Fri, 5 Feb 2016 13:34:29 +1000
Subject: [PATCH 29/94] mesa: drop unused nonconst sampler functions.

Since we fixed the glsl->tgsi conversion we no longer need
this function.

Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/mesa/program/sampler.cpp | 10 ----------
 src/mesa/program/sampler.h   |  4 ----
 2 files changed, 14 deletions(-)

diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp
index f118552d64e..994495af2ae 100644
--- a/src/mesa/program/sampler.cpp
+++ b/src/mesa/program/sampler.cpp
@@ -132,13 +132,3 @@ _mesa_get_sampler_uniform_value(class ir_dereference *sampler,
           getname.offset;
 }
 
-
-class ir_rvalue *
-_mesa_get_sampler_array_nonconst_index(class ir_dereference *sampler)
-{
-   ir_dereference_array *deref_arr = sampler->as_dereference_array();
-   if (!deref_arr || deref_arr->array_index->as_constant())
-      return NULL;
-
-   return deref_arr->array_index;
-}
diff --git a/src/mesa/program/sampler.h b/src/mesa/program/sampler.h
index 61c7f5851e7..397805a224f 100644
--- a/src/mesa/program/sampler.h
+++ b/src/mesa/program/sampler.h
@@ -32,8 +32,4 @@ _mesa_get_sampler_uniform_value(class ir_dereference *sampler,
 				struct gl_shader_program *shader_program,
 				const struct gl_program *prog);
 
-class ir_rvalue *
-_mesa_get_sampler_array_nonconst_index(class ir_dereference *sampler);
-
-
 #endif /* SAMPLER_H */

From b74e8c89a684c5c632df9b39f15585de584148c1 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@gmail.com>
Date: Fri, 5 Feb 2016 12:00:38 +1000
Subject: [PATCH 30/94] st/mesa: add atomic AoA support

reuse the sampler deref handling code to do the same
thing for atomics.

Acked-by: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index ea59d78e154..ce93aec4e71 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3160,19 +3160,17 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
 
    /* Calculate the surface offset */
    st_src_reg offset;
-   ir_dereference_array *deref_array = deref->as_dereference_array();
+   unsigned array_size = 0, base = 0, index = 0;
 
-   if (deref_array) {
-      offset = get_temp(glsl_type::uint_type);
-
-      deref_array->array_index->accept(this);
+   get_deref_offsets(deref, &array_size, &base, &index, &offset);
 
+   if (offset.file != PROGRAM_UNDEFINED) {
       emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset),
-               this->result, st_src_reg_for_int(ATOMIC_COUNTER_SIZE));
+               offset, st_src_reg_for_int(ATOMIC_COUNTER_SIZE));
       emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset),
-               offset, st_src_reg_for_int(location->data.offset));
+               offset, st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE));
    } else {
-      offset = st_src_reg_for_int(location->data.offset);
+      offset = st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE);
    }
 
    ir->return_deref->accept(this);

From 6502b3f60e193b314bd20261a8290709a4a56674 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 4 Feb 2016 17:38:54 +1000
Subject: [PATCH 31/94] st/mesa: enable AoA for gallium drivers reporting GLSL
 1.30

Acked-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 docs/GL3.txt                           | 4 ++--
 docs/relnotes/11.2.0.html              | 1 +
 src/mesa/state_tracker/st_extensions.c | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 257fc73225c..e2aa52cc388 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -149,7 +149,7 @@ GL 4.2, GLSL 4.20:
 
 GL 4.3, GLSL 4.30:
 
-  GL_ARB_arrays_of_arrays                              DONE (i965)
+  GL_ARB_arrays_of_arrays                              DONE (all drivers that support GLSL 1.30)
   GL_ARB_ES3_compatibility                             DONE (all drivers that support GLSL 3.30)
   GL_ARB_clear_buffer_object                           DONE (all drivers)
   GL_ARB_compute_shader                                DONE (i965)
@@ -209,7 +209,7 @@ GL 4.5, GLSL 4.50:
 
 These are the extensions cherry-picked to make GLES 3.1
 GLES3.1, GLSL ES 3.1
-  GL_ARB_arrays_of_arrays                              DONE (i965)
+  GL_ARB_arrays_of_arrays                              DONE (all drivers that support GLSL 1.30)
   GL_ARB_compute_shader                                DONE (i965)
   GL_ARB_draw_indirect                                 DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
   GL_ARB_explicit_uniform_location                     DONE (all drivers that support GLSL)
diff --git a/docs/relnotes/11.2.0.html b/docs/relnotes/11.2.0.html
index 0d92ed41ee8..069eca2b70c 100644
--- a/docs/relnotes/11.2.0.html
+++ b/docs/relnotes/11.2.0.html
@@ -44,6 +44,7 @@ Note: some of the new features are only available with certain drivers.
 </p>
 
 <ul>
+<li>GL_ARB_arrays_of_arrays on all gallium drivers that provide GLSL 1.30</li>
 <li>GL_ARB_base_instance on freedreno/a4xx</li>
 <li>GL_ARB_compute_shader on i965</li>
 <li>GL_ARB_copy_image on r600</li>
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index f25bd742f79..feabe6290eb 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -808,6 +808,7 @@ void st_init_extensions(struct pipe_screen *screen,
       }
 
       extensions->EXT_shader_integer_mix = GL_TRUE;
+      extensions->ARB_arrays_of_arrays = GL_TRUE;
    } else {
       /* Optional integer support for GLSL 1.2. */
       if (screen->get_shader_param(screen, PIPE_SHADER_VERTEX,

From d0e1d6b7e27bf5f05436e47080d326d7daa63af2 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 4 Feb 2016 08:10:02 -0800
Subject: [PATCH 32/94] i965: Don't add barrier deps for FB write messages.

There are never render target reads, so there are no scheduling hazards.

Giving the extra flexibility to the scheduler makes it possible to do
FB writes as soon as their sources are available, reducing register
pressure.  It also makes it possible to do the payload setup for more
than one FB write message at a time, which could better hide latency.

shader-db results on Skylake:

total instructions in shared programs: 9110254 -> 9110211 (-0.00%)
instructions in affected programs: 2898 -> 2855 (-1.48%)
helped: 3
HURT:   0
LOST:   0
GAINED: 1

A reduction in instruction counts is surprising, but legitimate:
the three shaders helped were spilling, and reducing register
pressure allowed us to issue fewer spills/fills.

total cycles in shared programs: 69035108 -> 68928820 (-0.15%)
cycles in affected programs: 4412402 -> 4306114 (-2.41%)
helped: 4457
HURT: 213

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 60f7fd9cfcd..4f97577515a 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -939,8 +939,9 @@ fs_instruction_scheduler::calculate_deps()
    foreach_in_list(schedule_node, n, &instructions) {
       fs_inst *inst = (fs_inst *)n->inst;
 
-      if (inst->opcode == FS_OPCODE_PLACEHOLDER_HALT ||
-         inst->has_side_effects())
+      if ((inst->opcode == FS_OPCODE_PLACEHOLDER_HALT ||
+           inst->has_side_effects()) &&
+          inst->opcode != FS_OPCODE_FB_WRITE)
          add_barrier_deps(n);
 
       /* read-after-write deps. */
@@ -1195,7 +1196,7 @@ vec4_instruction_scheduler::calculate_deps()
    foreach_in_list(schedule_node, n, &instructions) {
       vec4_instruction *inst = (vec4_instruction *)n->inst;
 
-      if (inst->has_side_effects())
+      if (inst->has_side_effects() && inst->opcode != FS_OPCODE_FB_WRITE)
          add_barrier_deps(n);
 
       /* read-after-write deps. */

From edc108765e71795755efcf7be479e3bcdd7b7ecf Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Sat, 6 Feb 2016 09:30:00 +1100
Subject: [PATCH 33/94] mesa: compute sampler index in ir_to_mesa rather than
 using UniformHash

The aim of this is to work towards removing UniformHash from the program
struct so that we don't need to hold onto it in memory and pass it around
outside the linker.

Reviewed-by: Dave Airlie <airlied@redhat.com>
---
 src/mesa/program/ir_to_mesa.cpp | 81 +++++++++++++++++++++++++++++++--
 1 file changed, 78 insertions(+), 3 deletions(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 68cc4a5e0cd..f2902c9f863 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1539,6 +1539,82 @@ get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v)
    return dst_reg(v->result);
 }
 
+/* Calculate the sampler index and also calculate the base uniform location
+ * for struct members.
+ */
+static void
+calc_sampler_offsets(struct gl_shader_program *prog, ir_dereference *deref,
+                     unsigned *offset, unsigned *array_elements,
+                     unsigned *location)
+{
+   if (deref->ir_type == ir_type_dereference_variable)
+      return;
+
+   switch (deref->ir_type) {
+   case ir_type_dereference_array: {
+      ir_dereference_array *deref_arr = deref->as_dereference_array();
+      ir_constant *array_index =
+         deref_arr->array_index->constant_expression_value();
+
+      if (!array_index) {
+	 /* GLSL 1.10 and 1.20 allowed variable sampler array indices,
+	  * while GLSL 1.30 requires that the array indices be
+	  * constant integer expressions.  We don't expect any driver
+	  * to actually work with a really variable array index, so
+	  * all that would work would be an unrolled loop counter that ends
+	  * up being constant above.
+	  */
+	 ralloc_strcat(&prog->InfoLog,
+		       "warning: Variable sampler array index unsupported.\n"
+		       "This feature of the language was removed in GLSL 1.20 "
+		       "and is unlikely to be supported for 1.10 in Mesa.\n");
+      } else {
+         *offset += array_index->value.u[0] * *array_elements;
+      }
+
+      *array_elements *= deref_arr->array->type->length;
+
+      calc_sampler_offsets(prog, deref_arr->array->as_dereference(),
+                           offset, array_elements, location);
+      break;
+   }
+
+   case ir_type_dereference_record: {
+      ir_dereference_record *deref_record = deref->as_dereference_record();
+      unsigned field_index =
+         deref_record->record->type->field_index(deref_record->field);
+      *location +=
+         deref_record->record->type->record_location_offset(field_index);
+      calc_sampler_offsets(prog, deref_record->record->as_dereference(),
+                           offset, array_elements, location);
+      break;
+   }
+
+   default:
+      unreachable("Invalid deref type");
+      break;
+   }
+}
+
+static int
+get_sampler_uniform_value(class ir_dereference *sampler,
+                          struct gl_shader_program *shader_program,
+                          const struct gl_program *prog)
+{
+   GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target);
+   ir_variable *var = sampler->variable_referenced();
+   unsigned location = var->data.location;
+   unsigned array_elements = 1;
+   unsigned offset = 0;
+
+   calc_sampler_offsets(shader_program, sampler, &offset, &array_elements,
+                        &location);
+
+   assert(shader_program->UniformStorage[location].opaque[shader].active);
+   return shader_program->UniformStorage[location].opaque[shader].index +
+          offset;
+}
+
 /**
  * Process the condition of a conditional assignment
  *
@@ -1988,9 +2064,8 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
    if (ir->shadow_comparitor)
       inst->tex_shadow = GL_TRUE;
 
-   inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
-						   this->shader_program,
-						   this->prog);
+   inst->sampler = get_sampler_uniform_value(ir->sampler, shader_program,
+                                             prog);
 
    switch (sampler_type->sampler_dimensionality) {
    case GLSL_SAMPLER_DIM_1D:

From 184afd8fd9e7891322224f57a12c2e0fe52b46cb Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Sat, 6 Feb 2016 09:38:55 +1100
Subject: [PATCH 34/94] mesa: remove now unused sampler index handling code

Reviewed-by: Dave Airlie <airlied@redhat.com>
---
 src/mesa/Makefile.sources       |   2 -
 src/mesa/program/ir_to_mesa.cpp |   1 -
 src/mesa/program/sampler.cpp    | 134 --------------------------------
 src/mesa/program/sampler.h      |  35 ---------
 4 files changed, 172 deletions(-)
 delete mode 100644 src/mesa/program/sampler.cpp
 delete mode 100644 src/mesa/program/sampler.h

diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 35405e7d1e0..6669f295399 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -532,8 +532,6 @@ PROGRAM_FILES = \
 	program/program_parser.h \
 	program/prog_statevars.c \
 	program/prog_statevars.h \
-	program/sampler.cpp \
-	program/sampler.h \
 	program/string_to_uint_map.cpp \
 	program/symbol_table.c \
 	program/symbol_table.h
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index f2902c9f863..a5e32746362 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -51,7 +51,6 @@
 #include "program/prog_print.h"
 #include "program/program.h"
 #include "program/prog_parameter.h"
-#include "program/sampler.h"
 
 
 static int swizzle_for_size(int size);
diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp
deleted file mode 100644
index 994495af2ae..00000000000
--- a/src/mesa/program/sampler.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
- * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include "main/mtypes.h"
-#include "compiler/glsl_types.h"
-#include "compiler/glsl/ir.h"
-#include "compiler/glsl/ir_uniform.h"
-#include "compiler/glsl/ir_visitor.h"
-#include "compiler/glsl/program.h"
-#include "program/hash_table.h"
-#include "program/prog_parameter.h"
-#include "program/program.h"
-
-
-class get_sampler_name : public ir_hierarchical_visitor
-{
-public:
-   get_sampler_name(ir_dereference *last,
-		    struct gl_shader_program *shader_program)
-   {
-      this->mem_ctx = ralloc_context(NULL);
-      this->shader_program = shader_program;
-      this->name = NULL;
-      this->offset = 0;
-      this->last = last;
-   }
-
-   ~get_sampler_name()
-   {
-      ralloc_free(this->mem_ctx);
-   }
-
-   virtual ir_visitor_status visit(ir_dereference_variable *ir)
-   {
-      this->name = ir->var->name;
-      return visit_continue;
-   }
-
-   virtual ir_visitor_status visit_leave(ir_dereference_record *ir)
-   {
-      this->name = ralloc_asprintf(mem_ctx, "%s.%s", name, ir->field);
-      return visit_continue;
-   }
-
-   virtual ir_visitor_status visit_leave(ir_dereference_array *ir)
-   {
-      ir_constant *index = ir->array_index->as_constant();
-      int i;
-
-      if (index) {
-	 i = index->value.i[0];
-      } else {
-	 /* GLSL 1.10 and 1.20 allowed variable sampler array indices,
-	  * while GLSL 1.30 requires that the array indices be
-	  * constant integer expressions.  We don't expect any driver
-	  * to actually work with a really variable array index, so
-	  * all that would work would be an unrolled loop counter that ends
-	  * up being constant above.
-	  */
-	 ralloc_strcat(&shader_program->InfoLog,
-		       "warning: Variable sampler array index unsupported.\n"
-		       "This feature of the language was removed in GLSL 1.20 "
-		       "and is unlikely to be supported for 1.10 in Mesa.\n");
-	 i = 0;
-      }
-      if (ir != last) {
-	 this->name = ralloc_asprintf(mem_ctx, "%s[%d]", name, i);
-      } else {
-	 offset = i;
-      }
-      return visit_continue;
-   }
-
-   struct gl_shader_program *shader_program;
-   const char *name;
-   void *mem_ctx;
-   int offset;
-   ir_dereference *last;
-};
-
-
-int
-_mesa_get_sampler_uniform_value(class ir_dereference *sampler,
-				struct gl_shader_program *shader_program,
-				const struct gl_program *prog)
-{
-   get_sampler_name getname(sampler, shader_program);
-
-   GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target);
-
-   sampler->accept(&getname);
-
-   unsigned location;
-   if (!shader_program->UniformHash->get(location, getname.name)) {
-      linker_error(shader_program,
-		   "failed to find sampler named %s.\n", getname.name);
-      return 0;
-   }
-
-   if (!shader_program->UniformStorage[location].opaque[shader].active) {
-      assert(0 && "cannot return a sampler");
-      linker_error(shader_program,
-		   "cannot return a sampler named %s, because it is not "
-                   "used in this shader stage. This is a driver bug.\n",
-                   getname.name);
-      return 0;
-   }
-
-   return shader_program->UniformStorage[location].opaque[shader].index +
-          getname.offset;
-}
-
diff --git a/src/mesa/program/sampler.h b/src/mesa/program/sampler.h
deleted file mode 100644
index 397805a224f..00000000000
--- a/src/mesa/program/sampler.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
- * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SAMPLER_H
-#define SAMPLER_H
-
-
-int
-_mesa_get_sampler_uniform_value(class ir_dereference *sampler,
-				struct gl_shader_program *shader_program,
-				const struct gl_program *prog);
-
-#endif /* SAMPLER_H */

From 74f956c416d5b0b37b4c2d6b957167bb203502c3 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 21 Jan 2016 16:37:20 -0800
Subject: [PATCH 35/94] i965: Use nir_lower_load_const_to_scalar().

I don't know why, but we never hooked up this pass Eric wrote.
Otherwise, you can end up with stupid scalarized code such as:

   vec4 ssa_7 = load_const (0.0, 0.0, 0.0, 0.0)
   vec4 ssa_8 = ...
   vec1 ssa_9 = feq ssa_8, ssa_7
   vec1 ssa_10 = feq ssa_8.y, ssa_7.y
   vec1 ssa_11 = feq ssa_8, ssa_7.z
   vec1 ssa_12 = feq ssa_8.y, ssa_7.w

ssa_8.xyxy == <0, 0, 0, 0> should only take two feq instructions.

shader-db on Skylake:

total instructions in shared programs: 9121153 -> 9120749 (-0.00%)
instructions in affected programs: 32421 -> 32017 (-1.25%)
helped: 277
HURT: 69

total cycles in shared programs: 69003364 -> 69000912 (-0.00%)
cycles in affected programs: 899186 -> 896734 (-0.27%)
helped: 313
HURT: 403

This also prevents regressions when disabling channel expressions.

v2: Don't call opt_cse afterwards (requested by Matt).  It should
    happen in the optimization loop below anyway.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_nir.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 287f935d539..46b51163579 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -482,6 +482,10 @@ brw_preprocess_nir(nir_shader *nir, bool is_scalar)
 
    nir = nir_optimize(nir, is_scalar);
 
+   if (is_scalar) {
+      OPT_V(nir_lower_load_const_to_scalar);
+   }
+
    /* Lower a bunch of stuff */
    OPT_V(nir_lower_var_copies);
 

From 3fd42807593eb68d83a36215fc5fc49b3adc7724 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Fri, 5 Feb 2016 13:08:19 +1100
Subject: [PATCH 36/94] glsl: validate arrays of arrays on empty type
 declarations

Fixes:
dEQP-GLES31.functional.shaders.arrays_of_arrays.invalid.empty_declaration_without_var_name_fragment
dEQP-GLES31.functional.shaders.arrays_of_arrays.invalid.empty_declaration_without_var_name_vertex

Reviewed-by: Dave Airlie <airlied@redhat.com>
---
 src/compiler/glsl/ast_to_hir.cpp | 65 +++++++++++++++++++-------------
 1 file changed, 39 insertions(+), 26 deletions(-)

diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 3fca18a5087..92f4988c339 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -4210,33 +4210,46 @@ ast_declarator_list::hir(exec_list *instructions,
          _mesa_glsl_error(&loc, state,
                           "invalid type `%s' in empty declaration",
                           type_name);
-      } else if (decl_type->base_type == GLSL_TYPE_ATOMIC_UINT) {
-         /* Empty atomic counter declarations are allowed and useful
-          * to set the default offset qualifier.
-          */
-         return NULL;
-      } else if (this->type->qualifier.precision != ast_precision_none) {
-         if (this->type->specifier->structure != NULL) {
-            _mesa_glsl_error(&loc, state,
-                             "precision qualifiers can't be applied "
-                             "to structures");
-         } else {
-            static const char *const precision_names[] = {
-               "highp",
-               "highp",
-               "mediump",
-               "lowp"
-            };
-
-            _mesa_glsl_warning(&loc, state,
-                               "empty declaration with precision qualifier, "
-                               "to set the default precision, use "
-                               "`precision %s %s;'",
-                               precision_names[this->type->qualifier.precision],
-                               type_name);
+      } else {
+         if (decl_type->base_type == GLSL_TYPE_ARRAY) {
+            /* From Section 4.12 (Empty Declarations) of the GLSL 4.5 spec:
+             *
+             *    "The combinations of types and qualifiers that cause
+             *    compile-time or link-time errors are the same whether or not
+             *    the declaration is empty."
+             */
+            validate_array_dimensions(decl_type, state, &loc);
+         }
+
+         if (decl_type->base_type == GLSL_TYPE_ATOMIC_UINT) {
+            /* Empty atomic counter declarations are allowed and useful
+             * to set the default offset qualifier.
+             */
+            return NULL;
+         } else if (this->type->qualifier.precision != ast_precision_none) {
+            if (this->type->specifier->structure != NULL) {
+               _mesa_glsl_error(&loc, state,
+                                "precision qualifiers can't be applied "
+                                "to structures");
+            } else {
+               static const char *const precision_names[] = {
+                  "highp",
+                  "highp",
+                  "mediump",
+                  "lowp"
+               };
+
+               _mesa_glsl_warning(&loc, state,
+                                  "empty declaration with precision "
+                                  "qualifier, to set the default precision, "
+                                  "use `precision %s %s;'",
+                                  precision_names[this->type->
+                                     qualifier.precision],
+                                  type_name);
+            }
+         } else if (this->type->specifier->structure == NULL) {
+            _mesa_glsl_warning(&loc, state, "empty declaration");
          }
-      } else if (this->type->specifier->structure == NULL) {
-         _mesa_glsl_warning(&loc, state, "empty declaration");
       }
    }
 

From 9f02e3ab03da91162fc0c64b91a2e5ff213dff9b Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 1 Feb 2016 16:34:46 -0800
Subject: [PATCH 37/94] nir: Add opt_algebraic rules for xor with zero.

instructions in affected programs: 668 -> 664 (-0.60%)
helped: 4

Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/compiler/nir/nir_opt_algebraic.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index e7765faa146..60df69fadbd 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -149,6 +149,8 @@ optimizations = [
    (('ior', a, 0), a),
    (('fxor', a, a), 0.0),
    (('ixor', a, a), 0),
+   (('fxor', a, 0.0), a),
+   (('ixor', a, 0), a),
    (('inot', ('inot', a)), a),
    # DeMorgan's Laws
    (('iand', ('inot', a), ('inot', b)), ('inot', ('ior',  a, b))),

From a8f0960816e868783d56edac1851a6d218e09c5a Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Thu, 4 Feb 2016 12:00:15 -0800
Subject: [PATCH 38/94] nir: Recognize product of open-coded pow()s.

Prevents regressions in the next commit.

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/compiler/nir/nir_opt_algebraic.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 60df69fadbd..9b82bac6f6c 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -167,6 +167,8 @@ optimizations = [
    (('flog2', ('fexp2', a)), a), # lg2(2^a) = a
    (('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b)
    (('fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
+   (('fexp2', ('fadd', ('fmul', ('flog2', a), b), ('fmul', ('flog2', c), d))),
+    ('fmul', ('fpow', a, b), ('fpow', c, d)), '!options->lower_fpow'), # 2^(lg2(a) * b + lg2(c) * d) = a^b * c^d
    (('fpow', a, 1.0), a),
    (('fpow', a, 2.0), ('fmul', a, a)),
    (('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))),

From 7be8d07732295bd546130d9c8c8fb8ff64490686 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Thu, 4 Feb 2016 12:04:42 -0800
Subject: [PATCH 39/94] nir: Do opt_algebraic in reverse order.

Walking the SSA definitions in order means that we consider the smallest
algebraic optimizations before larger optimizations. So if a smaller
rule is part of a larger rule, the smaller one will happen first,
preventing the larger one from happening.

instructions in affected programs: 32721 -> 32611 (-0.34%)
helped: 106

In programs whose nir_optimize loop count changes (129 of them):

   before:  1164 optimization loops
   after:   1071 optimization loops

Of the 129 affected, 16 programs' optimization loop counts increased.

Prevents regressions and annoyances in the next commits.

Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/compiler/nir/nir_algebraic.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_algebraic.py b/src/compiler/nir/nir_algebraic.py
index a30652f2afd..77ad35eda2a 100644
--- a/src/compiler/nir/nir_algebraic.py
+++ b/src/compiler/nir/nir_algebraic.py
@@ -216,7 +216,7 @@ ${pass_name}_block(nir_block *block, void *void_state)
 {
    struct opt_state *state = void_state;
 
-   nir_foreach_instr_safe(block, instr) {
+   nir_foreach_instr_reverse_safe(block, instr) {
       if (instr->type != nir_instr_type_alu)
          continue;
 
@@ -255,7 +255,7 @@ ${pass_name}_impl(nir_function_impl *impl, const bool *condition_flags)
    state.progress = false;
    state.condition_flags = condition_flags;
 
-   nir_foreach_block(impl, ${pass_name}_block, &state);
+   nir_foreach_block_reverse(impl, ${pass_name}_block, &state);
 
    if (state.progress)
       nir_metadata_preserve(impl, nir_metadata_block_index |

From 2d0d9755da92f7153c1390728fa448b9978e9135 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 1 Feb 2016 16:35:41 -0800
Subject: [PATCH 40/94] nir: Handle large unsigned values in opt_algebraic.

The next patch adds an algebraic rule that uses the constant 0xff00ff00.

Without this change, the build fails with

   return hex(struct.unpack('I', struct.pack('i', self.value))[0])
   struct.error: 'i' format requires -2147483648 <= number <= 2147483647

The hex() function handles integers of any size, and assigning a
negative value to an unsigned does what we want in C. The pack/unpack is
unnecessary (and as we see, buggy).

Reviewed-by: Dylan Baker <baker.dylan.c@gmail.com>
---
 src/compiler/nir/nir_algebraic.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/compiler/nir/nir_algebraic.py b/src/compiler/nir/nir_algebraic.py
index 77ad35eda2a..2357b57117a 100644
--- a/src/compiler/nir/nir_algebraic.py
+++ b/src/compiler/nir/nir_algebraic.py
@@ -102,13 +102,10 @@ class Constant(Value):
       self.value = val
 
    def __hex__(self):
-      # Even if it's an integer, we still need to unpack as an unsigned
-      # int.  This is because, without C99, we can only assign to the first
-      # element of a union in an initializer.
       if isinstance(self.value, (bool)):
          return 'NIR_TRUE' if self.value else 'NIR_FALSE'
       if isinstance(self.value, (int, long)):
-         return hex(struct.unpack('I', struct.pack('i', self.value))[0])
+         return hex(self.value)
       elif isinstance(self.value, float):
          return hex(struct.unpack('I', struct.pack('f', self.value))[0])
       else:

From 371c4b3c48f665d530bc0c95990ef47f0e4c1b87 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 1 Feb 2016 16:44:18 -0800
Subject: [PATCH 41/94] nir: Recognize open-coded bitfield_reverse.

Helps 11 shaders in UnrealEngine4 demos.

I seriously hope they would have given us bitfieldReverse() if we
exposed GL 4.0 (but we do expose ARB_gpu_shader5, so why not use that
anyway?).

instructions in affected programs: 4875 -> 4633 (-4.96%)
cycles in affected programs: 270516 -> 244516 (-9.61%)

I suspect there's a *lot* of room to improve nir_search/opt_algebraic's
handling of this. We'd actually like to match, e.g., step2 by matching
step1 once and then doing a pointer comparison for the second instance
of step1, but unfortunately we generate an enormous tuple instead.

The .text size increases by 6.5% and the .data by 17.5%.

   text     data  bss    dec    hex  filename
  22957    45224    0  68181  10a55  nir_libnir_la-nir_opt_algebraic.o
  24461    53160    0  77621  12f35  nir_libnir_la-nir_opt_algebraic.o

I'd be happy to remove this if Unreal4 uses bitfieldReverse() if it is
in a GL 4.0 context once we expose GL 4.0.

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/compiler/nir/nir_opt_algebraic.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 9b82bac6f6c..cc2c2299ab9 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -312,6 +312,19 @@ optimizations = [
      'options->lower_unpack_snorm_4x8'),
 ]
 
+# Unreal Engine 4 demo applications open-code bitfieldReverse()
+def bitfield_reverse(u):
+    step1 = ('ior', ('ishl', u, 16), ('ushr', u, 16))
+    step2 = ('ior', ('ishl', ('iand', step1, 0x00ff00ff), 8), ('ushr', ('iand', step1, 0xff00ff00), 8))
+    step3 = ('ior', ('ishl', ('iand', step2, 0x0f0f0f0f), 4), ('ushr', ('iand', step2, 0xf0f0f0f0), 4))
+    step4 = ('ior', ('ishl', ('iand', step3, 0x33333333), 2), ('ushr', ('iand', step3, 0xcccccccc), 2))
+    step5 = ('ior', ('ishl', ('iand', step4, 0x55555555), 1), ('ushr', ('iand', step4, 0xaaaaaaaa), 1))
+
+    return step5
+
+optimizations += [(bitfield_reverse('x'), ('bitfield_reverse', 'x'))]
+
+
 # Add optimizations to handle the case where the result of a ternary is
 # compared to a constant.  This way we can take things like
 #

From 76cfb472077dc83c892b4cddf79333341deaa7b5 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Mon, 25 Jan 2016 21:56:18 +1100
Subject: [PATCH 42/94] glsl: don't attempt to link empty program

Previously an empty program would go through the entire
link_shaders() function and we would have to be careful
not to cause a segfault.

In core profile, link_status is now also set to false by
generating an error; it was previously set to true.

From Section 7.3 (PROGRAM OBJECTS) of the OpenGL 4.5 spec:

   "Linking can fail for a variety of reasons as specified in the
   OpenGL Shading Language Specification, as well as any of the
   following reasons:

    - No shader objects are attached to program."

V2: Only generate an error in core profile and add spec quote (Ian)

V3: generate error in ES too, remove previous check which was only
applying the rule to GL 4.5/ES 3.1 and above. My understanding is that
this spec change is clarifying previously undefined behaviour and
therefore should be applied retrospectively. The ES CTS tests for
this are in ES 2; I suspect they were passing because it would have
generated an error for not having both a vertex and fragment shader.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/compiler/glsl/linker.cpp | 46 ++++++++++++++++++------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index f1ac53abb0a..31efb57b035 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -4106,15 +4106,34 @@ disable_varying_optimizations_for_sso(struct gl_shader_program *prog)
 void
 link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
 {
+   prog->LinkStatus = true; /* All error paths will set this to false */
+   prog->Validated = false;
+   prog->_Used = false;
+
+   /* Section 7.3 (Program Objects) of the OpenGL 4.5 Core Profile spec says:
+    *
+    *     "Linking can fail for a variety of reasons as specified in the
+    *     OpenGL Shading Language Specification, as well as any of the
+    *     following reasons:
+    *
+    *     - No shader objects are attached to program."
+    *
+    * The Compatibility Profile specification does not list the error.  In
+    * Compatibility Profile missing shader stages are replaced by
+    * fixed-function.  This applies to the case where all stages are
+    * missing.
+    */
+   if (prog->NumShaders == 0) {
+      if (ctx->API != API_OPENGL_COMPAT)
+         linker_error(prog, "no shaders attached to the program\n");
+      return;
+   }
+
    tfeedback_decl *tfeedback_decls = NULL;
    unsigned num_tfeedback_decls = prog->TransformFeedback.NumVarying;
 
    void *mem_ctx = ralloc_context(NULL); // temporary linker context
 
-   prog->LinkStatus = true; /* All error paths will set this to false */
-   prog->Validated = false;
-   prog->_Used = false;
-
    prog->ARB_fragment_coord_conventions_enable = false;
 
    /* Separate the shaders into groups based on their type.
@@ -4163,25 +4182,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
    prog->Version = max_version;
    prog->IsES = is_es_prog;
 
-   /* From OpenGL 4.5 Core specification (7.3 Program Objects):
-    *     "Linking can fail for a variety of reasons as specified in the OpenGL
-    *     Shading Language Specification, as well as any of the following
-    *     reasons:
-    *
-    *     * No shader objects are attached to program.
-    *
-    *     ..."
-    *
-    *     Same rule applies for OpenGL ES >= 3.1.
-    */
-
-   if (prog->NumShaders == 0 &&
-       ((ctx->API == API_OPENGL_CORE && ctx->Version >= 45) ||
-        (ctx->API == API_OPENGLES2 && ctx->Version >= 31))) {
-      linker_error(prog, "No shader objects are attached to program.\n");
-      goto done;
-   }
-
    /* Some shaders have to be linked with some other shaders present.
     */
    if (num_shaders[MESA_SHADER_GEOMETRY] > 0 &&

From 20823992b41285f0fed77a4ba6f421420799c819 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Wed, 27 Jan 2016 15:34:53 +1100
Subject: [PATCH 43/94] glsl: small tidy up now that link_shaders() exits early
 with 0 shaders

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/compiler/glsl/linker.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index 31efb57b035..69830937d3b 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -4149,13 +4149,11 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
 
    unsigned min_version = UINT_MAX;
    unsigned max_version = 0;
-   const bool is_es_prog =
-      (prog->NumShaders > 0 && prog->Shaders[0]->IsES) ? true : false;
    for (unsigned i = 0; i < prog->NumShaders; i++) {
       min_version = MIN2(min_version, prog->Shaders[i]->Version);
       max_version = MAX2(max_version, prog->Shaders[i]->Version);
 
-      if (prog->Shaders[i]->IsES != is_es_prog) {
+      if (prog->Shaders[i]->IsES != prog->Shaders[0]->IsES) {
 	 linker_error(prog, "all shaders must use same shading "
 		      "language version\n");
 	 goto done;
@@ -4173,14 +4171,14 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
    /* In desktop GLSL, different shader versions may be linked together.  In
     * GLSL ES, all shader versions must be the same.
     */
-   if (is_es_prog && min_version != max_version) {
+   if (prog->Shaders[0]->IsES && min_version != max_version) {
       linker_error(prog, "all shaders must use same shading "
 		   "language version\n");
       goto done;
    }
 
    prog->Version = max_version;
-   prog->IsES = is_es_prog;
+   prog->IsES = prog->Shaders[0]->IsES;
 
    /* Some shaders have to be linked with some other shaders present.
     */
@@ -4363,7 +4361,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
     *
     * This rule also applies to GLSL ES 3.00.
     */
-   if (max_version >= (is_es_prog ? 300 : 130)) {
+   if (max_version >= (prog->IsES ? 300 : 130)) {
       struct gl_shader *sh = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
       if (sh) {
 	 lower_discard_flow(sh->ir);

From 55fa3c44bc00a7761c2616bcea7eed7d5a775ffc Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Wed, 27 Jan 2016 15:42:58 +1100
Subject: [PATCH 44/94] glsl: simplify required stages for linking rules

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/compiler/glsl/linker.cpp | 84 ++++++++++++++++++------------------
 1 file changed, 41 insertions(+), 43 deletions(-)

diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index 69830937d3b..a370643197b 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -4182,50 +4182,48 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
 
    /* Some shaders have to be linked with some other shaders present.
     */
-   if (num_shaders[MESA_SHADER_GEOMETRY] > 0 &&
-       num_shaders[MESA_SHADER_VERTEX] == 0 &&
-       !prog->SeparateShader) {
-      linker_error(prog, "Geometry shader must be linked with "
-		   "vertex shader\n");
-      goto done;
-   }
-   if (num_shaders[MESA_SHADER_TESS_EVAL] > 0 &&
-       num_shaders[MESA_SHADER_VERTEX] == 0 &&
-       !prog->SeparateShader) {
-      linker_error(prog, "Tessellation evaluation shader must be linked with "
-		   "vertex shader\n");
-      goto done;
-   }
-   if (num_shaders[MESA_SHADER_TESS_CTRL] > 0 &&
-       num_shaders[MESA_SHADER_VERTEX] == 0 &&
-       !prog->SeparateShader) {
-      linker_error(prog, "Tessellation control shader must be linked with "
-		   "vertex shader\n");
-      goto done;
-   }
+   if (!prog->SeparateShader) {
+      if (num_shaders[MESA_SHADER_GEOMETRY] > 0 &&
+          num_shaders[MESA_SHADER_VERTEX] == 0) {
+         linker_error(prog, "Geometry shader must be linked with "
+		      "vertex shader\n");
+         goto done;
+      }
+      if (num_shaders[MESA_SHADER_TESS_EVAL] > 0 &&
+          num_shaders[MESA_SHADER_VERTEX] == 0) {
+         linker_error(prog, "Tessellation evaluation shader must be linked "
+		      "with vertex shader\n");
+         goto done;
+      }
+      if (num_shaders[MESA_SHADER_TESS_CTRL] > 0 &&
+          num_shaders[MESA_SHADER_VERTEX] == 0) {
+         linker_error(prog, "Tessellation control shader must be linked with "
+		      "vertex shader\n");
+         goto done;
+      }
 
-   /* The spec is self-contradictory here. It allows linking without a tess
-    * eval shader, but that can only be used with transform feedback and
-    * rasterization disabled. However, transform feedback isn't allowed
-    * with GL_PATCHES, so it can't be used.
-    *
-    * More investigation showed that the idea of transform feedback after
-    * a tess control shader was dropped, because some hw vendors couldn't
-    * support tessellation without a tess eval shader, but the linker section
-    * wasn't updated to reflect that.
-    *
-    * All specifications (ARB_tessellation_shader, GL 4.0-4.5) have this
-    * spec bug.
-    *
-    * Do what's reasonable and always require a tess eval shader if a tess
-    * control shader is present.
-    */
-   if (num_shaders[MESA_SHADER_TESS_CTRL] > 0 &&
-       num_shaders[MESA_SHADER_TESS_EVAL] == 0 &&
-       !prog->SeparateShader) {
-      linker_error(prog, "Tessellation control shader must be linked with "
-		   "tessellation evaluation shader\n");
-      goto done;
+      /* The spec is self-contradictory here. It allows linking without a tess
+       * eval shader, but that can only be used with transform feedback and
+       * rasterization disabled. However, transform feedback isn't allowed
+       * with GL_PATCHES, so it can't be used.
+       *
+       * More investigation showed that the idea of transform feedback after
+       * a tess control shader was dropped, because some hw vendors couldn't
+       * support tessellation without a tess eval shader, but the linker
+       * section wasn't updated to reflect that.
+       *
+       * All specifications (ARB_tessellation_shader, GL 4.0-4.5) have this
+       * spec bug.
+       *
+       * Do what's reasonable and always require a tess eval shader if a tess
+       * control shader is present.
+       */
+      if (num_shaders[MESA_SHADER_TESS_CTRL] > 0 &&
+          num_shaders[MESA_SHADER_TESS_EVAL] == 0) {
+         linker_error(prog, "Tessellation control shader must be linked with "
+		      "tessellation evaluation shader\n");
+         goto done;
+      }
    }
 
    /* Compute shaders have additional restrictions. */

From fd0b89ad8d7ba10045683e4768a89811c8633a85 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Wed, 27 Jan 2016 16:16:01 +1100
Subject: [PATCH 45/94] glsl: simplify ES Vertex/Fragment shader requirements

We really just needed to skip the existing ES < 3.1 check if we have
a compute shader, all other scenarios are already covered.

* No shaders is a link error.
* Geom or Tess without Vertex is a link error which means we always
  require a Vertex shader and hence a Fragment shader.
* Finally a Compute shader linked with any other stage is a link error.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/compiler/glsl/linker.cpp | 56 ++++++++++++++++++------------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index a370643197b..c6fdbe999ec 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -4566,38 +4566,38 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
    if (!prog->LinkStatus)
       goto done;
 
-   /* OpenGL ES requires that a vertex shader and a fragment shader both be
-    * present in a linked program. GL_ARB_ES2_compatibility doesn't say
+   /* OpenGL ES < 3.1 requires that a vertex shader and a fragment shader both
+    * be present in a linked program. GL_ARB_ES2_compatibility doesn't say
     * anything about shader linking when one of the shaders (vertex or
     * fragment shader) is absent. So, the extension shouldn't change the
     * behavior specified in GLSL specification.
+    *
+    * From OpenGL ES 3.1 specification (7.3 Program Objects):
+    *     "Linking can fail for a variety of reasons as specified in the
+    *     OpenGL ES Shading Language Specification, as well as any of the
+    *     following reasons:
+    *
+    *     ...
+    *
+    *     * program contains objects to form either a vertex shader or
+    *       fragment shader, and program is not separable, and does not
+    *       contain objects to form both a vertex shader and fragment
+    *       shader."
+    *
+    * However, the only scenario in 3.1+ where we don't require them both is
+    * when we have a compute shader. For example:
+    *
+    * - No shaders is a link error.
+    * - Geom or Tess without a Vertex shader is a link error which means we
+    *   always require a Vertex shader and hence a Fragment shader.
+    * - Finally a Compute shader linked with any other stage is a link error.
     */
-   if (!prog->SeparateShader && ctx->API == API_OPENGLES2) {
-      /* With ES < 3.1 one needs to have always vertex + fragment shader. */
-      if (ctx->Version < 31) {
-         if (prog->_LinkedShaders[MESA_SHADER_VERTEX] == NULL) {
-	    linker_error(prog, "program lacks a vertex shader\n");
-         } else if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) {
-	    linker_error(prog, "program lacks a fragment shader\n");
-         }
-      } else {
-         /* From OpenGL ES 3.1 specification (7.3 Program Objects):
-          *     "Linking can fail for a variety of reasons as specified in the
-          *     OpenGL ES Shading Language Specification, as well as any of the
-          *     following reasons:
-          *
-          *     ...
-          *
-          *     * program contains objects to form either a vertex shader or
-          *       fragment shader, and program is not separable, and does not
-          *       contain objects to form both a vertex shader and fragment
-          *       shader."
-          */
-         if (!!prog->_LinkedShaders[MESA_SHADER_VERTEX] ^
-             !!prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) {
-            linker_error(prog, "Program needs to contain both vertex and "
-                         "fragment shaders.\n");
-         }
+   if (!prog->SeparateShader && ctx->API == API_OPENGLES2 &&
+       num_shaders[MESA_SHADER_COMPUTE] == 0) {
+      if (prog->_LinkedShaders[MESA_SHADER_VERTEX] == NULL) {
+	 linker_error(prog, "program lacks a vertex shader\n");
+      } else if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) {
+	 linker_error(prog, "program lacks a fragment shader\n");
       }
    }
 

From 9dd6a4ea793dd050cebbacbd5f429d4e8e57ee26 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Sat, 23 Jan 2016 09:08:23 +1100
Subject: [PATCH 46/94] glsl: clean up and fix bug in varying linking rules

The existing code was very hard to follow and has been the source
of at least 3 bugs in the past year.

The existing code also has a bug for SSO where if we have a
multi-stage SSO for example a tes -> gs program, if we try to use
transform feedback with gs the existing code would look for the
transform feedback varyings in the tes stage and fail as it can't
find them.

V2: Add more code comments, always try to remove unused inputs
to the first stage.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/compiler/glsl/linker.cpp | 145 ++++++++++++++++-------------------
 1 file changed, 67 insertions(+), 78 deletions(-)

diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index c6fdbe999ec..a245a11046a 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -4462,93 +4462,82 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
          goto done;
    }
 
-   /* Linking the stages in the opposite order (from fragment to vertex)
-    * ensures that inter-shader outputs written to in an earlier stage are
-    * eliminated if they are (transitively) not used in a later stage.
+   /* If there is no fragment shader we need to set transform feedback.
+    *
+    * For SSO we also need to assign output locations, we assign them
+    * here because we need to do it for both single stage programs and multi
+    * stage programs.
     */
-   int next;
+   if (last < MESA_SHADER_FRAGMENT &&
+       (num_tfeedback_decls != 0 || prog->SeparateShader)) {
+      if (!assign_varying_locations(ctx, mem_ctx, prog,
+                                    prog->_LinkedShaders[last], NULL,
+                                    num_tfeedback_decls, tfeedback_decls))
+         goto done;
+   }
 
-   if (first < MESA_SHADER_FRAGMENT) {
-      gl_shader *const sh = prog->_LinkedShaders[last];
-
-      if (first != MESA_SHADER_VERTEX) {
-         /* There was no vertex shader, but we still have to assign varying
-          * locations for use by tessellation/geometry shader inputs in SSO.
-          *
-          * If the shader is not separable (i.e., prog->SeparateShader is
-          * false), linking will have already failed when first is not
-          * MESA_SHADER_VERTEX.
-          */
-         if (!assign_varying_locations(ctx, mem_ctx, prog,
-                                       NULL, prog->_LinkedShaders[first],
-                                       num_tfeedback_decls, tfeedback_decls))
-            goto done;
-      }
-
-      if (last != MESA_SHADER_FRAGMENT &&
-         (num_tfeedback_decls != 0 || prog->SeparateShader)) {
-         /* There was no fragment shader, but we still have to assign varying
-          * locations for use by transform feedback.
-          */
-         if (!assign_varying_locations(ctx, mem_ctx, prog,
-                                       sh, NULL,
-                                       num_tfeedback_decls, tfeedback_decls))
-            goto done;
-      }
-
-      do_dead_builtin_varyings(ctx, sh, NULL,
-                               num_tfeedback_decls, tfeedback_decls);
-
-      remove_unused_shader_inputs_and_outputs(prog->SeparateShader, sh,
+   if (last <= MESA_SHADER_FRAGMENT) {
+      /* Remove unused varyings from the first/last stage unless SSO */
+      remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
+                                              prog->_LinkedShaders[first],
+                                              ir_var_shader_in);
+      remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
+                                              prog->_LinkedShaders[last],
                                               ir_var_shader_out);
-   }
-   else if (first == MESA_SHADER_FRAGMENT) {
-      /* If the program only contains a fragment shader...
-       */
-      gl_shader *const sh = prog->_LinkedShaders[first];
 
-      do_dead_builtin_varyings(ctx, NULL, sh,
-                               num_tfeedback_decls, tfeedback_decls);
+      /* If the program is made up of only a single stage */
+      if (first == last) {
 
-      if (prog->SeparateShader) {
-         if (!assign_varying_locations(ctx, mem_ctx, prog,
-                                       NULL /* producer */,
-                                       sh /* consumer */,
-                                       0 /* num_tfeedback_decls */,
-                                       NULL /* tfeedback_decls */))
-            goto done;
+         gl_shader *const sh = prog->_LinkedShaders[last];
+         if (prog->SeparateShader) {
+            /* Assign input locations for SSO, output locations are already
+             * assigned.
+             */
+            if (!assign_varying_locations(ctx, mem_ctx, prog,
+                                          NULL /* producer */,
+                                          sh /* consumer */,
+                                          0 /* num_tfeedback_decls */,
+                                          NULL /* tfeedback_decls */))
+               goto done;
+         }
+
+         do_dead_builtin_varyings(ctx, NULL, sh, 0, NULL);
+         do_dead_builtin_varyings(ctx, sh, NULL, num_tfeedback_decls,
+                                  tfeedback_decls);
       } else {
-         remove_unused_shader_inputs_and_outputs(false, sh,
-                                                 ir_var_shader_in);
+         /* Linking the stages in the opposite order (from fragment to vertex)
+          * ensures that inter-shader outputs written to in an earlier stage
+          * are eliminated if they are (transitively) not used in a later
+          * stage.
+          */
+         int next = last;
+         for (int i = next - 1; i >= 0; i--) {
+            if (prog->_LinkedShaders[i] == NULL)
+               continue;
+
+            gl_shader *const sh_i = prog->_LinkedShaders[i];
+            gl_shader *const sh_next = prog->_LinkedShaders[next];
+
+            if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next,
+                      next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
+                      tfeedback_decls))
+               goto done;
+
+            do_dead_builtin_varyings(ctx, sh_i, sh_next,
+                      next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
+                      tfeedback_decls);
+
+            /* This must be done after all dead varyings are eliminated. */
+            if (!check_against_output_limit(ctx, prog, sh_i))
+               goto done;
+            if (!check_against_input_limit(ctx, prog, sh_next))
+               goto done;
+
+            next = i;
+         }
       }
    }
 
-   next = last;
-   for (int i = next - 1; i >= 0; i--) {
-      if (prog->_LinkedShaders[i] == NULL)
-         continue;
-
-      gl_shader *const sh_i = prog->_LinkedShaders[i];
-      gl_shader *const sh_next = prog->_LinkedShaders[next];
-
-      if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next,
-                next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
-                tfeedback_decls))
-         goto done;
-
-      do_dead_builtin_varyings(ctx, sh_i, sh_next,
-                next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
-                tfeedback_decls);
-
-      /* This must be done after all dead varyings are eliminated. */
-      if (!check_against_output_limit(ctx, prog, sh_i))
-         goto done;
-      if (!check_against_input_limit(ctx, prog, sh_next))
-         goto done;
-
-      next = i;
-   }
-
    if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls))
       goto done;
 

From 6235b6913449243cec5213734881d1c5e1ccc861 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Sat, 30 Jan 2016 10:50:12 +1100
Subject: [PATCH 47/94] glsl: remove unrequired forward declaration

This was added in 2548092ad80156a4 although I don't see why as it
was already in the linker.h header.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/compiler/glsl/linker.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index a245a11046a..9dbb92698c1 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -82,8 +82,6 @@
 #include "main/enums.h"
 
 
-void linker_error(gl_shader_program *, const char *, ...);
-
 namespace {
 
 /**

From 1aae5e8cedcb4b9635965d784f3e3803007b2047 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Tue, 2 Feb 2016 11:53:57 +1100
Subject: [PATCH 48/94] nir: remove unused nir_variable fields

These are used in GLSL IR to remove unused varyings and match
transform feedback variables. There is no need to use these in NIR.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/compiler/nir/glsl_to_nir.cpp |  2 --
 src/compiler/nir/nir.h           | 18 ------------------
 2 files changed, 20 deletions(-)

diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp
index 365fd4d0995..3db27751289 100644
--- a/src/compiler/nir/glsl_to_nir.cpp
+++ b/src/compiler/nir/glsl_to_nir.cpp
@@ -364,8 +364,6 @@ nir_visitor::visit(ir_variable *ir)
    var->data.explicit_binding = ir->data.explicit_binding;
    var->data.has_initializer = ir->data.has_initializer;
    var->data.location_frac = ir->data.location_frac;
-   var->data.from_named_ifc_block_array = ir->data.from_named_ifc_block_array;
-   var->data.from_named_ifc_block_nonarray = ir->data.from_named_ifc_block_nonarray;
 
    switch (ir->data.depth_layout) {
    case ir_depth_layout_none:
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 4968460834f..a4dbfde21d0 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -223,24 +223,6 @@ typedef struct nir_variable {
        */
       unsigned location_frac:2;
 
-      /**
-       * Non-zero if this variable was created by lowering a named interface
-       * block which was not an array.
-       *
-       * Note that this variable and \c from_named_ifc_block_array will never
-       * both be non-zero.
-       */
-      unsigned from_named_ifc_block_nonarray:1;
-
-      /**
-       * Non-zero if this variable was created by lowering a named interface
-       * block which was an array.
-       *
-       * Note that this variable and \c from_named_ifc_block_nonarray will never
-       * both be non-zero.
-       */
-      unsigned from_named_ifc_block_array:1;
-
       /**
        * \brief Layout qualifier for gl_FragDepth.
        *

From 83b4d701c082bb43dc710be9ec423171ea11e8d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sun, 7 Feb 2016 20:25:01 +0100
Subject: [PATCH 49/94] winsys/radeon: fix a wrong NUM_TILE_PIPES value from
 the kernel
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94019

Tested-by: Nick Sarnie <commendsarnex@gmail.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 35dc7e69dcf..49c310cfdf7 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -405,6 +405,12 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
             radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL,
                                  &ws->info.num_tile_pipes);
 
+            /* The kernel returns 12 for some cards for an unknown reason.
+             * I thought this was supposed to be a power of two.
+             */
+            if (ws->gen == DRV_SI && ws->info.num_tile_pipes == 12)
+                ws->info.num_tile_pipes = 8;
+
             if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL,
                                       &ws->info.r600_gb_backend_map))
                 ws->info.r600_gb_backend_map_valid = TRUE;

From 452e51bf1ea6d6896f944192d40547f334f09676 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nicolai.haehnle@amd.com>
Date: Thu, 21 Jan 2016 16:03:17 -0500
Subject: [PATCH 50/94] draw: use util_pstipple_create_fragment_shader

This reduces code duplication. It also adds support for drivers where the
fragment position is a system value.

Suggested-by: Jose Fonseca <jfonseca@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
---
 .../auxiliary/draw/draw_pipe_pstipple.c       | 209 +-----------------
 1 file changed, 12 insertions(+), 197 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index cf52ca48b26..e468cc38910 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -43,10 +43,10 @@
 #include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+#include "util/u_pstipple.h"
 #include "util/u_sampler.h"
 
 #include "tgsi/tgsi_transform.h"
-#include "tgsi/tgsi_dump.h"
 
 #include "draw_context.h"
 #include "draw_pipe.h"
@@ -114,178 +114,6 @@ struct pstip_stage
 };
 
 
-
-/**
- * Subclass of tgsi_transform_context, used for transforming the
- * user's fragment shader to add the extra texture sample and fragment kill
- * instructions.
- */
-struct pstip_transform_context {
-   struct tgsi_transform_context base;
-   uint tempsUsed;  /**< bitmask */
-   int wincoordInput;
-   int maxInput;
-   uint samplersUsed;  /**< bitfield of samplers used */
-   bool hasSview;
-   int freeSampler;  /** an available sampler for the pstipple */
-   int texTemp;  /**< temp registers */
-   int numImmed;
-};
-
-
-/**
- * TGSI declaration transform callback.
- * Look for a free sampler, a free input attrib, and two free temp regs.
- */
-static void
-pstip_transform_decl(struct tgsi_transform_context *ctx,
-                     struct tgsi_full_declaration *decl)
-{
-   struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx;
-
-   if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
-      uint i;
-      for (i = decl->Range.First;
-           i <= decl->Range.Last; i++) {
-         pctx->samplersUsed |= 1 << i;
-      }
-   }
-   else if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
-      pctx->hasSview = true;
-   }
-   else if (decl->Declaration.File == TGSI_FILE_INPUT) {
-      pctx->maxInput = MAX2(pctx->maxInput, (int) decl->Range.Last);
-      if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION)
-         pctx->wincoordInput = (int) decl->Range.First;
-   }
-   else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
-      uint i;
-      for (i = decl->Range.First;
-           i <= decl->Range.Last; i++) {
-         pctx->tempsUsed |= (1 << i);
-      }
-   }
-
-   ctx->emit_declaration(ctx, decl);
-}
-
-
-/**
- * TGSI immediate declaration transform callback.
- * We're just counting the number of immediates here.
- */
-static void
-pstip_transform_immed(struct tgsi_transform_context *ctx,
-                      struct tgsi_full_immediate *immed)
-{
-   struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx;
-   ctx->emit_immediate(ctx, immed); /* emit to output shader */
-   pctx->numImmed++;
-}
-
-
-/**
- * Find the lowest zero bit in the given word, or -1 if bitfield is all ones.
- */
-static int
-free_bit(uint bitfield)
-{
-   return ffs(~bitfield) - 1;
-}
-
-
-/**
- * TGSI transform prolog callback.
- */
-static void
-pstip_transform_prolog(struct tgsi_transform_context *ctx)
-{
-   struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx;
-   uint i;
-   int wincoordInput;
-
-   /* find free sampler */
-   pctx->freeSampler = free_bit(pctx->samplersUsed);
-   if (pctx->freeSampler >= PIPE_MAX_SAMPLERS)
-      pctx->freeSampler = PIPE_MAX_SAMPLERS - 1;
-
-   if (pctx->wincoordInput < 0)
-      wincoordInput = pctx->maxInput + 1;
-   else
-      wincoordInput = pctx->wincoordInput;
-
-   /* find one free temp reg */
-   for (i = 0; i < 32; i++) {
-      if ((pctx->tempsUsed & (1 << i)) == 0) {
-      /* found a free temp */
-      if (pctx->texTemp < 0)
-         pctx->texTemp  = i;
-      else
-         break;
-      }
-   }
-   assert(pctx->texTemp >= 0);
-
-   if (pctx->wincoordInput < 0) {
-      /* declare new position input reg */
-      tgsi_transform_input_decl(ctx, wincoordInput,
-                                TGSI_SEMANTIC_POSITION, 1,
-                                TGSI_INTERPOLATE_LINEAR);
-   }
-
-   /* declare new sampler */
-   tgsi_transform_sampler_decl(ctx, pctx->freeSampler);
-
-   /* if the src shader has SVIEW decl's for each SAMP decl, we
-    * need to continue the trend and ensure there is a matching
-    * SVIEW for the new SAMP we just created
-    */
-   if (pctx->hasSview) {
-      tgsi_transform_sampler_view_decl(ctx,
-                                       pctx->freeSampler,
-                                       TGSI_TEXTURE_2D,
-                                       TGSI_RETURN_TYPE_FLOAT);
-   }
-
-   /* declare new temp regs */
-   tgsi_transform_temp_decl(ctx, pctx->texTemp);
-
-   /* emit immediate = {1/32, 1/32, 1, 1}
-    * The index/position of this immediate will be pctx->numImmed
-    */
-   tgsi_transform_immediate_decl(ctx, 1.0/32.0, 1.0/32.0, 1.0, 1.0);
-
-   /* 
-    * Insert new MUL/TEX/KILL_IF instructions at start of program
-    * Take gl_FragCoord, divide by 32 (stipple size), sample the
-    * texture and kill fragment if needed.
-    *
-    * We'd like to use non-normalized texcoords to index into a RECT
-    * texture, but we can only use GL_REPEAT wrap mode with normalized
-    * texcoords.  Darn.
-    */
-
-   /* MUL texTemp, INPUT[wincoord], 1/32; */
-   tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
-                           TGSI_FILE_TEMPORARY, pctx->texTemp,
-                           TGSI_WRITEMASK_XYZW,
-                           TGSI_FILE_INPUT, wincoordInput,
-                           TGSI_FILE_IMMEDIATE, pctx->numImmed);
-
-   /* TEX texTemp, texTemp, sampler; */
-   tgsi_transform_tex_2d_inst(ctx,
-                              TGSI_FILE_TEMPORARY, pctx->texTemp,
-                              TGSI_FILE_TEMPORARY, pctx->texTemp,
-                              pctx->freeSampler);
-
-   /* KILL_IF -texTemp.wwww;   # if -texTemp < 0, KILL fragment */
-   tgsi_transform_kill_inst(ctx,
-                            TGSI_FILE_TEMPORARY, pctx->texTemp,
-                            TGSI_SWIZZLE_W, TRUE);
-}
-
-
-
 /**
  * Generate the frag shader we'll use for doing polygon stipple.
  * This will be the user's shader prefixed with a TEX and KIL instruction.
@@ -293,40 +121,27 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)
 static boolean
 generate_pstip_fs(struct pstip_stage *pstip)
 {
+   struct pipe_context *pipe = pstip->pipe;
+   struct pipe_screen *screen = pipe->screen;
    const struct pipe_shader_state *orig_fs = &pstip->fs->state;
    /*struct draw_context *draw = pstip->stage.draw;*/
    struct pipe_shader_state pstip_fs;
-   struct pstip_transform_context transform;
-   const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS;
+   enum tgsi_file_type wincoord_file;
+
+   wincoord_file = screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL) ?
+                   TGSI_FILE_SYSTEM_VALUE : TGSI_FILE_INPUT;
 
    pstip_fs = *orig_fs; /* copy to init */
-   pstip_fs.tokens = tgsi_alloc_tokens(newLen);
+   pstip_fs.tokens = util_pstipple_create_fragment_shader(orig_fs->tokens,
+                                                          &pstip->fs->sampler_unit,
+                                                          0,
+                                                          wincoord_file);
    if (pstip_fs.tokens == NULL)
       return FALSE;
 
-   memset(&transform, 0, sizeof(transform));
-   transform.wincoordInput = -1;
-   transform.maxInput = -1;
-   transform.texTemp = -1;
-   transform.base.prolog = pstip_transform_prolog;
-   transform.base.transform_declaration = pstip_transform_decl;
-   transform.base.transform_immediate = pstip_transform_immed;
-
-   tgsi_transform_shader(orig_fs->tokens,
-                         (struct tgsi_token *) pstip_fs.tokens,
-                         newLen, &transform.base);
-
-#if 0 /* DEBUG */
-   tgsi_dump(orig_fs->tokens, 0);
-   tgsi_dump(pstip_fs.tokens, 0);
-#endif
-
-   assert(pstip->fs);
-
-   pstip->fs->sampler_unit = transform.freeSampler;
    assert(pstip->fs->sampler_unit < PIPE_MAX_SAMPLERS);
 
-   pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs);
+   pstip->fs->pstip_fs = pstip->driver_create_fs_state(pipe, &pstip_fs);
    
    FREE((void *)pstip_fs.tokens);
 

From c260175677c20ed4f11a6679c45391783d07aaeb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nicolai.haehnle@amd.com>
Date: Thu, 21 Jan 2016 16:10:11 -0500
Subject: [PATCH 51/94] draw: use util_pstipple_* function for stipple pattern
 textures and samplers

This reduces code duplication.

Suggested-by: Jose Fonseca <jfonseca@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
---
 .../auxiliary/draw/draw_pipe_pstipple.c       | 121 ++----------------
 src/gallium/auxiliary/util/u_pstipple.c       |   4 +-
 src/gallium/auxiliary/util/u_pstipple.h       |   5 +
 3 files changed, 18 insertions(+), 112 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index e468cc38910..0298334a28f 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -152,113 +152,6 @@ generate_pstip_fs(struct pstip_stage *pstip)
 }
 
 
-/**
- * Load texture image with current stipple pattern.
- */
-static void
-pstip_update_texture(struct pstip_stage *pstip)
-{
-   static const uint bit31 = 1 << 31;
-   struct pipe_context *pipe = pstip->pipe;
-   struct pipe_transfer *transfer;
-   const uint *stipple = pstip->state.stipple->stipple;
-   uint i, j;
-   ubyte *data;
-
-   data = pipe_transfer_map(pipe, pstip->texture, 0, 0,
-                                PIPE_TRANSFER_WRITE, 0, 0, 32, 32, &transfer);
-
-   /*
-    * Load alpha texture.
-    * Note: 0 means keep the fragment, 255 means kill it.
-    * We'll negate the texel value and use KILL_IF which kills if value
-    * is negative.
-    */
-   for (i = 0; i < 32; i++) {
-      for (j = 0; j < 32; j++) {
-         if (stipple[i] & (bit31 >> j)) {
-            /* fragment "on" */
-            data[i * transfer->stride + j] = 0;
-         }
-         else {
-            /* fragment "off" */
-            data[i * transfer->stride + j] = 255;
-         }
-      }
-   }
-
-   /* unmap */
-   pipe_transfer_unmap(pipe, transfer);
-}
-
-
-/**
- * Create the texture map we'll use for stippling.
- */
-static boolean
-pstip_create_texture(struct pstip_stage *pstip)
-{
-   struct pipe_context *pipe = pstip->pipe;
-   struct pipe_screen *screen = pipe->screen;
-   struct pipe_resource texTemp;
-   struct pipe_sampler_view viewTempl;
-
-   memset(&texTemp, 0, sizeof(texTemp));
-   texTemp.target = PIPE_TEXTURE_2D;
-   texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */
-   texTemp.last_level = 0;
-   texTemp.width0 = 32;
-   texTemp.height0 = 32;
-   texTemp.depth0 = 1;
-   texTemp.array_size = 1;
-   texTemp.bind = PIPE_BIND_SAMPLER_VIEW;
-
-   pstip->texture = screen->resource_create(screen, &texTemp);
-   if (pstip->texture == NULL)
-      return FALSE;
-
-   u_sampler_view_default_template(&viewTempl,
-                                   pstip->texture,
-                                   pstip->texture->format);
-   pstip->sampler_view = pipe->create_sampler_view(pipe,
-                                                   pstip->texture,
-                                                   &viewTempl);
-   if (!pstip->sampler_view) {
-      return FALSE;
-   }
-
-   return TRUE;
-}
-
-
-/**
- * Create the sampler CSO that'll be used for stippling.
- */
-static boolean
-pstip_create_sampler(struct pstip_stage *pstip)
-{
-   struct pipe_sampler_state sampler;
-   struct pipe_context *pipe = pstip->pipe;
-
-   memset(&sampler, 0, sizeof(sampler));
-   sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
-   sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
-   sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
-   sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
-   sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
-   sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
-   sampler.normalized_coords = 1;
-   sampler.min_lod = 0.0f;
-   sampler.max_lod = 0.0f;
-
-   pstip->sampler_cso = pipe->create_sampler_state(pipe, &sampler);
-   if (pstip->sampler_cso == NULL)
-      return FALSE;
-   
-   return TRUE;
-}
-
-
 /**
  * When we're about to draw our first stipple polygon in a batch, this function
  * is called to tell the driver to bind our modified fragment shader.
@@ -537,7 +430,8 @@ pstip_set_polygon_stipple(struct pipe_context *pipe,
    /* pass-through */
    pstip->driver_set_polygon_stipple(pstip->pipe, stipple);
 
-   pstip_update_texture(pstip);
+   util_pstipple_update_stipple_texture(pstip->pipe, pstip->texture,
+                                        pstip->state.stipple->stipple);
 }
 
 
@@ -573,10 +467,17 @@ draw_install_pstipple_stage(struct draw_context *draw,
    pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple;
 
    /* create special texture, sampler state */
-   if (!pstip_create_texture(pstip))
+   pstip->texture = util_pstipple_create_stipple_texture(pipe, NULL);
+   if (!pstip->texture)
       goto fail;
 
-   if (!pstip_create_sampler(pstip))
+   pstip->sampler_view = util_pstipple_create_sampler_view(pipe,
+                                                           pstip->texture);
+   if (!pstip->sampler_view)
+      goto fail;
+
+   pstip->sampler_cso = util_pstipple_create_sampler(pipe);
+   if (!pstip->sampler_cso)
       goto fail;
 
    /* override the driver's functions */
diff --git a/src/gallium/auxiliary/util/u_pstipple.c b/src/gallium/auxiliary/util/u_pstipple.c
index 3428172203b..74e6f99da67 100644
--- a/src/gallium/auxiliary/util/u_pstipple.c
+++ b/src/gallium/auxiliary/util/u_pstipple.c
@@ -58,7 +58,7 @@
 #define NUM_NEW_TOKENS 53
 
 
-static void
+void
 util_pstipple_update_stipple_texture(struct pipe_context *pipe,
                                      struct pipe_resource *tex,
                                      const uint32_t pattern[32])
@@ -118,7 +118,7 @@ util_pstipple_create_stipple_texture(struct pipe_context *pipe,
 
    tex = screen->resource_create(screen, &templat);
 
-   if (tex)
+   if (tex && pattern)
       util_pstipple_update_stipple_texture(pipe, tex, pattern);
 
    return tex;
diff --git a/src/gallium/auxiliary/util/u_pstipple.h b/src/gallium/auxiliary/util/u_pstipple.h
index ef8396f4318..d1662be2839 100644
--- a/src/gallium/auxiliary/util/u_pstipple.h
+++ b/src/gallium/auxiliary/util/u_pstipple.h
@@ -36,6 +36,11 @@ struct pipe_resource;
 struct pipe_shader_state;
 
 
+extern void
+util_pstipple_update_stipple_texture(struct pipe_context *pipe,
+                                     struct pipe_resource *tex,
+                                     const uint32_t pattern[32]);
+
 extern struct pipe_resource *
 util_pstipple_create_stipple_texture(struct pipe_context *pipe,
                                      const uint32_t pattern[32]);

From 0d04ec2fd23f8b1baf62ea0ab8c7a86f23ada619 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 4 Feb 2016 12:47:42 -0500
Subject: [PATCH 52/94] ilo: add PIPE_QUERY_OCCLUSION_PREDICATE support

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Chia-I Wu <olvaffe@gmail.com>
---
 src/gallium/drivers/ilo/ilo_draw.c   | 2 ++
 src/gallium/drivers/ilo/ilo_query.c  | 9 ++++++++-
 src/gallium/drivers/ilo/ilo_render.c | 2 ++
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/ilo/ilo_draw.c b/src/gallium/drivers/ilo/ilo_draw.c
index 69f36ae5df6..6831d2c4eff 100644
--- a/src/gallium/drivers/ilo/ilo_draw.c
+++ b/src/gallium/drivers/ilo/ilo_draw.c
@@ -71,6 +71,7 @@ query_process_bo(const struct ilo_context *ilo, struct ilo_query *q)
 
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
    case PIPE_QUERY_TIME_ELAPSED:
    case PIPE_QUERY_PRIMITIVES_GENERATED:
    case PIPE_QUERY_PRIMITIVES_EMITTED:
@@ -157,6 +158,7 @@ ilo_init_draw_query(struct ilo_context *ilo, struct ilo_query *q)
 
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
    case PIPE_QUERY_TIME_ELAPSED:
    case PIPE_QUERY_PRIMITIVES_GENERATED:
    case PIPE_QUERY_PRIMITIVES_EMITTED:
diff --git a/src/gallium/drivers/ilo/ilo_query.c b/src/gallium/drivers/ilo/ilo_query.c
index 27d08128ab0..106bd42a335 100644
--- a/src/gallium/drivers/ilo/ilo_query.c
+++ b/src/gallium/drivers/ilo/ilo_query.c
@@ -47,7 +47,7 @@ static const struct {
 #define INFOX(prefix) { NULL, NULL, NULL, NULL, }
 
    [PIPE_QUERY_OCCLUSION_COUNTER]      = INFO(draw),
-   [PIPE_QUERY_OCCLUSION_PREDICATE]    = INFOX(draw),
+   [PIPE_QUERY_OCCLUSION_PREDICATE]    = INFO(draw),
    [PIPE_QUERY_TIMESTAMP]              = INFO(draw),
    [PIPE_QUERY_TIMESTAMP_DISJOINT]     = INFOX(draw),
    [PIPE_QUERY_TIME_ELAPSED]           = INFO(draw),
@@ -75,6 +75,7 @@ ilo_create_query(struct pipe_context *pipe, unsigned query_type, unsigned index)
 
    switch (query_type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
    case PIPE_QUERY_TIMESTAMP:
    case PIPE_QUERY_TIME_ELAPSED:
    case PIPE_QUERY_PRIMITIVES_GENERATED:
@@ -163,6 +164,12 @@ query_serialize(const struct ilo_query *q, void *buf)
          dst[0] = q->result.u64;
       }
       break;
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      {
+         uint64_t *dst = buf;
+         dst[0] = !!q->result.u64;
+      }
+      break;
    case PIPE_QUERY_PIPELINE_STATISTICS:
       {
          const struct pipe_query_data_pipeline_statistics *stats =
diff --git a/src/gallium/drivers/ilo/ilo_render.c b/src/gallium/drivers/ilo/ilo_render.c
index 8bc04df4fab..9a47ca80505 100644
--- a/src/gallium/drivers/ilo/ilo_render.c
+++ b/src/gallium/drivers/ilo/ilo_render.c
@@ -202,6 +202,7 @@ ilo_render_get_query_len(const struct ilo_render *render,
 
    switch (query_type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
    case PIPE_QUERY_TIMESTAMP:
    case PIPE_QUERY_TIME_ELAPSED:
       /* no reg */
@@ -268,6 +269,7 @@ ilo_render_emit_query(struct ilo_render *render,
 
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
       pipe_control_dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL |
                          GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT;
       break;

From 0cb1dda36e1a651173fec48c151fd3d07eb8077f Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 4 Feb 2016 12:48:06 -0500
Subject: [PATCH 53/94] nv30: add PIPE_QUERY_OCCLUSION_PREDICATE support

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/nv30/nv30_query.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv30/nv30_query.c b/src/gallium/drivers/nouveau/nv30/nv30_query.c
index 3980be9579a..75a4b0446fe 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_query.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_query.c
@@ -120,6 +120,7 @@ nv30_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
       q->report = 1;
       break;
    case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
       q->enable = NV30_3D_QUERY_ENABLE;
       q->report = 1;
       break;
@@ -203,7 +204,6 @@ nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq,
    struct nv30_query *q = nv30_query(pq);
    volatile uint32_t *ntfy0 = nv30_ntfy(screen, q->qo[0]);
    volatile uint32_t *ntfy1 = nv30_ntfy(screen, q->qo[1]);
-   uint64_t *res64 = &result->u64;
 
    if (ntfy1) {
       while (ntfy1[3] & 0xff000000) {
@@ -227,7 +227,10 @@ nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq,
       nv30_query_object_del(screen, &q->qo[1]);
    }
 
-   *res64 = q->result;
+   if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE)
+      result->b = !!q->result;
+   else
+      result->u64 = q->result;
    return true;
 }
 

From 50235ab3ab9f22565aed596e5a915831d099314d Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 4 Feb 2016 12:48:29 -0500
Subject: [PATCH 54/94] nv50: add PIPE_QUERY_OCCLUSION_PREDICATE support

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/gallium/drivers/nouveau/nv50/nv50_query_hw.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
index cccd3b71672..727b509372d 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
@@ -156,6 +156,7 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
 
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
       hq->nesting = nv50->screen->num_occlusion_queries_active++;
       if (hq->nesting) {
          nv50_hw_query_get(push, q, 0x10, 0x0100f002);
@@ -213,6 +214,7 @@ nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
 
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
       nv50_hw_query_get(push, q, 0, 0x0100f002);
       if (--nv50->screen->num_occlusion_queries_active == 0) {
          PUSH_SPACE(push, 2);
@@ -304,6 +306,9 @@ nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
    case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
       res64[0] = hq->data[1] - hq->data[5];
       break;
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      res8[0] = hq->data[1] != hq->data[5];
+      break;
    case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
    case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
       res64[0] = data64[0] - data64[2];
@@ -372,6 +377,7 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
 
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
       hq->rotate = 32;
       break;
    case PIPE_QUERY_PRIMITIVES_GENERATED:

From 7aca4bb9b130450574b42fd84667f645a0749226 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 4 Feb 2016 21:51:58 -0500
Subject: [PATCH 55/94] st/mesa: make use of the occlusion predicate query
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/mesa/state_tracker/st_cb_queryobj.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_queryobj.c b/src/mesa/state_tracker/st_cb_queryobj.c
index fc239bc778c..cdb9efc762b 100644
--- a/src/mesa/state_tracker/st_cb_queryobj.c
+++ b/src/mesa/state_tracker/st_cb_queryobj.c
@@ -96,7 +96,8 @@ st_BeginQuery(struct gl_context *ctx, struct gl_query_object *q)
    switch (q->Target) {
    case GL_ANY_SAMPLES_PASSED:
    case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
-      /* fall-through */
+      type = PIPE_QUERY_OCCLUSION_PREDICATE;
+      break;
    case GL_SAMPLES_PASSED_ARB:
       type = PIPE_QUERY_OCCLUSION_COUNTER;
       break;
@@ -240,7 +241,14 @@ get_query_result(struct pipe_context *pipe,
       stq->base.Result = data.pipeline_statistics.c_primitives;
       break;
    default:
-      stq->base.Result = data.u64;
+      switch (stq->type) {
+      case PIPE_QUERY_OCCLUSION_PREDICATE:
+         stq->base.Result = !!data.b;
+         break;
+      default:
+         stq->base.Result = data.u64;
+         break;
+      }
       break;
    }
 

From 922be4eab9d2a5d169dc84b3f2d99f08f3d16e5c Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 4 Feb 2016 21:55:13 -0500
Subject: [PATCH 56/94] mesa: remove hack to fix up GL_ANY_SAMPLES_PASSED
 results
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both st/mesa and i965 should return a true/false result now, and the
only other driver implementing queries (radeon) doesn't support
ARB_occlusion_query2 which added that pname.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/mesa/main/queryobj.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/mesa/main/queryobj.c b/src/mesa/main/queryobj.c
index b86692a5f7e..7a70b592c47 100644
--- a/src/mesa/main/queryobj.c
+++ b/src/mesa/main/queryobj.c
@@ -807,11 +807,6 @@ invalid_enum:
       return;
    }
 
-   /* TODO: Have the driver be required to handle this fixup. */
-   if (q->Target == GL_ANY_SAMPLES_PASSED ||
-       q->Target == GL_ANY_SAMPLES_PASSED_CONSERVATIVE)
-      value = !!value;
-
    switch (ptype) {
    case GL_INT: {
       GLint *param = (GLint *)offset;

From 130d34ce65785e27ed3aa8bb9fa9b76995ea61da Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 2 Feb 2016 17:12:46 -0700
Subject: [PATCH 57/94] st/mesa: refactor some bitmap drawing code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move setup/restoration of rendering state into helper functions.
This makes the draw_bitmap_quad() function much more concise.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/mesa/state_tracker/st_cb_bitmap.c | 90 +++++++++++++++++----------
 1 file changed, 57 insertions(+), 33 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index 87c606af896..31f57c455ec 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -248,24 +248,18 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized,
 }
 
 
-
 /**
- * Render a glBitmap by drawing a textured quad
+ * Setup pipeline state prior to rendering the bitmap textured quad.
  */
 static void
-draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
-                 GLsizei width, GLsizei height,
-                 struct pipe_sampler_view *sv,
-                 const GLfloat *color)
+setup_render_state(struct gl_context *ctx,
+                   struct pipe_sampler_view *sv,
+                   const GLfloat *color)
 {
    struct st_context *st = st_context(ctx);
-   struct pipe_context *pipe = st->pipe;
    struct cso_context *cso = st->cso_context;
    struct st_fp_variant *fpv;
    struct st_fp_variant_key key;
-   GLuint maxSize;
-   GLuint offset;
-   struct pipe_resource *vbuf = NULL;
 
    memset(&key, 0, sizeof(key));
    key.st = st->has_shareable_shaders ? NULL : st;
@@ -291,16 +285,6 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
       COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], colorSave);
    }
 
-
-   /* limit checks */
-   /* XXX if the bitmap is larger than the max texture size, break
-    * it up into chunks.
-    */
-   maxSize = 1 << (pipe->screen->get_param(pipe->screen,
-                                    PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1);
-   assert(width <= (GLsizei)maxSize);
-   assert(height <= (GLsizei)maxSize);
-
    cso_save_rasterizer(cso);
    cso_save_fragment_samplers(cso);
    cso_save_fragment_sampler_views(cso);
@@ -372,6 +356,58 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
 
    cso_set_vertex_elements(cso, 3, st->velems_util_draw);
    cso_set_stream_outputs(st->cso_context, 0, NULL, NULL);
+}
+
+
+/**
+ * Restore pipeline state after rendering the bitmap textured quad.
+ */
+static void
+restore_render_state(struct gl_context *ctx)
+{
+   struct st_context *st = st_context(ctx);
+   struct cso_context *cso = st->cso_context;
+
+   cso_restore_rasterizer(cso);
+   cso_restore_fragment_samplers(cso);
+   cso_restore_fragment_sampler_views(cso);
+   cso_restore_viewport(cso);
+   cso_restore_fragment_shader(cso);
+   cso_restore_vertex_shader(cso);
+   cso_restore_tessctrl_shader(cso);
+   cso_restore_tesseval_shader(cso);
+   cso_restore_geometry_shader(cso);
+   cso_restore_vertex_elements(cso);
+   cso_restore_aux_vertex_buffer_slot(cso);
+   cso_restore_stream_outputs(cso);
+}
+
+
+/**
+ * Render a glBitmap by drawing a textured quad
+ */
+static void
+draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
+                 GLsizei width, GLsizei height,
+                 struct pipe_sampler_view *sv,
+                 const GLfloat *color)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_resource *vbuf = NULL;
+   GLuint maxSize;
+   GLuint offset;
+
+   /* limit checks */
+   /* XXX if the bitmap is larger than the max texture size, break
+    * it up into chunks.
+    */
+   maxSize = 1 << (pipe->screen->get_param(pipe->screen,
+                                    PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1);
+   assert(width <= (GLsizei)maxSize);
+   assert(height <= (GLsizei)maxSize);
+
+   setup_render_state(ctx, sv, color);
 
    /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
    z = z * 2.0f - 1.0f;
@@ -389,19 +425,7 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
                               3); /* attribs/vert */
    }
 
-   /* restore state */
-   cso_restore_rasterizer(cso);
-   cso_restore_fragment_samplers(cso);
-   cso_restore_fragment_sampler_views(cso);
-   cso_restore_viewport(cso);
-   cso_restore_fragment_shader(cso);
-   cso_restore_vertex_shader(cso);
-   cso_restore_tessctrl_shader(cso);
-   cso_restore_tesseval_shader(cso);
-   cso_restore_geometry_shader(cso);
-   cso_restore_vertex_elements(cso);
-   cso_restore_aux_vertex_buffer_slot(cso);
-   cso_restore_stream_outputs(cso);
+   restore_render_state(ctx);
 
    pipe_resource_reference(&vbuf, NULL);
 

From a5799de3dc8ddf0e90c2e64438664df3ce84f5ae Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 2 Feb 2016 17:24:34 -0700
Subject: [PATCH 58/94] st/mesa: move the setup_bitmap_vertex_data() code into
 draw_bitmap_quad()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now all the code to set up the vertex data and draw it is in one place.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/mesa/state_tracker/st_cb_bitmap.c | 168 ++++++++++++--------------
 1 file changed, 78 insertions(+), 90 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index 31f57c455ec..c26ee7f1a31 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -176,77 +176,6 @@ make_bitmap_texture(struct gl_context *ctx, GLsizei width, GLsizei height,
    return pt;
 }
 
-static void
-setup_bitmap_vertex_data(struct st_context *st, bool normalized,
-                         int x, int y, int width, int height,
-                         float z, const float color[4],
-			 struct pipe_resource **vbuf,
-			 unsigned *vbuf_offset)
-{
-   const GLfloat fb_width = (GLfloat)st->state.framebuffer.width;
-   const GLfloat fb_height = (GLfloat)st->state.framebuffer.height;
-   const GLfloat x0 = (GLfloat)x;
-   const GLfloat x1 = (GLfloat)(x + width);
-   const GLfloat y0 = (GLfloat)y;
-   const GLfloat y1 = (GLfloat)(y + height);
-   GLfloat sLeft = (GLfloat)0.0, sRight = (GLfloat)1.0;
-   GLfloat tTop = (GLfloat)0.0, tBot = (GLfloat)1.0 - tTop;
-   const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0);
-   const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0);
-   const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0);
-   const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0);
-   GLuint i;
-   float (*vertices)[3][4];  /**< vertex pos + color + texcoord */
-
-   if (!normalized) {
-      sRight = (GLfloat) width;
-      tBot = (GLfloat) height;
-   }
-
-   u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]), 4,
-                  vbuf_offset, vbuf, (void **) &vertices);
-   if (!*vbuf) {
-      return;
-   }
-
-   /* Positions are in clip coords since we need to do clipping in case
-    * the bitmap quad goes beyond the window bounds.
-    */
-   vertices[0][0][0] = clip_x0;
-   vertices[0][0][1] = clip_y0;
-   vertices[0][2][0] = sLeft;
-   vertices[0][2][1] = tTop;
-
-   vertices[1][0][0] = clip_x1;
-   vertices[1][0][1] = clip_y0;
-   vertices[1][2][0] = sRight;
-   vertices[1][2][1] = tTop;
-   
-   vertices[2][0][0] = clip_x1;
-   vertices[2][0][1] = clip_y1;
-   vertices[2][2][0] = sRight;
-   vertices[2][2][1] = tBot;
-   
-   vertices[3][0][0] = clip_x0;
-   vertices[3][0][1] = clip_y1;
-   vertices[3][2][0] = sLeft;
-   vertices[3][2][1] = tBot;
-   
-   /* same for all verts: */
-   for (i = 0; i < 4; i++) {
-      vertices[i][0][2] = z;
-      vertices[i][0][3] = 1.0f;
-      vertices[i][1][0] = color[0];
-      vertices[i][1][1] = color[1];
-      vertices[i][1][2] = color[2];
-      vertices[i][1][3] = color[3];
-      vertices[i][2][2] = 0.0; /*R*/
-      vertices[i][2][3] = 1.0; /*Q*/
-   }
-
-   u_upload_unmap(st->uploader);
-}
-
 
 /**
  * Setup pipeline state prior to rendering the bitmap textured quad.
@@ -395,36 +324,95 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
    struct st_context *st = st_context(ctx);
    struct pipe_context *pipe = st->pipe;
    struct pipe_resource *vbuf = NULL;
-   GLuint maxSize;
-   GLuint offset;
+   const float fb_width = (float) st->state.framebuffer.width;
+   const float fb_height = (float) st->state.framebuffer.height;
+   const float x0 = (float) x;
+   const float x1 = (float) (x + width);
+   const float y0 = (float) y;
+   const float y1 = (float) (y + height);
+   float sLeft = 0.0f, sRight = 1.0f;
+   float tTop = 0.0f, tBot = 1.0f - tTop;
+   const float clip_x0 = x0 / fb_width * 2.0f - 1.0f;
+   const float clip_y0 = y0 / fb_height * 2.0f - 1.0f;
+   const float clip_x1 = x1 / fb_width * 2.0f - 1.0f;
+   const float clip_y1 = y1 / fb_height * 2.0f - 1.0f;
+   float (*vertices)[3][4];  /**< vertex pos + color + texcoord */
+   unsigned offset, i;
 
    /* limit checks */
-   /* XXX if the bitmap is larger than the max texture size, break
-    * it up into chunks.
-    */
-   maxSize = 1 << (pipe->screen->get_param(pipe->screen,
+   {
+      /* XXX if the bitmap is larger than the max texture size, break
+       * it up into chunks.
+       */
+      GLuint maxSize = 1 << (pipe->screen->get_param(pipe->screen,
                                     PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1);
-   assert(width <= (GLsizei)maxSize);
-   assert(height <= (GLsizei)maxSize);
+      assert(width <= (GLsizei) maxSize);
+      assert(height <= (GLsizei) maxSize);
+   }
 
    setup_render_state(ctx, sv, color);
 
    /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
    z = z * 2.0f - 1.0f;
 
-   /* draw textured quad */
-   setup_bitmap_vertex_data(st, sv->texture->target != PIPE_TEXTURE_RECT,
-			    x, y, width, height, z, color, &vbuf, &offset);
-
-   if (vbuf) {
-      util_draw_vertex_buffer(pipe, st->cso_context, vbuf,
-                              cso_get_aux_vertex_buffer_slot(st->cso_context),
-                              offset,
-                              PIPE_PRIM_TRIANGLE_FAN,
-                              4,  /* verts */
-                              3); /* attribs/vert */
+   if (sv->texture->target == PIPE_TEXTURE_RECT) {
+      /* use non-normalized texcoords */
+      sRight = (float) width;
+      tBot = (float) height;
    }
 
+   u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]), 4,
+                  &offset, &vbuf, (void **) &vertices);
+   if (!vbuf) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBitmap");
+      restore_render_state(ctx);
+      return;
+   }
+
+   /* Positions are in clip coords since we need to do clipping in case
+    * the bitmap quad goes beyond the window bounds.
+    */
+   vertices[0][0][0] = clip_x0;
+   vertices[0][0][1] = clip_y0;
+   vertices[0][2][0] = sLeft;
+   vertices[0][2][1] = tTop;
+
+   vertices[1][0][0] = clip_x1;
+   vertices[1][0][1] = clip_y0;
+   vertices[1][2][0] = sRight;
+   vertices[1][2][1] = tTop;
+
+   vertices[2][0][0] = clip_x1;
+   vertices[2][0][1] = clip_y1;
+   vertices[2][2][0] = sRight;
+   vertices[2][2][1] = tBot;
+
+   vertices[3][0][0] = clip_x0;
+   vertices[3][0][1] = clip_y1;
+   vertices[3][2][0] = sLeft;
+   vertices[3][2][1] = tBot;
+
+   /* same for all verts: */
+   for (i = 0; i < 4; i++) {
+      vertices[i][0][2] = z;
+      vertices[i][0][3] = 1.0f;
+      vertices[i][1][0] = color[0];
+      vertices[i][1][1] = color[1];
+      vertices[i][1][2] = color[2];
+      vertices[i][1][3] = color[3];
+      vertices[i][2][2] = 0.0; /*R*/
+      vertices[i][2][3] = 1.0; /*Q*/
+   }
+
+   u_upload_unmap(st->uploader);
+
+   util_draw_vertex_buffer(pipe, st->cso_context, vbuf,
+                           cso_get_aux_vertex_buffer_slot(st->cso_context),
+                           offset,
+                           PIPE_PRIM_TRIANGLE_FAN,
+                           4,  /* verts */
+                           3); /* attribs/vert */
+
    restore_render_state(ctx);
 
    pipe_resource_reference(&vbuf, NULL);

From 7d18faf8e7509a575f39b0a409b8167db7561153 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 8 Feb 2016 09:59:40 -0700
Subject: [PATCH 59/94] st/mesa: don't allocate bitmap drawing state until
 needed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Most apps don't use glBitmap so don't allocate the bitmap cache or
gallium state objects/shaders/etc until the first call to st_Bitmap().

v2: simplify a conditional, per Gustaw Smolarczyk.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/mesa/state_tracker/st_cb_bitmap.c | 155 ++++++++++++++------------
 src/mesa/state_tracker/st_cb_bitmap.h |   3 -
 src/mesa/state_tracker/st_context.c   |   1 -
 3 files changed, 82 insertions(+), 77 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index c26ee7f1a31..34809ad7163 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -497,8 +497,9 @@ create_cache_trans(struct st_context *st)
 void
 st_flush_bitmap_cache(struct st_context *st)
 {
-   if (!st->bitmap.cache->empty) {
-      struct bitmap_cache *cache = st->bitmap.cache;
+   struct bitmap_cache *cache = st->bitmap.cache;
+
+   if (cache && !cache->empty) {
       struct pipe_context *pipe = st->pipe;
       struct pipe_sampler_view *sv;
 
@@ -617,83 +618,22 @@ accum_bitmap(struct gl_context *ctx,
 }
 
 
-
 /**
- * Called via ctx->Driver.Bitmap()
+ * One-time init for drawing bitmaps.
  */
 static void
-st_Bitmap(struct gl_context *ctx, GLint x, GLint y,
-          GLsizei width, GLsizei height,
-          const struct gl_pixelstore_attrib *unpack, const GLubyte *bitmap )
-{
-   struct st_context *st = st_context(ctx);
-   struct pipe_resource *pt;
-
-   assert(width > 0);
-   assert(height > 0);
-
-   /* We only need to validate state of the st dirty flags are set or
-    * any non-_NEW_PROGRAM_CONSTANTS mesa flags are set.  The VS we use
-    * for bitmap drawing uses no constants and the FS constants are
-    * explicitly uploaded in the draw_bitmap_quad() function.
-    */
-   if ((st->dirty.mesa & ~_NEW_PROGRAM_CONSTANTS) || st->dirty.st) {
-      st_validate_state(st);
-   }
-
-   if (!st->bitmap.vs) {
-      /* create pass-through vertex shader now */
-      const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
-                                      TGSI_SEMANTIC_COLOR,
-        st->needs_texcoord_semantic ? TGSI_SEMANTIC_TEXCOORD :
-                                      TGSI_SEMANTIC_GENERIC };
-      const uint semantic_indexes[] = { 0, 0, 0 };
-      st->bitmap.vs = util_make_vertex_passthrough_shader(st->pipe, 3,
-                                                          semantic_names,
-                                                          semantic_indexes,
-                                                          FALSE);
-   }
-
-   if (UseBitmapCache && accum_bitmap(ctx, x, y, width, height, unpack, bitmap))
-      return;
-
-   pt = make_bitmap_texture(ctx, width, height, unpack, bitmap);
-   if (pt) {
-      struct pipe_sampler_view *sv =
-         st_create_texture_sampler_view(st->pipe, pt);
-
-      assert(pt->target == PIPE_TEXTURE_2D || pt->target == PIPE_TEXTURE_RECT);
-
-      if (sv) {
-         draw_bitmap_quad(ctx, x, y, ctx->Current.RasterPos[2],
-                          width, height, sv,
-                          st->ctx->Current.RasterColor);
-
-         pipe_sampler_view_reference(&sv, NULL);
-      }
-
-      /* release/free the texture */
-      pipe_resource_reference(&pt, NULL);
-   }
-}
-
-
-/** Per-context init */
-void
-st_init_bitmap_functions(struct dd_function_table *functions)
-{
-   functions->Bitmap = st_Bitmap;
-}
-
-
-/** Per-context init */
-void
-st_init_bitmap(struct st_context *st)
+init_bitmap_state(struct st_context *st)
 {
    struct pipe_sampler_state *sampler = &st->bitmap.samplers[0];
    struct pipe_context *pipe = st->pipe;
    struct pipe_screen *screen = pipe->screen;
 
+   /* This function should only be called once */
+   assert(st->bitmap.cache == NULL);
+
+   /* alloc bitmap cache object */
+   st->bitmap.cache = ST_CALLOC_STRUCT(bitmap_cache);
+
    /* init sampler state once */
    memset(sampler, 0, sizeof(*sampler));
    sampler->wrap_s = PIPE_TEX_WRAP_CLAMP;
@@ -732,13 +672,82 @@ st_init_bitmap(struct st_context *st)
       assert(0);
    }
 
-   /* alloc bitmap cache object */
-   st->bitmap.cache = ST_CALLOC_STRUCT(bitmap_cache);
+   /* Create the vertex shader */
+   {
+      const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
+                                      TGSI_SEMANTIC_COLOR,
+        st->needs_texcoord_semantic ? TGSI_SEMANTIC_TEXCOORD :
+                                      TGSI_SEMANTIC_GENERIC };
+      const uint semantic_indexes[] = { 0, 0, 0 };
+      st->bitmap.vs = util_make_vertex_passthrough_shader(st->pipe, 3,
+                                                          semantic_names,
+                                                          semantic_indexes,
+                                                          FALSE);
+   }
 
    reset_cache(st);
 }
 
 
+/**
+ * Called via ctx->Driver.Bitmap()
+ */
+static void
+st_Bitmap(struct gl_context *ctx, GLint x, GLint y,
+          GLsizei width, GLsizei height,
+          const struct gl_pixelstore_attrib *unpack, const GLubyte *bitmap )
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_resource *pt;
+
+   assert(width > 0);
+   assert(height > 0);
+
+   if (!st->bitmap.cache) {
+      init_bitmap_state(st);
+   }
+
+   /* We only need to validate state of the st dirty flags are set or
+    * any non-_NEW_PROGRAM_CONSTANTS mesa flags are set.  The VS we use
+    * for bitmap drawing uses no constants and the FS constants are
+    * explicitly uploaded in the draw_bitmap_quad() function.
+    */
+   if ((st->dirty.mesa & ~_NEW_PROGRAM_CONSTANTS) || st->dirty.st) {
+      st_validate_state(st);
+   }
+
+   if (UseBitmapCache && accum_bitmap(ctx, x, y, width, height, unpack, bitmap))
+      return;
+
+   pt = make_bitmap_texture(ctx, width, height, unpack, bitmap);
+   if (pt) {
+      struct pipe_sampler_view *sv =
+         st_create_texture_sampler_view(st->pipe, pt);
+
+      assert(pt->target == PIPE_TEXTURE_2D || pt->target == PIPE_TEXTURE_RECT);
+
+      if (sv) {
+         draw_bitmap_quad(ctx, x, y, ctx->Current.RasterPos[2],
+                          width, height, sv,
+                          st->ctx->Current.RasterColor);
+
+         pipe_sampler_view_reference(&sv, NULL);
+      }
+
+      /* release/free the texture */
+      pipe_resource_reference(&pt, NULL);
+   }
+}
+
+
+/** Per-context init */
+void
+st_init_bitmap_functions(struct dd_function_table *functions)
+{
+   functions->Bitmap = st_Bitmap;
+}
+
+
 /** Per-context tear-down */
 void
 st_destroy_bitmap(struct st_context *st)
diff --git a/src/mesa/state_tracker/st_cb_bitmap.h b/src/mesa/state_tracker/st_cb_bitmap.h
index dc7e5cb5c9e..4d1ae222b81 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.h
+++ b/src/mesa/state_tracker/st_cb_bitmap.h
@@ -41,9 +41,6 @@ struct st_fragment_program;
 extern void
 st_init_bitmap_functions(struct dd_function_table *functions);
 
-extern void
-st_init_bitmap(struct st_context *st);
-
 extern void
 st_destroy_bitmap(struct st_context *st);
 
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index 446ebfb563f..9016846b148 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -231,7 +231,6 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
    st->cso_context = cso_create_context(pipe);
 
    st_init_atoms( st );
-   st_init_bitmap(st);
    st_init_clear(st);
    st_init_draw( st );
    st_init_pbo_upload(st);

From b1ddc03633c3bff7e81964ef0c4419cf66d40e02 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 8 Feb 2016 11:29:07 -0700
Subject: [PATCH 60/94] mesa: whitespace clean-ups in dlist.h

And remove 'extern' qualifiers.
---
 src/mesa/main/dlist.h | 47 ++++++++++++++++++++++++++++---------------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/src/mesa/main/dlist.h b/src/mesa/main/dlist.h
index a1214674c62..7a23208ba5a 100644
--- a/src/mesa/main/dlist.h
+++ b/src/mesa/main/dlist.h
@@ -38,46 +38,61 @@
 
 GLboolean GLAPIENTRY
 _mesa_IsList(GLuint list);
+
 void GLAPIENTRY
 _mesa_DeleteLists(GLuint list, GLsizei range);
+
 GLuint GLAPIENTRY
 _mesa_GenLists(GLsizei range);
+
 void GLAPIENTRY
 _mesa_NewList(GLuint name, GLenum mode);
+
 void GLAPIENTRY
 _mesa_EndList(void);
+
 void GLAPIENTRY
-_mesa_CallList( GLuint list );
+_mesa_CallList(GLuint list);
+
 void GLAPIENTRY
-_mesa_CallLists( GLsizei n, GLenum type, const GLvoid *lists );
+_mesa_CallLists(GLsizei n, GLenum type, const GLvoid *lists);
+
 void GLAPIENTRY
 _mesa_ListBase(GLuint base);
 
-extern struct gl_display_list *
+struct gl_display_list *
 _mesa_lookup_list(struct gl_context *ctx, GLuint list);
 
-extern void _mesa_compile_error( struct gl_context *ctx, GLenum error, const char *s );
+void
+_mesa_compile_error(struct gl_context *ctx, GLenum error, const char *s);
 
-extern void *_mesa_dlist_alloc(struct gl_context *ctx, GLuint opcode, GLuint sz);
+void *
+_mesa_dlist_alloc(struct gl_context *ctx, GLuint opcode, GLuint sz);
 
-extern void *
+void *
 _mesa_dlist_alloc_aligned(struct gl_context *ctx, GLuint opcode, GLuint bytes);
 
-extern GLint _mesa_dlist_alloc_opcode( struct gl_context *ctx, GLuint sz,
-                                       void (*execute)( struct gl_context *, void * ),
-                                       void (*destroy)( struct gl_context *, void * ),
-                                       void (*print)( struct gl_context *, void *, FILE * ) );
+GLint
+_mesa_dlist_alloc_opcode(struct gl_context *ctx, GLuint sz,
+                         void (*execute)(struct gl_context *, void *),
+                         void (*destroy)(struct gl_context *, void *),
+                         void (*print)(struct gl_context *, void *, FILE *));
 
-extern void _mesa_delete_list(struct gl_context *ctx, struct gl_display_list *dlist);
+void
+_mesa_delete_list(struct gl_context *ctx, struct gl_display_list *dlist);
 
-extern void _mesa_initialize_save_table(const struct gl_context *);
+void
+_mesa_initialize_save_table(const struct gl_context *);
 
-extern void _mesa_install_dlist_vtxfmt(struct _glapi_table *disp,
-                                       const GLvertexformat *vfmt);
+void
+_mesa_install_dlist_vtxfmt(struct _glapi_table *disp,
+                           const GLvertexformat *vfmt);
 
-extern void _mesa_init_display_list( struct gl_context * ctx );
+void
+_mesa_init_display_list(struct gl_context * ctx);
 
-extern void _mesa_free_display_list_data(struct gl_context *ctx);
+void
+_mesa_free_display_list_data(struct gl_context *ctx);
 
 
 #endif /* DLIST_H */

From 711d5347cf4e4cae60461487bcf416c915aa7395 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 8 Feb 2016 15:30:39 -0700
Subject: [PATCH 61/94] mesa: add missing error check in _mesa_CallLists()

Generate GL_INVALID_VALUE if n < 0.  Return early if n==0 or lists==NULL.

v2: fix formatting, also check for lists==NULL.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/main/dlist.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index cd8e3b6a2f2..65f092936b3 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -9105,6 +9105,14 @@ _mesa_CallLists(GLsizei n, GLenum type, const GLvoid * lists)
       return;
    }
 
+   if (n < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glCallLists(n < 0)");
+      return;
+   } else if (n == 0 || lists == NULL) {
+      /* nothing to do */
+      return;
+   }
+
    /* Save the CompileFlag status, turn it off, execute display list,
     * and restore the CompileFlag.
     */

From 0193e20df531039e89de089bdb33abd4e2095e19 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 8 Feb 2016 17:50:23 -0700
Subject: [PATCH 62/94] mesa: rewrite save_CallLists() code

When glCallLists() is compiled into a display list, preserve the call
as a single glCallLists rather than 'n' glCallList calls.  This will
matter for an upcoming display list optimization project.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/main/dlist.c | 61 +++++++++++++++++++++++++------------------
 1 file changed, 35 insertions(+), 26 deletions(-)

diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index 65f092936b3..fb31d2f2706 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -194,7 +194,7 @@ typedef enum
    OPCODE_BLEND_FUNC_SEPARATE_I,
 
    OPCODE_CALL_LIST,
-   OPCODE_CALL_LIST_OFFSET,
+   OPCODE_CALL_LISTS,
    OPCODE_CLEAR,
    OPCODE_CLEAR_ACCUM,
    OPCODE_CLEAR_COLOR,
@@ -706,6 +706,10 @@ _mesa_delete_list(struct gl_context *ctx, struct gl_display_list *dlist)
             free(get_pointer(&n[10]));
             n += InstSize[n[0].opcode];
             break;
+         case OPCODE_CALL_LISTS:
+            free(get_pointer(&n[3]));
+            n += InstSize[n[0].opcode];
+            break;
          case OPCODE_DRAW_PIXELS:
             free(get_pointer(&n[5]));
             n += InstSize[n[0].opcode];
@@ -1569,37 +1573,49 @@ static void GLAPIENTRY
 save_CallLists(GLsizei num, GLenum type, const GLvoid * lists)
 {
    GET_CURRENT_CONTEXT(ctx);
-   GLint i;
-   GLboolean typeErrorFlag;
+   unsigned type_size;
+   Node *n;
+   void *lists_copy;
 
    SAVE_FLUSH_VERTICES(ctx);
 
    switch (type) {
    case GL_BYTE:
    case GL_UNSIGNED_BYTE:
+      type_size = 1;
+      break;
    case GL_SHORT:
    case GL_UNSIGNED_SHORT:
+   case GL_2_BYTES:
+      type_size = 2;
+      break;
+   case GL_3_BYTES:
+      type_size = 3;
+      break;
    case GL_INT:
    case GL_UNSIGNED_INT:
    case GL_FLOAT:
-   case GL_2_BYTES:
-   case GL_3_BYTES:
    case GL_4_BYTES:
-      typeErrorFlag = GL_FALSE;
+      type_size = 4;
       break;
    default:
-      typeErrorFlag = GL_TRUE;
+      type_size = 0;
    }
 
-   for (i = 0; i < num; i++) {
-      GLint list = translate_id(i, type, lists);
-      Node *n = alloc_instruction(ctx, OPCODE_CALL_LIST_OFFSET, 2);
-      if (n) {
-         n[1].i = list;
-         n[2].b = typeErrorFlag;
-      }
+   if (num > 0 && type_size > 0) {
+      /* create a copy of the array of list IDs to save in the display list */
+      lists_copy = memdup(lists, num * type_size);
+   } else {
+      lists_copy = NULL;
    }
 
+   n = alloc_instruction(ctx, OPCODE_CALL_LISTS, 2 + POINTER_DWORDS);
+   if (n) {
+      n[1].i = num;
+      n[2].e = type;
+      save_pointer(&n[3], lists_copy);
+   };
+
    /* After this, we don't know what state we're in.  Invalidate all
     * cached information previously gathered:
     */
@@ -7772,15 +7788,9 @@ execute_list(struct gl_context *ctx, GLuint list)
                execute_list(ctx, n[1].ui);
             }
             break;
-         case OPCODE_CALL_LIST_OFFSET:
-            /* Generated by glCallLists() so we must add ListBase */
-            if (n[2].b) {
-               /* user specified a bad data type at compile time */
-               _mesa_error(ctx, GL_INVALID_ENUM, "glCallLists(type)");
-            }
-            else if (ctx->ListState.CallDepth < MAX_LIST_NESTING) {
-               GLuint list = (GLuint) (ctx->List.ListBase + n[1].i);
-               execute_list(ctx, list);
+         case OPCODE_CALL_LISTS:
+            if (ctx->ListState.CallDepth < MAX_LIST_NESTING) {
+               CALL_CallLists(ctx->Exec, (n[1].i, n[2].e, get_pointer(&n[3])));
             }
             break;
          case OPCODE_CLEAR:
@@ -9736,9 +9746,8 @@ print_list(struct gl_context *ctx, GLuint list, const char *fname)
          case OPCODE_CALL_LIST:
             fprintf(f, "CallList %d\n", (int) n[1].ui);
             break;
-         case OPCODE_CALL_LIST_OFFSET:
-            fprintf(f, "CallList %d + offset %u = %u\n", (int) n[1].ui,
-                         ctx->List.ListBase, ctx->List.ListBase + n[1].ui);
+         case OPCODE_CALL_LISTS:
+            fprintf(f, "CallLists %d, %s\n", n[1].i, enum_string(n[1].e));
             break;
          case OPCODE_DISABLE:
             fprintf(f, "Disable %s\n", enum_string(n[1].e));

From fe14110f359b0665cb0c09aa14f13a5ebb33b1bc Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 9 Feb 2016 09:58:39 -0700
Subject: [PATCH 63/94] mesa: fix incorrect viewport position when
 GL_CLIP_ORIGIN = GL_LOWER_LEFT

Ilia Mirkin found/fixed the mistake.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93813
Cc: "11.1" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
---
 src/mesa/main/viewport.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/viewport.c b/src/mesa/main/viewport.c
index 7d8914291c3..681e46bfcf7 100644
--- a/src/mesa/main/viewport.c
+++ b/src/mesa/main/viewport.c
@@ -456,11 +456,11 @@ _mesa_get_viewport_xform(struct gl_context *ctx, unsigned i,
    translate[0] = half_width + x;
    if (ctx->Transform.ClipOrigin == GL_UPPER_LEFT) {
       scale[1] = -half_height;
-      translate[1] = half_height - y;
    } else {
       scale[1] = half_height;
-      translate[1] = half_height + y;
    }
+   translate[1] = half_height + y;
+
    if (ctx->Transform.ClipDepthMode == GL_NEGATIVE_ONE_TO_ONE) {
       scale[2] = 0.5 * (f - n);
       translate[2] = 0.5 * (n + f);

From b8d31fdedfa3b2f060b42e7145771c5325340733 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 30 Jan 2016 15:10:11 +0100
Subject: [PATCH 64/94] st/mesa: unify variants and delete functions for TCS,
 TES, GS

no difference between those

Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
---
 src/mesa/state_tracker/st_atom_shader.c |   6 +-
 src/mesa/state_tracker/st_cb_program.c  |  18 +-
 src/mesa/state_tracker/st_context.h     |   6 +-
 src/mesa/state_tracker/st_program.c     | 208 +++++++++---------------
 src/mesa/state_tracker/st_program.h     |  88 ++--------
 5 files changed, 110 insertions(+), 216 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c
index 0f9ea101889..2d8a3c3fb57 100644
--- a/src/mesa/state_tracker/st_atom_shader.c
+++ b/src/mesa/state_tracker/st_atom_shader.c
@@ -163,7 +163,7 @@ static void
 update_gp( struct st_context *st )
 {
    struct st_geometry_program *stgp;
-   struct st_gp_variant_key key;
+   struct st_basic_variant_key key;
 
    if (!st->ctx->GeometryProgram._Current) {
       cso_set_geometry_shader_handle(st->cso_context, NULL);
@@ -199,7 +199,7 @@ static void
 update_tcp( struct st_context *st )
 {
    struct st_tessctrl_program *sttcp;
-   struct st_tcp_variant_key key;
+   struct st_basic_variant_key key;
 
    if (!st->ctx->TessCtrlProgram._Current) {
       cso_set_tessctrl_shader_handle(st->cso_context, NULL);
@@ -235,7 +235,7 @@ static void
 update_tep( struct st_context *st )
 {
    struct st_tesseval_program *sttep;
-   struct st_tep_variant_key key;
+   struct st_basic_variant_key key;
 
    if (!st->ctx->TessEvalProgram._Current) {
       cso_set_tesseval_shader_handle(st->cso_context, NULL);
diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c
index 2c4eccf1e06..6f9c53efc40 100644
--- a/src/mesa/state_tracker/st_cb_program.c
+++ b/src/mesa/state_tracker/st_cb_program.c
@@ -153,7 +153,8 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog)
          struct st_geometry_program *stgp =
             (struct st_geometry_program *) prog;
 
-         st_release_gp_variants(st, stgp);
+         st_release_basic_variants(st, stgp->Base.Base.Target,
+                                   &stgp->variants, &stgp->tgsi);
          
          if (stgp->glsl_to_tgsi)
             free_glsl_to_tgsi_visitor(stgp->glsl_to_tgsi);
@@ -175,7 +176,8 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog)
          struct st_tessctrl_program *sttcp =
             (struct st_tessctrl_program *) prog;
 
-         st_release_tcp_variants(st, sttcp);
+         st_release_basic_variants(st, sttcp->Base.Base.Target,
+                                   &sttcp->variants, &sttcp->tgsi);
 
          if (sttcp->glsl_to_tgsi)
             free_glsl_to_tgsi_visitor(sttcp->glsl_to_tgsi);
@@ -186,7 +188,8 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog)
          struct st_tesseval_program *sttep =
             (struct st_tesseval_program *) prog;
 
-         st_release_tep_variants(st, sttep);
+         st_release_basic_variants(st, sttep->Base.Base.Target,
+                                   &sttep->variants, &sttep->tgsi);
 
          if (sttep->glsl_to_tgsi)
             free_glsl_to_tgsi_visitor(sttep->glsl_to_tgsi);
@@ -239,7 +242,8 @@ st_program_string_notify( struct gl_context *ctx,
    else if (target == GL_GEOMETRY_PROGRAM_NV) {
       struct st_geometry_program *stgp = (struct st_geometry_program *) prog;
 
-      st_release_gp_variants(st, stgp);
+      st_release_basic_variants(st, stgp->Base.Base.Target,
+                                &stgp->variants, &stgp->tgsi);
       if (!st_translate_geometry_program(st, stgp))
          return false;
 
@@ -260,7 +264,8 @@ st_program_string_notify( struct gl_context *ctx,
       struct st_tessctrl_program *sttcp =
          (struct st_tessctrl_program *) prog;
 
-      st_release_tcp_variants(st, sttcp);
+      st_release_basic_variants(st, sttcp->Base.Base.Target,
+                                &sttcp->variants, &sttcp->tgsi);
       if (!st_translate_tessctrl_program(st, sttcp))
          return false;
 
@@ -271,7 +276,8 @@ st_program_string_notify( struct gl_context *ctx,
       struct st_tesseval_program *sttep =
          (struct st_tesseval_program *) prog;
 
-      st_release_tep_variants(st, sttep);
+      st_release_basic_variants(st, sttep->Base.Base.Target,
+                                &sttep->variants, &sttep->tgsi);
       if (!st_translate_tesseval_program(st, sttep))
          return false;
 
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index 57076ad0d18..352e795d06a 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -168,9 +168,9 @@ struct st_context
 
    struct st_vp_variant *vp_variant;
    struct st_fp_variant *fp_variant;
-   struct st_gp_variant *gp_variant;
-   struct st_tcp_variant *tcp_variant;
-   struct st_tep_variant *tep_variant;
+   struct st_basic_variant *gp_variant;
+   struct st_basic_variant *tcp_variant;
+   struct st_basic_variant *tep_variant;
 
    struct gl_texture_object *default_texture;
 
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index b3954547418..133869bc8c4 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -140,112 +140,54 @@ st_release_fp_variants(struct st_context *st, struct st_fragment_program *stfp)
 
 
 /**
- * Delete a geometry program variant.  Note the caller must unlink
+ * Delete a basic program variant.  Note the caller must unlink
  * the variant from the linked list.
  */
 static void
-delete_gp_variant(struct st_context *st, struct st_gp_variant *gpv)
+delete_basic_variant(struct st_context *st, struct st_basic_variant *v,
+                     GLenum target)
 {
-   if (gpv->driver_shader) 
-      cso_delete_geometry_shader(st->cso_context, gpv->driver_shader);
-      
-   free(gpv);
+   if (v->driver_shader) {
+      switch (target) {
+      case GL_TESS_CONTROL_PROGRAM_NV:
+         cso_delete_tessctrl_shader(st->cso_context, v->driver_shader);
+         break;
+      case GL_TESS_EVALUATION_PROGRAM_NV:
+         cso_delete_tesseval_shader(st->cso_context, v->driver_shader);
+         break;
+      case GL_GEOMETRY_PROGRAM_NV:
+         cso_delete_geometry_shader(st->cso_context, v->driver_shader);
+         break;
+      default:
+         assert(!"this shouldn't occur");
+      }
+   }
+
+   free(v);
 }
 
 
 /**
- * Free all variants of a geometry program.
+ * Free all basic program variants.
  */
 void
-st_release_gp_variants(struct st_context *st, struct st_geometry_program *stgp)
+st_release_basic_variants(struct st_context *st, GLenum target,
+                          struct st_basic_variant **variants,
+                          struct pipe_shader_state *tgsi)
 {
-   struct st_gp_variant *gpv;
+   struct st_basic_variant *v;
 
-   for (gpv = stgp->variants; gpv; ) {
-      struct st_gp_variant *next = gpv->next;
-      delete_gp_variant(st, gpv);
-      gpv = next;
+   for (v = *variants; v; ) {
+      struct st_basic_variant *next = v->next;
+      delete_basic_variant(st, v, target);
+      v = next;
    }
 
-   stgp->variants = NULL;
+   *variants = NULL;
 
-   if (stgp->tgsi.tokens) {
-      ureg_free_tokens(stgp->tgsi.tokens);
-      stgp->tgsi.tokens = NULL;
-   }
-}
-
-
-/**
- * Delete a tessellation control program variant.  Note the caller must unlink
- * the variant from the linked list.
- */
-static void
-delete_tcp_variant(struct st_context *st, struct st_tcp_variant *tcpv)
-{
-   if (tcpv->driver_shader)
-      cso_delete_tessctrl_shader(st->cso_context, tcpv->driver_shader);
-
-   free(tcpv);
-}
-
-
-/**
- * Free all variants of a tessellation control program.
- */
-void
-st_release_tcp_variants(struct st_context *st, struct st_tessctrl_program *sttcp)
-{
-   struct st_tcp_variant *tcpv;
-
-   for (tcpv = sttcp->variants; tcpv; ) {
-      struct st_tcp_variant *next = tcpv->next;
-      delete_tcp_variant(st, tcpv);
-      tcpv = next;
-   }
-
-   sttcp->variants = NULL;
-
-   if (sttcp->tgsi.tokens) {
-      ureg_free_tokens(sttcp->tgsi.tokens);
-      sttcp->tgsi.tokens = NULL;
-   }
-}
-
-
-/**
- * Delete a tessellation evaluation program variant.  Note the caller must
- * unlink the variant from the linked list.
- */
-static void
-delete_tep_variant(struct st_context *st, struct st_tep_variant *tepv)
-{
-   if (tepv->driver_shader)
-      cso_delete_tesseval_shader(st->cso_context, tepv->driver_shader);
-
-   free(tepv);
-}
-
-
-/**
- * Free all variants of a tessellation evaluation program.
- */
-void
-st_release_tep_variants(struct st_context *st, struct st_tesseval_program *sttep)
-{
-   struct st_tep_variant *tepv;
-
-   for (tepv = sttep->variants; tepv; ) {
-      struct st_tep_variant *next = tepv->next;
-      delete_tep_variant(st, tepv);
-      tepv = next;
-   }
-
-   sttep->variants = NULL;
-
-   if (sttep->tgsi.tokens) {
-      ureg_free_tokens(sttep->tgsi.tokens);
-      sttep->tgsi.tokens = NULL;
+   if (tgsi->tokens) {
+      ureg_free_tokens(tgsi->tokens);
+      tgsi->tokens = NULL;
    }
 }
 
@@ -1324,15 +1266,15 @@ st_translate_geometry_program(struct st_context *st,
 }
 
 
-static struct st_gp_variant *
+static struct st_basic_variant *
 st_create_gp_variant(struct st_context *st,
                      struct st_geometry_program *stgp,
-                     const struct st_gp_variant_key *key)
+                     const struct st_basic_variant_key *key)
 {
    struct pipe_context *pipe = st->pipe;
-   struct st_gp_variant *gpv;
+   struct st_basic_variant *gpv;
 
-   gpv = CALLOC_STRUCT(st_gp_variant);
+   gpv = CALLOC_STRUCT(st_basic_variant);
    if (!gpv)
       return NULL;
 
@@ -1346,12 +1288,12 @@ st_create_gp_variant(struct st_context *st,
 /**
  * Get/create geometry program variant.
  */
-struct st_gp_variant *
+struct st_basic_variant *
 st_get_gp_variant(struct st_context *st,
                   struct st_geometry_program *stgp,
-                  const struct st_gp_variant_key *key)
+                  const struct st_basic_variant_key *key)
 {
-   struct st_gp_variant *gpv;
+   struct st_basic_variant *gpv;
 
    /* Search for existing variant */
    for (gpv = stgp->variants; gpv; gpv = gpv->next) {
@@ -1399,15 +1341,15 @@ st_translate_tessctrl_program(struct st_context *st,
 }
 
 
-static struct st_tcp_variant *
+static struct st_basic_variant *
 st_create_tcp_variant(struct st_context *st,
                       struct st_tessctrl_program *sttcp,
-                      const struct st_tcp_variant_key *key)
+                      const struct st_basic_variant_key *key)
 {
    struct pipe_context *pipe = st->pipe;
-   struct st_tcp_variant *tcpv;
+   struct st_basic_variant *tcpv;
 
-   tcpv = CALLOC_STRUCT(st_tcp_variant);
+   tcpv = CALLOC_STRUCT(st_basic_variant);
    if (!tcpv)
       return NULL;
 
@@ -1421,12 +1363,12 @@ st_create_tcp_variant(struct st_context *st,
 /**
  * Get/create tessellation control program variant.
  */
-struct st_tcp_variant *
+struct st_basic_variant *
 st_get_tcp_variant(struct st_context *st,
                   struct st_tessctrl_program *sttcp,
-                  const struct st_tcp_variant_key *key)
+                  const struct st_basic_variant_key *key)
 {
-   struct st_tcp_variant *tcpv;
+   struct st_basic_variant *tcpv;
 
    /* Search for existing variant */
    for (tcpv = sttcp->variants; tcpv; tcpv = tcpv->next) {
@@ -1496,15 +1438,15 @@ st_translate_tesseval_program(struct st_context *st,
 }
 
 
-static struct st_tep_variant *
+static struct st_basic_variant *
 st_create_tep_variant(struct st_context *st,
                       struct st_tesseval_program *sttep,
-                      const struct st_tep_variant_key *key)
+                      const struct st_basic_variant_key *key)
 {
    struct pipe_context *pipe = st->pipe;
-   struct st_tep_variant *tepv;
+   struct st_basic_variant *tepv;
 
-   tepv = CALLOC_STRUCT(st_tep_variant);
+   tepv = CALLOC_STRUCT(st_basic_variant);
    if (!tepv)
       return NULL;
 
@@ -1518,12 +1460,12 @@ st_create_tep_variant(struct st_context *st,
 /**
  * Get/create tessellation evaluation program variant.
  */
-struct st_tep_variant *
+struct st_basic_variant *
 st_get_tep_variant(struct st_context *st,
                   struct st_tesseval_program *sttep,
-                  const struct st_tep_variant_key *key)
+                  const struct st_basic_variant_key *key)
 {
-   struct st_tep_variant *tepv;
+   struct st_basic_variant *tepv;
 
    /* Search for existing variant */
    for (tepv = sttep->variants; tepv; tepv = tepv->next) {
@@ -1551,15 +1493,15 @@ st_get_tep_variant(struct st_context *st,
  * variants attached to the given program which match the given context.
  */
 static void
-destroy_program_variants(struct st_context *st, struct gl_program *program)
+destroy_program_variants(struct st_context *st, struct gl_program *target)
 {
-   if (!program || program == &_mesa_DummyProgram)
+   if (!target || target == &_mesa_DummyProgram)
       return;
 
-   switch (program->Target) {
+   switch (target->Target) {
    case GL_VERTEX_PROGRAM_ARB:
       {
-         struct st_vertex_program *stvp = (struct st_vertex_program *) program;
+         struct st_vertex_program *stvp = (struct st_vertex_program *) target;
          struct st_vp_variant *vpv, **prevPtr = &stvp->variants;
 
          for (vpv = stvp->variants; vpv; ) {
@@ -1580,7 +1522,7 @@ destroy_program_variants(struct st_context *st, struct gl_program *program)
    case GL_FRAGMENT_PROGRAM_ARB:
       {
          struct st_fragment_program *stfp =
-            (struct st_fragment_program *) program;
+            (struct st_fragment_program *) target;
          struct st_fp_variant *fpv, **prevPtr = &stfp->variants;
 
          for (fpv = stfp->variants; fpv; ) {
@@ -1601,16 +1543,16 @@ destroy_program_variants(struct st_context *st, struct gl_program *program)
    case GL_GEOMETRY_PROGRAM_NV:
       {
          struct st_geometry_program *stgp =
-            (struct st_geometry_program *) program;
-         struct st_gp_variant *gpv, **prevPtr = &stgp->variants;
+            (struct st_geometry_program *) target;
+         struct st_basic_variant *gpv, **prevPtr = &stgp->variants;
 
          for (gpv = stgp->variants; gpv; ) {
-            struct st_gp_variant *next = gpv->next;
+            struct st_basic_variant *next = gpv->next;
             if (gpv->key.st == st) {
                /* unlink from list */
                *prevPtr = next;
                /* destroy this variant */
-               delete_gp_variant(st, gpv);
+               delete_basic_variant(st, gpv, stgp->Base.Base.Target);
             }
             else {
                prevPtr = &gpv->next;
@@ -1622,16 +1564,16 @@ destroy_program_variants(struct st_context *st, struct gl_program *program)
    case GL_TESS_CONTROL_PROGRAM_NV:
       {
          struct st_tessctrl_program *sttcp =
-            (struct st_tessctrl_program *) program;
-         struct st_tcp_variant *tcpv, **prevPtr = &sttcp->variants;
+            (struct st_tessctrl_program *) target;
+         struct st_basic_variant *tcpv, **prevPtr = &sttcp->variants;
 
          for (tcpv = sttcp->variants; tcpv; ) {
-            struct st_tcp_variant *next = tcpv->next;
+            struct st_basic_variant *next = tcpv->next;
             if (tcpv->key.st == st) {
                /* unlink from list */
                *prevPtr = next;
                /* destroy this variant */
-               delete_tcp_variant(st, tcpv);
+               delete_basic_variant(st, tcpv, sttcp->Base.Base.Target);
             }
             else {
                prevPtr = &tcpv->next;
@@ -1643,16 +1585,16 @@ destroy_program_variants(struct st_context *st, struct gl_program *program)
    case GL_TESS_EVALUATION_PROGRAM_NV:
       {
          struct st_tesseval_program *sttep =
-            (struct st_tesseval_program *) program;
-         struct st_tep_variant *tepv, **prevPtr = &sttep->variants;
+            (struct st_tesseval_program *) target;
+         struct st_basic_variant *tepv, **prevPtr = &sttep->variants;
 
          for (tepv = sttep->variants; tepv; ) {
-            struct st_tep_variant *next = tepv->next;
+            struct st_basic_variant *next = tepv->next;
             if (tepv->key.st == st) {
                /* unlink from list */
                *prevPtr = next;
                /* destroy this variant */
-               delete_tep_variant(st, tepv);
+               delete_basic_variant(st, tepv, sttep->Base.Base.Target);
             }
             else {
                prevPtr = &tepv->next;
@@ -1663,7 +1605,7 @@ destroy_program_variants(struct st_context *st, struct gl_program *program)
       break;
    default:
       _mesa_problem(NULL, "Unexpected program target 0x%x in "
-                    "destroy_program_variants_cb()", program->Target);
+                    "destroy_program_variants_cb()", target->Target);
    }
 }
 
@@ -1789,7 +1731,7 @@ st_precompile_shader_variant(struct st_context *st,
 
    case GL_TESS_CONTROL_PROGRAM_NV: {
       struct st_tessctrl_program *p = (struct st_tessctrl_program *)prog;
-      struct st_tcp_variant_key key;
+      struct st_basic_variant_key key;
 
       memset(&key, 0, sizeof(key));
       key.st = st->has_shareable_shaders ? NULL : st;
@@ -1799,7 +1741,7 @@ st_precompile_shader_variant(struct st_context *st,
 
    case GL_TESS_EVALUATION_PROGRAM_NV: {
       struct st_tesseval_program *p = (struct st_tesseval_program *)prog;
-      struct st_tep_variant_key key;
+      struct st_basic_variant_key key;
 
       memset(&key, 0, sizeof(key));
       key.st = st->has_shareable_shaders ? NULL : st;
@@ -1809,7 +1751,7 @@ st_precompile_shader_variant(struct st_context *st,
 
    case GL_GEOMETRY_PROGRAM_NV: {
       struct st_geometry_program *p = (struct st_geometry_program *)prog;
-      struct st_gp_variant_key key;
+      struct st_basic_variant_key key;
 
       memset(&key, 0, sizeof(key));
       key.st = st->has_shareable_shaders ? NULL : st;
diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
index a74531581b4..80d6f81d267 100644
--- a/src/mesa/state_tracker/st_program.h
+++ b/src/mesa/state_tracker/st_program.h
@@ -171,25 +171,24 @@ struct st_vertex_program
 
 
 
-/** Geometry program variant key */
-struct st_gp_variant_key
+/** Key shared by all shaders except VP, FP */
+struct st_basic_variant_key
 {
    struct st_context *st;          /**< variants are per-context */
-   /* no other fields yet */
 };
 
 
 /**
- * Geometry program variant.
+ * Basic program variant, shared by all shaders except VP, FP.
  */
-struct st_gp_variant
+struct st_basic_variant
 {
    /* Parameters which generated this variant. */
-   struct st_gp_variant_key key;
+   struct st_basic_variant_key key;
 
    void *driver_shader;
 
-   struct st_gp_variant *next;
+   struct st_basic_variant *next;
 };
 
 
@@ -202,30 +201,7 @@ struct st_geometry_program
    struct pipe_shader_state tgsi;
    struct glsl_to_tgsi_visitor* glsl_to_tgsi;
 
-   struct st_gp_variant *variants;
-};
-
-
-
-/** Tessellation control program variant key */
-struct st_tcp_variant_key
-{
-   struct st_context *st;          /**< variants are per-context */
-   /* no other fields yet */
-};
-
-
-/**
- * Tessellation control program variant.
- */
-struct st_tcp_variant
-{
-   /* Parameters which generated this variant. */
-   struct st_tcp_variant_key key;
-
-   void *driver_shader;
-
-   struct st_tcp_variant *next;
+   struct st_basic_variant *variants;
 };
 
 
@@ -238,30 +214,7 @@ struct st_tessctrl_program
    struct pipe_shader_state tgsi;
    struct glsl_to_tgsi_visitor* glsl_to_tgsi;
 
-   struct st_tcp_variant *variants;
-};
-
-
-
-/** Tessellation evaluation program variant key */
-struct st_tep_variant_key
-{
-   struct st_context *st;          /**< variants are per-context */
-   /* no other fields yet */
-};
-
-
-/**
- * Tessellation evaluation program variant.
- */
-struct st_tep_variant
-{
-   /* Parameters which generated this variant. */
-   struct st_tep_variant_key key;
-
-   void *driver_shader;
-
-   struct st_tep_variant *next;
+   struct st_basic_variant *variants;
 };
 
 
@@ -274,7 +227,7 @@ struct st_tesseval_program
    struct pipe_shader_state tgsi;
    struct glsl_to_tgsi_visitor* glsl_to_tgsi;
 
-   struct st_tep_variant *variants;
+   struct st_basic_variant *variants;
 };
 
 
@@ -398,20 +351,20 @@ st_get_fp_variant(struct st_context *st,
                   const struct st_fp_variant_key *key);
 
 
-extern struct st_gp_variant *
+extern struct st_basic_variant *
 st_get_gp_variant(struct st_context *st,
                   struct st_geometry_program *stgp,
-                  const struct st_gp_variant_key *key);
+                  const struct st_basic_variant_key *key);
 
-extern struct st_tcp_variant *
+extern struct st_basic_variant *
 st_get_tcp_variant(struct st_context *st,
                    struct st_tessctrl_program *sttcp,
-                   const struct st_tcp_variant_key *key);
+                   const struct st_basic_variant_key *key);
 
-extern struct st_tep_variant *
+extern struct st_basic_variant *
 st_get_tep_variant(struct st_context *st,
                    struct st_tesseval_program *sttep,
-                   const struct st_tep_variant_key *key);
+                   const struct st_basic_variant_key *key);
 
 extern void
 st_release_vp_variants( struct st_context *st,
@@ -422,16 +375,9 @@ st_release_fp_variants( struct st_context *st,
                         struct st_fragment_program *stfp );
 
 extern void
-st_release_gp_variants(struct st_context *st,
-                       struct st_geometry_program *stgp);
-
-extern void
-st_release_tcp_variants(struct st_context *st,
-                        struct st_tessctrl_program *sttcp);
-
-extern void
-st_release_tep_variants(struct st_context *st,
-                        struct st_tesseval_program *sttep);
+st_release_basic_variants(struct st_context *st, GLenum target,
+                          struct st_basic_variant **variants,
+                          struct pipe_shader_state *tgsi);
 
 extern void
 st_destroy_program_variants(struct st_context *st);

From 75be3ee9f9f7078a415498547637365035650cf9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 30 Jan 2016 16:24:29 +0100
Subject: [PATCH 65/94] st/mesa: unify get_variant functions for TCS, TES, GS

Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
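---
Review note: st_get_basic_variant() below always calls
pipe->create_gs_state(), but it is also used for tessellation control
and tessellation evaluation programs (update_tcp, update_tep and
st_precompile_shader_variant). The removed st_create_tcp_variant() and
st_create_tep_variant() called pipe->create_tcs_state() and
pipe->create_tes_state() respectively, so the unified function needs to
select the create hook by shader stage, as delete_basic_variant()
already does by target.
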
 src/mesa/state_tracker/st_atom_shader.c |  18 +--
 src/mesa/state_tracker/st_program.c     | 172 ++++--------------------
 src/mesa/state_tracker/st_program.h     |  17 +--
 3 files changed, 31 insertions(+), 176 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c
index 2d8a3c3fb57..23b7abfc1c5 100644
--- a/src/mesa/state_tracker/st_atom_shader.c
+++ b/src/mesa/state_tracker/st_atom_shader.c
@@ -163,7 +163,6 @@ static void
 update_gp( struct st_context *st )
 {
    struct st_geometry_program *stgp;
-   struct st_basic_variant_key key;
 
    if (!st->ctx->GeometryProgram._Current) {
       cso_set_geometry_shader_handle(st->cso_context, NULL);
@@ -173,10 +172,7 @@ update_gp( struct st_context *st )
    stgp = st_geometry_program(st->ctx->GeometryProgram._Current);
    assert(stgp->Base.Base.Target == GL_GEOMETRY_PROGRAM_NV);
 
-   memset(&key, 0, sizeof(key));
-   key.st = st->has_shareable_shaders ? NULL : st;
-
-   st->gp_variant = st_get_gp_variant(st, stgp, &key);
+   st->gp_variant = st_get_basic_variant(st, &stgp->tgsi, &stgp->variants);
 
    st_reference_geomprog(st, &st->gp, stgp);
 
@@ -199,7 +195,6 @@ static void
 update_tcp( struct st_context *st )
 {
    struct st_tessctrl_program *sttcp;
-   struct st_basic_variant_key key;
 
    if (!st->ctx->TessCtrlProgram._Current) {
       cso_set_tessctrl_shader_handle(st->cso_context, NULL);
@@ -209,10 +204,7 @@ update_tcp( struct st_context *st )
    sttcp = st_tessctrl_program(st->ctx->TessCtrlProgram._Current);
    assert(sttcp->Base.Base.Target == GL_TESS_CONTROL_PROGRAM_NV);
 
-   memset(&key, 0, sizeof(key));
-   key.st = st->has_shareable_shaders ? NULL : st;
-
-   st->tcp_variant = st_get_tcp_variant(st, sttcp, &key);
+   st->tcp_variant = st_get_basic_variant(st, &sttcp->tgsi, &sttcp->variants);
 
    st_reference_tesscprog(st, &st->tcp, sttcp);
 
@@ -235,7 +227,6 @@ static void
 update_tep( struct st_context *st )
 {
    struct st_tesseval_program *sttep;
-   struct st_basic_variant_key key;
 
    if (!st->ctx->TessEvalProgram._Current) {
       cso_set_tesseval_shader_handle(st->cso_context, NULL);
@@ -245,10 +236,7 @@ update_tep( struct st_context *st )
    sttep = st_tesseval_program(st->ctx->TessEvalProgram._Current);
    assert(sttep->Base.Base.Target == GL_TESS_EVALUATION_PROGRAM_NV);
 
-   memset(&key, 0, sizeof(key));
-   key.st = st->has_shareable_shaders ? NULL : st;
-
-   st->tep_variant = st_get_tep_variant(st, sttep, &key);
+   st->tep_variant = st_get_basic_variant(st, &sttep->tgsi, &sttep->variants);
 
    st_reference_tesseprog(st, &st->tep, sttep);
 
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index 133869bc8c4..d8b7f376a02 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -1266,53 +1266,43 @@ st_translate_geometry_program(struct st_context *st,
 }
 
 
-static struct st_basic_variant *
-st_create_gp_variant(struct st_context *st,
-                     struct st_geometry_program *stgp,
-                     const struct st_basic_variant_key *key)
-{
-   struct pipe_context *pipe = st->pipe;
-   struct st_basic_variant *gpv;
-
-   gpv = CALLOC_STRUCT(st_basic_variant);
-   if (!gpv)
-      return NULL;
-
-   /* fill in new variant */
-   gpv->driver_shader = pipe->create_gs_state(pipe, &stgp->tgsi);
-   gpv->key = *key;
-   return gpv;
-}
-
-
 /**
- * Get/create geometry program variant.
+ * Get/create a basic program variant.
  */
 struct st_basic_variant *
-st_get_gp_variant(struct st_context *st,
-                  struct st_geometry_program *stgp,
-                  const struct st_basic_variant_key *key)
+st_get_basic_variant(struct st_context *st,
+                     struct pipe_shader_state *tgsi,
+                     struct st_basic_variant **variants)
 {
-   struct st_basic_variant *gpv;
+   struct pipe_context *pipe = st->pipe;
+   struct st_basic_variant *v;
+   struct st_basic_variant_key key;
+
+   memset(&key, 0, sizeof(key));
+   key.st = st->has_shareable_shaders ? NULL : st;
 
    /* Search for existing variant */
-   for (gpv = stgp->variants; gpv; gpv = gpv->next) {
-      if (memcmp(&gpv->key, key, sizeof(*key)) == 0) {
+   for (v = *variants; v; v = v->next) {
+      if (memcmp(&v->key, &key, sizeof(key)) == 0) {
          break;
       }
    }
 
-   if (!gpv) {
+   if (!v) {
       /* create new */
-      gpv = st_create_gp_variant(st, stgp, key);
-      if (gpv) {
+      v = CALLOC_STRUCT(st_basic_variant);
+      if (v) {
+         /* fill in new variant */
+         v->driver_shader = pipe->create_gs_state(pipe, tgsi);
+         v->key = key;
+
          /* insert into list */
-         gpv->next = stgp->variants;
-         stgp->variants = gpv;
+         v->next = *variants;
+         *variants = v;
       }
    }
 
-   return gpv;
+   return v;
 }
 
 
@@ -1341,56 +1331,6 @@ st_translate_tessctrl_program(struct st_context *st,
 }
 
 
-static struct st_basic_variant *
-st_create_tcp_variant(struct st_context *st,
-                      struct st_tessctrl_program *sttcp,
-                      const struct st_basic_variant_key *key)
-{
-   struct pipe_context *pipe = st->pipe;
-   struct st_basic_variant *tcpv;
-
-   tcpv = CALLOC_STRUCT(st_basic_variant);
-   if (!tcpv)
-      return NULL;
-
-   /* fill in new variant */
-   tcpv->driver_shader = pipe->create_tcs_state(pipe, &sttcp->tgsi);
-   tcpv->key = *key;
-   return tcpv;
-}
-
-
-/**
- * Get/create tessellation control program variant.
- */
-struct st_basic_variant *
-st_get_tcp_variant(struct st_context *st,
-                  struct st_tessctrl_program *sttcp,
-                  const struct st_basic_variant_key *key)
-{
-   struct st_basic_variant *tcpv;
-
-   /* Search for existing variant */
-   for (tcpv = sttcp->variants; tcpv; tcpv = tcpv->next) {
-      if (memcmp(&tcpv->key, key, sizeof(*key)) == 0) {
-         break;
-      }
-   }
-
-   if (!tcpv) {
-      /* create new */
-      tcpv = st_create_tcp_variant(st, sttcp, key);
-      if (tcpv) {
-         /* insert into list */
-         tcpv->next = sttcp->variants;
-         sttcp->variants = tcpv;
-      }
-   }
-
-   return tcpv;
-}
-
-
 /**
  * Translate a tessellation evaluation program to create a new variant.
  */
@@ -1438,56 +1378,6 @@ st_translate_tesseval_program(struct st_context *st,
 }
 
 
-static struct st_basic_variant *
-st_create_tep_variant(struct st_context *st,
-                      struct st_tesseval_program *sttep,
-                      const struct st_basic_variant_key *key)
-{
-   struct pipe_context *pipe = st->pipe;
-   struct st_basic_variant *tepv;
-
-   tepv = CALLOC_STRUCT(st_basic_variant);
-   if (!tepv)
-      return NULL;
-
-   /* fill in new variant */
-   tepv->driver_shader = pipe->create_tes_state(pipe, &sttep->tgsi);
-   tepv->key = *key;
-   return tepv;
-}
-
-
-/**
- * Get/create tessellation evaluation program variant.
- */
-struct st_basic_variant *
-st_get_tep_variant(struct st_context *st,
-                  struct st_tesseval_program *sttep,
-                  const struct st_basic_variant_key *key)
-{
-   struct st_basic_variant *tepv;
-
-   /* Search for existing variant */
-   for (tepv = sttep->variants; tepv; tepv = tepv->next) {
-      if (memcmp(&tepv->key, key, sizeof(*key)) == 0) {
-         break;
-      }
-   }
-
-   if (!tepv) {
-      /* create new */
-      tepv = st_create_tep_variant(st, sttep, key);
-      if (tepv) {
-         /* insert into list */
-         tepv->next = sttep->variants;
-         sttep->variants = tepv;
-      }
-   }
-
-   return tepv;
-}
-
-
 /**
  * Vert/Geom/Frag programs have per-context variants.  Free all the
  * variants attached to the given program which match the given context.
@@ -1731,31 +1621,19 @@ st_precompile_shader_variant(struct st_context *st,
 
    case GL_TESS_CONTROL_PROGRAM_NV: {
       struct st_tessctrl_program *p = (struct st_tessctrl_program *)prog;
-      struct st_basic_variant_key key;
-
-      memset(&key, 0, sizeof(key));
-      key.st = st->has_shareable_shaders ? NULL : st;
-      st_get_tcp_variant(st, p, &key);
+      st_get_basic_variant(st, &p->tgsi, &p->variants);
       break;
    }
 
    case GL_TESS_EVALUATION_PROGRAM_NV: {
       struct st_tesseval_program *p = (struct st_tesseval_program *)prog;
-      struct st_basic_variant_key key;
-
-      memset(&key, 0, sizeof(key));
-      key.st = st->has_shareable_shaders ? NULL : st;
-      st_get_tep_variant(st, p, &key);
+      st_get_basic_variant(st, &p->tgsi, &p->variants);
       break;
    }
 
    case GL_GEOMETRY_PROGRAM_NV: {
       struct st_geometry_program *p = (struct st_geometry_program *)prog;
-      struct st_basic_variant_key key;
-
-      memset(&key, 0, sizeof(key));
-      key.st = st->has_shareable_shaders ? NULL : st;
-      st_get_gp_variant(st, p, &key);
+      st_get_basic_variant(st, &p->tgsi, &p->variants);
       break;
    }
 
diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
index 80d6f81d267..7717d02cd3f 100644
--- a/src/mesa/state_tracker/st_program.h
+++ b/src/mesa/state_tracker/st_program.h
@@ -350,21 +350,10 @@ st_get_fp_variant(struct st_context *st,
                   struct st_fragment_program *stfp,
                   const struct st_fp_variant_key *key);
 
-
 extern struct st_basic_variant *
-st_get_gp_variant(struct st_context *st,
-                  struct st_geometry_program *stgp,
-                  const struct st_basic_variant_key *key);
-
-extern struct st_basic_variant *
-st_get_tcp_variant(struct st_context *st,
-                   struct st_tessctrl_program *sttcp,
-                   const struct st_basic_variant_key *key);
-
-extern struct st_basic_variant *
-st_get_tep_variant(struct st_context *st,
-                   struct st_tesseval_program *sttep,
-                   const struct st_basic_variant_key *key);
+st_get_basic_variant(struct st_context *st,
+                     struct pipe_shader_state *tgsi,
+                     struct st_basic_variant **variants);
 
 extern void
 st_release_vp_variants( struct st_context *st,

From 7046c588eb76a188b9b6b35d44bc9f25d6d2acd4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 30 Jan 2016 16:34:20 +0100
Subject: [PATCH 66/94] st/mesa: unify destroy_program_variants cases for TCS,
 TES, GS

Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
---
 src/mesa/state_tracker/st_program.c | 64 +++++++----------------------
 1 file changed, 15 insertions(+), 49 deletions(-)

diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index d8b7f376a02..624586e6d67 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -1431,65 +1431,31 @@ destroy_program_variants(struct st_context *st, struct gl_program *target)
       }
       break;
    case GL_GEOMETRY_PROGRAM_NV:
-      {
-         struct st_geometry_program *stgp =
-            (struct st_geometry_program *) target;
-         struct st_basic_variant *gpv, **prevPtr = &stgp->variants;
-
-         for (gpv = stgp->variants; gpv; ) {
-            struct st_basic_variant *next = gpv->next;
-            if (gpv->key.st == st) {
-               /* unlink from list */
-               *prevPtr = next;
-               /* destroy this variant */
-               delete_basic_variant(st, gpv, stgp->Base.Base.Target);
-            }
-            else {
-               prevPtr = &gpv->next;
-            }
-            gpv = next;
-         }
-      }
-      break;
    case GL_TESS_CONTROL_PROGRAM_NV:
-      {
-         struct st_tessctrl_program *sttcp =
-            (struct st_tessctrl_program *) target;
-         struct st_basic_variant *tcpv, **prevPtr = &sttcp->variants;
-
-         for (tcpv = sttcp->variants; tcpv; ) {
-            struct st_basic_variant *next = tcpv->next;
-            if (tcpv->key.st == st) {
-               /* unlink from list */
-               *prevPtr = next;
-               /* destroy this variant */
-               delete_basic_variant(st, tcpv, sttcp->Base.Base.Target);
-            }
-            else {
-               prevPtr = &tcpv->next;
-            }
-            tcpv = next;
-         }
-      }
-      break;
    case GL_TESS_EVALUATION_PROGRAM_NV:
       {
-         struct st_tesseval_program *sttep =
-            (struct st_tesseval_program *) target;
-         struct st_basic_variant *tepv, **prevPtr = &sttep->variants;
+         struct st_geometry_program *gp = (struct st_geometry_program*)target;
+         struct st_tessctrl_program *tcp = (struct st_tessctrl_program*)target;
+         struct st_tesseval_program *tep = (struct st_tesseval_program*)target;
+         struct st_basic_variant **variants =
+            target->Target == GL_GEOMETRY_PROGRAM_NV ? &gp->variants :
+            target->Target == GL_TESS_CONTROL_PROGRAM_NV ? &tcp->variants :
+            target->Target == GL_TESS_EVALUATION_PROGRAM_NV ? &tep->variants :
+            NULL;
+         struct st_basic_variant *v, **prevPtr = variants;
 
-         for (tepv = sttep->variants; tepv; ) {
-            struct st_basic_variant *next = tepv->next;
-            if (tepv->key.st == st) {
+         for (v = *variants; v; ) {
+            struct st_basic_variant *next = v->next;
+            if (v->key.st == st) {
                /* unlink from list */
                *prevPtr = next;
                /* destroy this variant */
-               delete_basic_variant(st, tepv, sttep->Base.Base.Target);
+               delete_basic_variant(st, v, target->Target);
             }
             else {
-               prevPtr = &tepv->next;
+               prevPtr = &v->next;
             }
-            tepv = next;
+            v = next;
          }
       }
       break;

From a3e9a5f9f8b135b66b48e95a3abb938624d885ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 30 Jan 2016 16:43:53 +0100
Subject: [PATCH 67/94] st/mesa: remove st_is_program_native

The default scenario sets GL_TRUE too.

Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
---
 src/mesa/state_tracker/st_cb_program.c | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c
index 6f9c53efc40..ca493d84715 100644
--- a/src/mesa/state_tracker/st_cb_program.c
+++ b/src/mesa/state_tracker/st_cb_program.c
@@ -204,18 +204,6 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog)
 }
 
 
-/**
- * Called via ctx->Driver.IsProgramNative()
- */
-static GLboolean
-st_is_program_native(struct gl_context *ctx,
-                     GLenum target, 
-                     struct gl_program *prog)
-{
-   return GL_TRUE;
-}
-
-
 /**
  * Called via ctx->Driver.ProgramStringNotify()
  * Called when the program's text/code is changed.  We have to free
@@ -303,7 +291,6 @@ st_init_program_functions(struct dd_function_table *functions)
    functions->UseProgram = st_use_program;
    functions->NewProgram = st_new_program;
    functions->DeleteProgram = st_delete_program;
-   functions->IsProgramNative = st_is_program_native;
    functions->ProgramStringNotify = st_program_string_notify;
    
    functions->LinkShader = st_link_shader;

From 4bbbaaf191ac7324638f27d3a7309e6f3271a9a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sun, 27 Dec 2015 20:10:33 +0100
Subject: [PATCH 68/94] radeonsi: move si_shader_context initialization into a
 separate function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This will be re-used later.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.c | 101 +++++++++++++----------
 1 file changed, 59 insertions(+), 42 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index d9ed6b234e0..d7f4f463340 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4250,47 +4250,26 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
 	}
 }
 
-int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
-		     struct si_shader *shader,
-		     struct pipe_debug_callback *debug)
+static void si_init_shader_ctx(struct si_shader_context *ctx,
+			       struct si_screen *sscreen,
+			       struct si_shader *shader,
+			       LLVMTargetMachineRef tm,
+			       struct tgsi_shader_info *info)
 {
-	struct si_shader_selector *sel = shader->selector;
-	struct tgsi_token *tokens = sel->tokens;
-	struct si_shader_context si_shader_ctx;
-	struct lp_build_tgsi_context * bld_base;
-	struct tgsi_shader_info stipple_shader_info;
-	LLVMModuleRef mod;
-	int r = 0;
-	bool poly_stipple = sel->type == PIPE_SHADER_FRAGMENT &&
-			    shader->key.ps.poly_stipple;
-	bool dump = r600_can_dump_shader(&sscreen->b, sel->info.processor);
+	struct lp_build_tgsi_context *bld_base;
 
-	if (poly_stipple) {
-		tokens = util_pstipple_create_fragment_shader(tokens, NULL,
-						SI_POLY_STIPPLE_SAMPLER,
-						TGSI_FILE_SYSTEM_VALUE);
-		tgsi_scan_shader(tokens, &stipple_shader_info);
-	}
+	memset(ctx, 0, sizeof(*ctx));
+	radeon_llvm_context_init(&ctx->radeon_bld);
+	ctx->tm = tm;
+	ctx->screen = sscreen;
+	if (shader && shader->selector)
+		ctx->type = shader->selector->info.processor;
+	else
+		ctx->type = -1;
+	ctx->shader = shader;
 
-	/* Dump TGSI code before doing TGSI->LLVM conversion in case the
-	 * conversion fails. */
-	if (dump && !(sscreen->b.debug_flags & DBG_NO_TGSI)) {
-		si_dump_shader_key(sel->type, &shader->key, stderr);
-		tgsi_dump(tokens, 0);
-		si_dump_streamout(&sel->so);
-	}
-
-	assert(shader->nparam == 0);
-
-	memset(&si_shader_ctx, 0, sizeof(si_shader_ctx));
-	radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
-	bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
-
-	if (sel->type != PIPE_SHADER_COMPUTE)
-		shader->dx10_clamp_mode = true;
-
-	shader->uses_instanceid = sel->info.uses_instanceid;
-	bld_base->info = poly_stipple ? &stipple_shader_info : &sel->info;
+	bld_base = &ctx->radeon_bld.soa.bld_base;
+	bld_base->info = info;
 	bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
 
 	bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action;
@@ -4326,12 +4305,50 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 		bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
 		bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
 	}
+}
 
+int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
+		     struct si_shader *shader,
+		     struct pipe_debug_callback *debug)
+{
+	struct si_shader_selector *sel = shader->selector;
+	struct tgsi_token *tokens = sel->tokens;
+	struct si_shader_context si_shader_ctx;
+	struct lp_build_tgsi_context * bld_base;
+	struct tgsi_shader_info stipple_shader_info;
+	LLVMModuleRef mod;
+	int r = 0;
+	bool poly_stipple = sel->type == PIPE_SHADER_FRAGMENT &&
+			    shader->key.ps.poly_stipple;
+	bool dump = r600_can_dump_shader(&sscreen->b, sel->info.processor);
+
+	if (poly_stipple) {
+		tokens = util_pstipple_create_fragment_shader(tokens, NULL,
+						SI_POLY_STIPPLE_SAMPLER,
+						TGSI_FILE_SYSTEM_VALUE);
+		tgsi_scan_shader(tokens, &stipple_shader_info);
+	}
+
+	/* Dump TGSI code before doing TGSI->LLVM conversion in case the
+	 * conversion fails. */
+	if (dump && !(sscreen->b.debug_flags & DBG_NO_TGSI)) {
+		si_dump_shader_key(sel->type, &shader->key, stderr);
+		tgsi_dump(tokens, 0);
+		si_dump_streamout(&sel->so);
+	}
+
+	assert(shader->nparam == 0);
+
+	si_init_shader_ctx(&si_shader_ctx, sscreen, shader, tm,
+			   poly_stipple ? &stipple_shader_info : &sel->info);
+
+	if (sel->type != PIPE_SHADER_COMPUTE)
+		shader->dx10_clamp_mode = true;
+
+	shader->uses_instanceid = sel->info.uses_instanceid;
+
+	bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
 	si_shader_ctx.radeon_bld.load_system_value = declare_system_value;
-	si_shader_ctx.shader = shader;
-	si_shader_ctx.type = tgsi_get_processor_type(tokens);
-	si_shader_ctx.screen = sscreen;
-	si_shader_ctx.tm = tm;
 
 	switch (si_shader_ctx.type) {
 	case TGSI_PROCESSOR_VERTEX:

From 90cbbe1c127500c22ecc509e37da7ee7c21c5f4f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Fri, 1 Jan 2016 19:44:16 +0100
Subject: [PATCH 69/94] radeonsi: generate a color_two_side variant only if the
 shader reads colors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index bbef429edc5..8613af22d97 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -681,7 +681,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 				       sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES_ADJACENCY;
 			bool is_line = !is_poly && sctx->current_rast_prim != PIPE_PRIM_POINTS;
 
-			key->ps.color_two_side = rs->two_side;
+			key->ps.color_two_side = rs->two_side && sel->info.colors_read;
 
 			if (sctx->queued.named.blend) {
 				key->ps.alpha_to_one = sctx->queued.named.blend->alpha_to_one &&

From 606e4185f331ad37e6c20bc1063bff7cb2420e29 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 2 Jan 2016 02:25:51 +0100
Subject: [PATCH 70/94] radeonsi: move SPI_PS_INPUT_CNTL value computation to a
 separate function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 .../drivers/radeonsi/si_state_shaders.c       | 74 ++++++++++---------
 1 file changed, 40 insertions(+), 34 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 8613af22d97..8243d2c881f 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1087,14 +1087,50 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
 	free(sel);
 }
 
+static unsigned si_get_ps_input_cntl(struct si_context *sctx,
+				     struct si_shader *vs, unsigned name,
+				     unsigned index, unsigned interpolate)
+{
+	struct tgsi_shader_info *vsinfo = &vs->selector->info;
+	unsigned j, ps_input_cntl = 0;
+
+	if (interpolate == TGSI_INTERPOLATE_CONSTANT ||
+	    (interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade))
+		ps_input_cntl |= S_028644_FLAT_SHADE(1);
+
+	if (name == TGSI_SEMANTIC_PCOORD ||
+	    (name == TGSI_SEMANTIC_TEXCOORD &&
+	     sctx->sprite_coord_enable & (1 << index))) {
+		ps_input_cntl |= S_028644_PT_SPRITE_TEX(1);
+	}
+
+	for (j = 0; j < vsinfo->num_outputs; j++) {
+		if (name == vsinfo->output_semantic_name[j] &&
+		    index == vsinfo->output_semantic_index[j]) {
+			ps_input_cntl |= S_028644_OFFSET(vs->vs_output_param_offset[j]);
+			break;
+		}
+	}
+
+	if (name == TGSI_SEMANTIC_PRIMID)
+		/* PrimID is written after the last output. */
+		ps_input_cntl |= S_028644_OFFSET(vs->vs_output_param_offset[vsinfo->num_outputs]);
+	else if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(ps_input_cntl)) {
+		/* No corresponding output found, load defaults into input.
+		 * Don't set any other bits.
+		 * (FLAT_SHADE=1 completely changes behavior) */
+		ps_input_cntl = S_028644_OFFSET(0x20);
+	}
+	return ps_input_cntl;
+}
+
 static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
 {
 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	struct si_shader *ps = sctx->ps_shader.current;
 	struct si_shader *vs = si_get_vs_state(sctx);
 	struct tgsi_shader_info *psinfo;
-	struct tgsi_shader_info *vsinfo = &vs->selector->info;
-	unsigned i, j, tmp, num_written = 0;
+	unsigned i, num_written = 0;
 
 	if (!ps || !ps->nparam)
 		return;
@@ -1109,38 +1145,8 @@ static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
 		unsigned interpolate = psinfo->input_interpolate[i];
 		unsigned param_offset = ps->ps_input_param_offset[i];
 bcolor:
-		tmp = 0;
-
-		if (interpolate == TGSI_INTERPOLATE_CONSTANT ||
-		    (interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade))
-			tmp |= S_028644_FLAT_SHADE(1);
-
-		if (name == TGSI_SEMANTIC_PCOORD ||
-		    (name == TGSI_SEMANTIC_TEXCOORD &&
-		     sctx->sprite_coord_enable & (1 << index))) {
-			tmp |= S_028644_PT_SPRITE_TEX(1);
-		}
-
-		for (j = 0; j < vsinfo->num_outputs; j++) {
-			if (name == vsinfo->output_semantic_name[j] &&
-			    index == vsinfo->output_semantic_index[j]) {
-				tmp |= S_028644_OFFSET(vs->vs_output_param_offset[j]);
-				break;
-			}
-		}
-
-		if (name == TGSI_SEMANTIC_PRIMID)
-			/* PrimID is written after the last output. */
-			tmp |= S_028644_OFFSET(vs->vs_output_param_offset[vsinfo->num_outputs]);
-		else if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(tmp)) {
-			/* No corresponding output found, load defaults into input.
-			 * Don't set any other bits.
-			 * (FLAT_SHADE=1 completely changes behavior) */
-			tmp = S_028644_OFFSET(0x20);
-		}
-
-		assert(param_offset == num_written);
-		radeon_emit(cs, tmp);
+		radeon_emit(cs, si_get_ps_input_cntl(sctx, vs, name, index,
+						     interpolate));
 		num_written++;
 
 		if (name == TGSI_SEMANTIC_COLOR &&

From 6dda2455c88a752d513d842cc9be1833fe98a89c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 2 Jan 2016 02:54:29 +0100
Subject: [PATCH 71/94] radeonsi: move BCOLOR PS input locations after all
 other inputs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BCOLOR inputs were immediately after COLOR inputs. Thus, all following inputs
were offset by 1 if color_two_side was enabled, and not offset if it was not
enabled, which is a variation that's problematic if we want to have 1 variant
per shader and the variant doesn't care about color_two_side (that should be
handled by other bytecode attached at the beginning).

Instead, move BCOLOR inputs after all other inputs, so BCOLOR0 is at location
"num_inputs" if it's present. BCOLOR1 is next.

This also allows removing si_shader::nparam and
si_shader::ps_input_param_offset, which are useless now.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.c      | 25 ++++-----
 src/gallium/drivers/radeonsi/si_shader.h      |  2 -
 .../drivers/radeonsi/si_state_shaders.c       | 52 +++++++++++++------
 3 files changed, 50 insertions(+), 29 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index d7f4f463340..79255004f28 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -912,9 +912,7 @@ static void declare_input_fs(
 
 	unsigned chan;
 
-	shader->ps_input_param_offset[input_index] = shader->nparam++;
-	attr_number = lp_build_const_int32(gallivm,
-					   shader->ps_input_param_offset[input_index]);
+	attr_number = lp_build_const_int32(gallivm, input_index);
 
 	shader->ps_input_interpolate[input_index] = decl->Interp.Interpolate;
 	interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
@@ -938,11 +936,19 @@ static void declare_input_fs(
 
 	if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
 	    si_shader_ctx->shader->key.ps.color_two_side) {
+		struct tgsi_shader_info *info = &shader->selector->info;
 		LLVMValueRef args[4];
 		LLVMValueRef face, is_face_positive;
-		LLVMValueRef back_attr_number =
-			lp_build_const_int32(gallivm,
-					     shader->ps_input_param_offset[input_index] + 1);
+		LLVMValueRef back_attr_number;
+
+		/* If BCOLOR0 is used, BCOLOR1 is at offset "num_inputs + 1",
+		 * otherwise it's at offset "num_inputs".
+		 */
+		unsigned back_attr_offset = shader->selector->info.num_inputs;
+		if (decl->Semantic.Index == 1 && info->colors_read & 0xf)
+			back_attr_offset += 1;
+
+		back_attr_number = lp_build_const_int32(gallivm, back_attr_offset);
 
 		face = LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE);
 
@@ -974,8 +980,6 @@ static void declare_input_fs(
 						back,
 						"");
 		}
-
-		shader->nparam++;
 	} else if (decl->Semantic.Name == TGSI_SEMANTIC_FOG) {
 		LLVMValueRef args[4];
 
@@ -3280,8 +3284,7 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
 	else
 		interp_param = NULL;
 
-	attr_number = lp_build_const_int32(gallivm,
-					   shader->ps_input_param_offset[input_index]);
+	attr_number = lp_build_const_int32(gallivm, input_index);
 
 	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
 	    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
@@ -4337,8 +4340,6 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 		si_dump_streamout(&sel->so);
 	}
 
-	assert(shader->nparam == 0);
-
 	si_init_shader_ctx(&si_shader_ctx, sscreen, shader, tm,
 			   poly_stipple ? &stipple_shader_info : &sel->info);
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 98bdb890a45..86d8f725cb6 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -290,9 +290,7 @@ struct si_shader {
 	struct radeon_shader_binary	binary;
 	struct si_shader_config		config;
 
-	unsigned		nparam;
 	unsigned		vs_output_param_offset[PIPE_MAX_SHADER_OUTPUTS];
-	unsigned		ps_input_param_offset[PIPE_MAX_SHADER_INPUTS];
 	unsigned		ps_input_interpolate[PIPE_MAX_SHADER_INPUTS];
 	bool			uses_instanceid;
 	unsigned		nr_pos_exports;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 8243d2c881f..6e7311807dd 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -404,6 +404,18 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
 		si_set_tesseval_regs(shader, pm4);
 }
 
+static unsigned si_get_ps_num_interp(struct si_shader *ps)
+{
+	struct tgsi_shader_info *info = &ps->selector->info;
+	unsigned num_colors = !!(info->colors_read & 0x0f) +
+			      !!(info->colors_read & 0xf0);
+	unsigned num_interp = ps->selector->info.num_inputs +
+			      (ps->key.ps.color_two_side ? num_colors : 0);
+
+	assert(num_interp <= 32);
+	return MIN2(num_interp, 32);
+}
+
 static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
 {
 	unsigned value = shader->key.ps.spi_shader_col_format;
@@ -507,7 +519,7 @@ static void si_shader_ps(struct si_shader *shader)
 	has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena) ||
 		       G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena);
 
-	spi_ps_in_control = S_0286D8_NUM_INTERP(shader->nparam) |
+	spi_ps_in_control = S_0286D8_NUM_INTERP(si_get_ps_num_interp(shader)) |
 			    S_0286D8_BC_OPTIMIZE_DISABLE(has_centroid);
 
 	/* Set registers. */
@@ -1129,34 +1141,44 @@ static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	struct si_shader *ps = sctx->ps_shader.current;
 	struct si_shader *vs = si_get_vs_state(sctx);
-	struct tgsi_shader_info *psinfo;
-	unsigned i, num_written = 0;
+	struct tgsi_shader_info *psinfo = ps ? &ps->selector->info : NULL;
+	unsigned i, num_interp, num_written = 0, bcol_interp[2];
 
-	if (!ps || !ps->nparam)
+	if (!ps || !ps->selector->info.num_inputs)
 		return;
 
-	psinfo = &ps->selector->info;
-
-	radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, ps->nparam);
+	num_interp = si_get_ps_num_interp(ps);
+	assert(num_interp > 0);
+	radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, num_interp);
 
 	for (i = 0; i < psinfo->num_inputs; i++) {
 		unsigned name = psinfo->input_semantic_name[i];
 		unsigned index = psinfo->input_semantic_index[i];
 		unsigned interpolate = psinfo->input_interpolate[i];
-		unsigned param_offset = ps->ps_input_param_offset[i];
-bcolor:
+
 		radeon_emit(cs, si_get_ps_input_cntl(sctx, vs, name, index,
 						     interpolate));
 		num_written++;
 
-		if (name == TGSI_SEMANTIC_COLOR &&
-		    ps->key.ps.color_two_side) {
-			name = TGSI_SEMANTIC_BCOLOR;
-			param_offset++;
-			goto bcolor;
+		if (name == TGSI_SEMANTIC_COLOR) {
+			assert(index < ARRAY_SIZE(bcol_interp));
+			bcol_interp[index] = interpolate;
 		}
 	}
-	assert(ps->nparam == num_written);
+
+	if (ps->key.ps.color_two_side) {
+		unsigned bcol = TGSI_SEMANTIC_BCOLOR;
+
+		for (i = 0; i < 2; i++) {
+			if (!(psinfo->colors_read & (0xf << (i * 4))))
+				continue;
+
+			radeon_emit(cs, si_get_ps_input_cntl(sctx, vs, bcol,
+							     i, bcol_interp[i]));
+			num_written++;
+		}
+	}
+	assert(num_interp == num_written);
 }
 
 static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom)

From 4596f3c1b8bbcc83b841b6c7ea4a287a6f3210f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 2 Jan 2016 03:18:03 +0100
Subject: [PATCH 72/94] radeonsi: remove si_shader::ps_input_interpolate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

tgsi_shader_info has this too.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.c | 8 +++-----
 src/gallium/drivers/radeonsi/si_shader.h | 1 -
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 79255004f28..c595f208274 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -914,7 +914,6 @@ static void declare_input_fs(
 
 	attr_number = lp_build_const_int32(gallivm, input_index);
 
-	shader->ps_input_interpolate[input_index] = decl->Interp.Interpolate;
 	interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
 						     decl->Interp.Location);
 	if (interp_param_idx == -1)
@@ -3257,17 +3256,17 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
 	LLVMValueRef interp_param;
 	const struct tgsi_full_instruction *inst = emit_data->inst;
 	const char *intr_name;
-	int input_index;
+	int input_index = inst->Src[0].Register.Index;
 	int chan;
 	int i;
 	LLVMValueRef attr_number;
 	LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
 	LLVMValueRef params = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_PRIM_MASK);
 	int interp_param_idx;
+	unsigned interp = shader->selector->info.input_interpolate[input_index];
 	unsigned location;
 
 	assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);
-	input_index = inst->Src[0].Register.Index;
 
 	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
 	    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE)
@@ -3275,8 +3274,7 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
 	else
 		location = TGSI_INTERPOLATE_LOC_CENTROID;
 
-	interp_param_idx = lookup_interp_param_index(shader->ps_input_interpolate[input_index],
-						     location);
+	interp_param_idx = lookup_interp_param_index(interp, location);
 	if (interp_param_idx == -1)
 		return;
 	else if (interp_param_idx)
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 86d8f725cb6..d3609d46334 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -291,7 +291,6 @@ struct si_shader {
 	struct si_shader_config		config;
 
 	unsigned		vs_output_param_offset[PIPE_MAX_SHADER_OUTPUTS];
-	unsigned		ps_input_interpolate[PIPE_MAX_SHADER_INPUTS];
 	bool			uses_instanceid;
 	unsigned		nr_pos_exports;
 	unsigned		nr_param_exports;

From b9126dcda834ba9cf58af32e97f4b5d93c9817a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sun, 3 Jan 2016 19:00:29 +0100
Subject: [PATCH 73/94] radeonsi: implement forcing per-sample interpolation
 using the shader key only
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Forcing per-sample interpolation was implemented partly as a state and partly
as emulation in shader code. Since we want to do this in a fragment shader
prolog, it has to go into the shader key, which will be used to generate the
prolog.

This also removes the spi_ps_input states and moves the registers
to the PS state.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_hw_context.c  |   1 -
 src/gallium/drivers/radeonsi/si_pipe.h        |   2 -
 src/gallium/drivers/radeonsi/si_shader.c      |  46 ++------
 src/gallium/drivers/radeonsi/si_shader.h      |  50 ++++----
 src/gallium/drivers/radeonsi/si_state.h       |   1 -
 .../drivers/radeonsi/si_state_shaders.c       | 107 ++++--------------
 6 files changed, 55 insertions(+), 152 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index d60c4515625..b5a4034cc12 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -182,7 +182,6 @@ void si_begin_new_cs(struct si_context *ctx)
 	si_mark_atom_dirty(ctx, &ctx->db_render_state);
 	si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
 	si_mark_atom_dirty(ctx, &ctx->spi_map);
-	si_mark_atom_dirty(ctx, &ctx->spi_ps_input);
 	si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
 	si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);
 	si_all_descriptors_begin_new_cs(ctx);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 48947442757..3c963db5078 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -202,7 +202,6 @@ struct si_context {
 	struct si_viewports		viewports;
 	struct si_stencil_ref		stencil_ref;
 	struct r600_atom		spi_map;
-	struct r600_atom		spi_ps_input;
 
 	/* Precomputed states. */
 	struct si_pm4_state		*init_config;
@@ -222,7 +221,6 @@ struct si_context {
 	struct si_vertex_element	*vertex_elements;
 	unsigned			sprite_coord_enable;
 	bool				flatshade;
-	bool				force_persample_interp;
 
 	/* shader descriptors */
 	struct si_descriptors		vertex_buffers;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index c595f208274..0a92a7b54e6 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -833,14 +833,11 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location)
 }
 
 /* This shouldn't be used by explicit INTERP opcodes. */
-static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx,
-				     unsigned param)
+static unsigned select_interp_param(struct si_shader_context *si_shader_ctx,
+				    unsigned param)
 {
-	struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
-	unsigned sample_param = 0;
-	LLVMValueRef default_ij, sample_ij, force_sample;
-
-	default_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, param);
+	if (!si_shader_ctx->shader->key.ps.force_persample_interp)
+		return param;
 
 	/* If the shader doesn't use center/centroid, just return the parameter.
 	 *
@@ -850,36 +847,15 @@ static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx,
 	switch (param) {
 	case SI_PARAM_PERSP_CENTROID:
 	case SI_PARAM_PERSP_CENTER:
-		if (!si_shader_ctx->shader->selector->forces_persample_interp_for_persp)
-			return default_ij;
-
-		sample_param = SI_PARAM_PERSP_SAMPLE;
-		break;
+		return SI_PARAM_PERSP_SAMPLE;
 
 	case SI_PARAM_LINEAR_CENTROID:
 	case SI_PARAM_LINEAR_CENTER:
-		if (!si_shader_ctx->shader->selector->forces_persample_interp_for_linear)
-			return default_ij;
-
-		sample_param = SI_PARAM_LINEAR_SAMPLE;
-		break;
+		return SI_PARAM_LINEAR_SAMPLE;
 
 	default:
-		return default_ij;
+		return param;
 	}
-
-	/* Otherwise, we have to select (i,j) based on a user data SGPR. */
-	sample_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, sample_param);
-
-	/* TODO: this can be done more efficiently by switching between
-	 * 2 prologs.
-	 */
-	force_sample = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
-				    SI_PARAM_PS_STATE_BITS);
-	force_sample = LLVMBuildTrunc(gallivm->builder, force_sample,
-				      LLVMInt1TypeInContext(gallivm->context), "");
-	return LLVMBuildSelect(gallivm->builder, force_sample,
-			       sample_ij, default_ij, "");
 }
 
 static void declare_input_fs(
@@ -918,8 +894,11 @@ static void declare_input_fs(
 						     decl->Interp.Location);
 	if (interp_param_idx == -1)
 		return;
-	else if (interp_param_idx)
-		interp_param = get_interp_param(si_shader_ctx, interp_param_idx);
+	else if (interp_param_idx) {
+		interp_param_idx = select_interp_param(si_shader_ctx,
+						       interp_param_idx);
+		interp_param = LLVMGetParam(main_fn, interp_param_idx);
+	}
 
 	/* fs.constant returns the param from the middle vertex, so it's not
 	 * really useful for flat shading. It's meant to be used for custom
@@ -3633,7 +3612,6 @@ static void create_function(struct si_shader_context *si_shader_ctx)
 
 	case TGSI_PROCESSOR_FRAGMENT:
 		params[SI_PARAM_ALPHA_REF] = f32;
-		params[SI_PARAM_PS_STATE_BITS] = i32;
 		params[SI_PARAM_PRIM_MASK] = i32;
 		last_sgpr = SI_PARAM_PRIM_MASK;
 		params[SI_PARAM_PERSP_SAMPLE] = v2i32;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index d3609d46334..0d6a45a319a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -88,7 +88,6 @@ struct radeon_shader_reloc;
 #define SI_SGPR_TCS_OUT_LAYOUT	9  /* TCS & TES only */
 #define SI_SGPR_TCS_IN_LAYOUT	10 /* TCS only */
 #define SI_SGPR_ALPHA_REF	8  /* PS only */
-#define SI_SGPR_PS_STATE_BITS	9  /* PS only */
 
 #define SI_VS_NUM_USER_SGPR	13 /* API VS */
 #define SI_ES_NUM_USER_SGPR	12 /* API VS */
@@ -97,7 +96,7 @@ struct radeon_shader_reloc;
 #define SI_TES_NUM_USER_SGPR	10
 #define SI_GS_NUM_USER_SGPR	8
 #define SI_GSCOPY_NUM_USER_SGPR	4
-#define SI_PS_NUM_USER_SGPR	10
+#define SI_PS_NUM_USER_SGPR	9
 
 /* LLVM function parameter indices */
 #define SI_PARAM_RW_BUFFERS	0
@@ -152,27 +151,23 @@ struct radeon_shader_reloc;
 
 /* PS only parameters */
 #define SI_PARAM_ALPHA_REF		4
-/* Bits:
- * 0: force_persample_interp
- */
-#define SI_PARAM_PS_STATE_BITS		5
-#define SI_PARAM_PRIM_MASK		6
-#define SI_PARAM_PERSP_SAMPLE		7
-#define SI_PARAM_PERSP_CENTER		8
-#define SI_PARAM_PERSP_CENTROID		9
-#define SI_PARAM_PERSP_PULL_MODEL	10
-#define SI_PARAM_LINEAR_SAMPLE		11
-#define SI_PARAM_LINEAR_CENTER		12
-#define SI_PARAM_LINEAR_CENTROID	13
-#define SI_PARAM_LINE_STIPPLE_TEX	14
-#define SI_PARAM_POS_X_FLOAT		15
-#define SI_PARAM_POS_Y_FLOAT		16
-#define SI_PARAM_POS_Z_FLOAT		17
-#define SI_PARAM_POS_W_FLOAT		18
-#define SI_PARAM_FRONT_FACE		19
-#define SI_PARAM_ANCILLARY		20
-#define SI_PARAM_SAMPLE_COVERAGE	21
-#define SI_PARAM_POS_FIXED_PT		22
+#define SI_PARAM_PRIM_MASK		5
+#define SI_PARAM_PERSP_SAMPLE		6
+#define SI_PARAM_PERSP_CENTER		7
+#define SI_PARAM_PERSP_CENTROID		8
+#define SI_PARAM_PERSP_PULL_MODEL	9
+#define SI_PARAM_LINEAR_SAMPLE		10
+#define SI_PARAM_LINEAR_CENTER		11
+#define SI_PARAM_LINEAR_CENTROID	12
+#define SI_PARAM_LINE_STIPPLE_TEX	13
+#define SI_PARAM_POS_X_FLOAT		14
+#define SI_PARAM_POS_Y_FLOAT		15
+#define SI_PARAM_POS_Z_FLOAT		16
+#define SI_PARAM_POS_W_FLOAT		17
+#define SI_PARAM_FRONT_FACE		18
+#define SI_PARAM_ANCILLARY		19
+#define SI_PARAM_SAMPLE_COVERAGE	20
+#define SI_PARAM_POS_FIXED_PT		21
 
 #define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1)
 
@@ -193,14 +188,6 @@ struct si_shader_selector {
 	/* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
 	unsigned	type;
 
-	/* Whether the shader has to use a conditional assignment to
-	 * choose between weights when emulating
-	 * pipe_rasterizer_state::force_persample_interp.
-	 * If false, "si_emit_spi_ps_input" will take care of it instead.
-	 */
-	bool		forces_persample_interp_for_persp;
-	bool		forces_persample_interp_for_linear;
-
 	/* GS parameters. */
 	unsigned	esgs_itemsize;
 	unsigned	gs_input_verts_per_prim;
@@ -245,6 +232,7 @@ union si_shader_key {
 		unsigned	poly_stipple:1;
 		unsigned	poly_line_smoothing:1;
 		unsigned	clamp_color:1;
+		unsigned	force_persample_interp:1;
 	} ps;
 	struct {
 		unsigned	instance_divisors[SI_NUM_VERTEX_BUFFERS];
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 507f45938ce..e9a017534d1 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -133,7 +133,6 @@ union si_state_atoms {
 		struct r600_atom *viewports;
 		struct r600_atom *stencil_ref;
 		struct r600_atom *spi_map;
-		struct r600_atom *spi_ps_input;
 	} s;
 	struct r600_atom *array[0];
 };
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 6e7311807dd..59511c67ed0 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -472,6 +472,17 @@ static void si_shader_ps(struct si_shader *shader)
 	unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
 	uint64_t va;
 	bool has_centroid;
+	unsigned input_ena = shader->config.spi_ps_input_ena;
+
+	/* we need to enable at least one of them, otherwise we hang the GPU */
+	assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) ||
+	       G_0286CC_PERSP_CENTER_ENA(input_ena) ||
+	       G_0286CC_PERSP_CENTROID_ENA(input_ena) ||
+	       G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) ||
+	       G_0286CC_LINEAR_SAMPLE_ENA(input_ena) ||
+	       G_0286CC_LINEAR_CENTER_ENA(input_ena) ||
+	       G_0286CC_LINEAR_CENTROID_ENA(input_ena) ||
+	       G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena));
 
 	pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
 
@@ -515,6 +526,9 @@ static void si_shader_ps(struct si_shader *shader)
 	     shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS))
 		spi_shader_col_format = V_028714_SPI_SHADER_32_R;
 
+	si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, input_ena);
+	si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR, input_ena);
+
 	/* Set interpolation controls. */
 	has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena) ||
 		       G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena);
@@ -706,6 +720,15 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 						       (is_line && rs->line_smooth)) &&
 						      sctx->framebuffer.nr_samples <= 1;
 			key->ps.clamp_color = rs->clamp_fragment_color;
+
+			key->ps.force_persample_interp = rs->force_persample_interp &&
+							 rs->multisample_enable &&
+							 sctx->framebuffer.nr_samples > 1 &&
+							 sctx->ps_iter_samples > 1 &&
+							 (sel->info.uses_persp_center ||
+							  sel->info.uses_persp_centroid ||
+							  sel->info.uses_linear_center ||
+							  sel->info.uses_linear_centroid);
 		}
 
 		key->ps.alpha_func = si_get_alpha_test_func(sctx);
@@ -808,7 +831,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 	sel->type = util_pipe_shader_from_tgsi_processor(sel->info.processor);
 	p_atomic_inc(&sscreen->b.num_shaders_created);
 
-	/* First set which opcode uses which (i,j) pair. */
+	/* Set which opcode uses which (i,j) pair. */
 	if (sel->info.uses_persp_opcode_interp_centroid)
 		sel->info.uses_persp_centroid = true;
 
@@ -823,19 +846,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 	    sel->info.uses_linear_opcode_interp_sample)
 		sel->info.uses_linear_center = true;
 
-	/* Determine if the shader has to use a conditional assignment when
-	 * emulating force_persample_interp.
-	 */
-	sel->forces_persample_interp_for_persp =
-		sel->info.uses_persp_center +
-		sel->info.uses_persp_centroid +
-		sel->info.uses_persp_sample >= 2;
-
-	sel->forces_persample_interp_for_linear =
-		sel->info.uses_linear_center +
-		sel->info.uses_linear_centroid +
-		sel->info.uses_linear_sample >= 2;
-
 	switch (sel->type) {
 	case PIPE_SHADER_GEOMETRY:
 		sel->gs_output_prim =
@@ -1181,68 +1191,6 @@ static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
 	assert(num_interp == num_written);
 }
 
-static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom)
-{
-	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
-	struct si_shader *ps = sctx->ps_shader.current;
-	unsigned input_ena;
-
-	if (!ps)
-		return;
-
-	input_ena = ps->config.spi_ps_input_ena;
-
-	/* we need to enable at least one of them, otherwise we hang the GPU */
-	assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) ||
-	    G_0286CC_PERSP_CENTER_ENA(input_ena) ||
-	    G_0286CC_PERSP_CENTROID_ENA(input_ena) ||
-	    G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) ||
-	    G_0286CC_LINEAR_SAMPLE_ENA(input_ena) ||
-	    G_0286CC_LINEAR_CENTER_ENA(input_ena) ||
-	    G_0286CC_LINEAR_CENTROID_ENA(input_ena) ||
-	    G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena));
-
-	if (sctx->force_persample_interp) {
-		unsigned num_persp = G_0286CC_PERSP_SAMPLE_ENA(input_ena) +
-				     G_0286CC_PERSP_CENTER_ENA(input_ena) +
-				     G_0286CC_PERSP_CENTROID_ENA(input_ena);
-		unsigned num_linear = G_0286CC_LINEAR_SAMPLE_ENA(input_ena) +
-				      G_0286CC_LINEAR_CENTER_ENA(input_ena) +
-				      G_0286CC_LINEAR_CENTROID_ENA(input_ena);
-
-		/* If only one set of (i,j) coordinates is used, we can disable
-		 * CENTER/CENTROID, enable SAMPLE and it will load SAMPLE coordinates
-		 * where CENTER/CENTROID are expected, effectively forcing per-sample
-		 * interpolation.
-		 */
-		if (num_persp == 1) {
-			input_ena &= C_0286CC_PERSP_CENTER_ENA;
-			input_ena &= C_0286CC_PERSP_CENTROID_ENA;
-			input_ena |= G_0286CC_PERSP_SAMPLE_ENA(1);
-		}
-		if (num_linear == 1) {
-			input_ena &= C_0286CC_LINEAR_CENTER_ENA;
-			input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
-			input_ena |= G_0286CC_LINEAR_SAMPLE_ENA(1);
-		}
-
-		/* If at least 2 sets of coordinates are used, we can't use this
-		 * trick and have to select SAMPLE using a conditional assignment
-		 * in the shader with "force_persample_interp" being a shader constant.
-		 */
-	}
-
-	radeon_set_context_reg_seq(cs, R_0286CC_SPI_PS_INPUT_ENA, 2);
-	radeon_emit(cs, input_ena);
-	radeon_emit(cs, input_ena);
-
-	if (ps->selector->forces_persample_interp_for_persp ||
-	    ps->selector->forces_persample_interp_for_linear)
-		radeon_set_sh_reg(cs, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
-				      SI_SGPR_PS_STATE_BITS * 4,
-				  sctx->force_persample_interp);
-}
-
 /**
  * Writing CONFIG or UCONFIG VGT registers requires VGT_FLUSH before that.
  */
@@ -1774,12 +1722,6 @@ bool si_update_shaders(struct si_context *sctx)
 			si_mark_atom_dirty(sctx, &sctx->spi_map);
 		}
 
-		if (si_pm4_state_changed(sctx, ps) ||
-		    sctx->force_persample_interp != rs->force_persample_interp) {
-			sctx->force_persample_interp = rs->force_persample_interp;
-			si_mark_atom_dirty(sctx, &sctx->spi_ps_input);
-		}
-
 		if (sctx->b.family == CHIP_STONEY && si_pm4_state_changed(sctx, ps))
 			si_mark_atom_dirty(sctx, &sctx->cb_render_state);
 
@@ -1812,7 +1754,6 @@ bool si_update_shaders(struct si_context *sctx)
 void si_init_shader_functions(struct si_context *sctx)
 {
 	si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map);
-	si_init_atom(sctx, &sctx->spi_ps_input, &sctx->atoms.s.spi_ps_input, si_emit_spi_ps_input);
 
 	sctx->b.b.create_vs_state = si_create_shader_selector;
 	sctx->b.b.create_tcs_state = si_create_shader_selector;

From c379c2540b7343b02a4c1b4d3cad3c194729d617 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 2 Jan 2016 00:41:43 +0100
Subject: [PATCH 74/94] radeonsi: split PS input interpolation code into its
 own function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This will be used by the fragment shader prolog.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.c | 127 +++++++++++++----------
 1 file changed, 71 insertions(+), 56 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 0a92a7b54e6..d9006bc3d6a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -858,48 +858,42 @@ static unsigned select_interp_param(struct si_shader_context *si_shader_ctx,
 	}
 }
 
-static void declare_input_fs(
-	struct radeon_llvm_context *radeon_bld,
-	unsigned input_index,
-	const struct tgsi_full_declaration *decl)
+/**
+ * Interpolate a fragment shader input.
+ *
+ * @param si_shader_ctx		context
+ * @param input_index		index of the input in hardware
+ * @param semantic_name		TGSI_SEMANTIC_*
+ * @param semantic_index	semantic index
+ * @param num_interp_inputs	number of all interpolated inputs (= BCOLOR offset)
+ * @param colors_read_mask	color components read (4 bits for each color, 8 bits in total)
+ * @param interp_param		interpolation weights (i,j)
+ * @param prim_mask		SI_PARAM_PRIM_MASK
+ * @param face			SI_PARAM_FRONT_FACE
+ * @param result		the return value (4 components)
+ */
+static void interp_fs_input(struct si_shader_context *si_shader_ctx,
+			    unsigned input_index,
+			    unsigned semantic_name,
+			    unsigned semantic_index,
+			    unsigned num_interp_inputs,
+			    unsigned colors_read_mask,
+			    LLVMValueRef interp_param,
+			    LLVMValueRef prim_mask,
+			    LLVMValueRef face,
+			    LLVMValueRef result[4])
 {
-	struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
-	struct si_shader_context *si_shader_ctx =
-		si_shader_context(&radeon_bld->soa.bld_base);
-	struct si_shader *shader = si_shader_ctx->shader;
-	struct lp_build_context *uint =	&radeon_bld->soa.bld_base.uint_bld;
+	struct lp_build_context *base = &si_shader_ctx->radeon_bld.soa.bld_base.base;
+	struct lp_build_context *uint =	&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
 	struct gallivm_state *gallivm = base->gallivm;
 	LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
-	LLVMValueRef main_fn = radeon_bld->main_fn;
-
-	LLVMValueRef interp_param = NULL;
-	int interp_param_idx;
 	const char * intr_name;
-
-	/* This value is:
-	 * [15:0] NewPrimMask (Bit mask for each quad.  It is set it the
-	 *                     quad begins a new primitive.  Bit 0 always needs
-	 *                     to be unset)
-	 * [32:16] ParamOffset
-	 *
-	 */
-	LLVMValueRef params = LLVMGetParam(main_fn, SI_PARAM_PRIM_MASK);
 	LLVMValueRef attr_number;
 
 	unsigned chan;
 
 	attr_number = lp_build_const_int32(gallivm, input_index);
 
-	interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
-						     decl->Interp.Location);
-	if (interp_param_idx == -1)
-		return;
-	else if (interp_param_idx) {
-		interp_param_idx = select_interp_param(si_shader_ctx,
-						       interp_param_idx);
-		interp_param = LLVMGetParam(main_fn, interp_param_idx);
-	}
-
 	/* fs.constant returns the param from the middle vertex, so it's not
 	 * really useful for flat shading. It's meant to be used for custom
 	 * interpolation (but the intrinsic can't fetch from the other two
@@ -912,32 +906,28 @@ static void declare_input_fs(
 	 */
 	intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
 
-	if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
+	if (semantic_name == TGSI_SEMANTIC_COLOR &&
 	    si_shader_ctx->shader->key.ps.color_two_side) {
-		struct tgsi_shader_info *info = &shader->selector->info;
 		LLVMValueRef args[4];
-		LLVMValueRef face, is_face_positive;
+		LLVMValueRef is_face_positive;
 		LLVMValueRef back_attr_number;
 
 		/* If BCOLOR0 is used, BCOLOR1 is at offset "num_inputs + 1",
 		 * otherwise it's at offset "num_inputs".
 		 */
-		unsigned back_attr_offset = shader->selector->info.num_inputs;
-		if (decl->Semantic.Index == 1 && info->colors_read & 0xf)
+		unsigned back_attr_offset = num_interp_inputs;
+		if (semantic_index == 1 && colors_read_mask & 0xf)
 			back_attr_offset += 1;
 
 		back_attr_number = lp_build_const_int32(gallivm, back_attr_offset);
 
-		face = LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE);
-
 		is_face_positive = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
 						 face, uint->zero, "");
 
-		args[2] = params;
+		args[2] = prim_mask;
 		args[3] = interp_param;
 		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 			LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
-			unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
 			LLVMValueRef front, back;
 
 			args[0] = llvm_chan;
@@ -951,46 +941,71 @@ static void declare_input_fs(
 					       input_type, args, args[3] ? 4 : 3,
 					       LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
 
-			radeon_bld->inputs[soa_index] =
-				LLVMBuildSelect(gallivm->builder,
+			result[chan] = LLVMBuildSelect(gallivm->builder,
 						is_face_positive,
 						front,
 						back,
 						"");
 		}
-	} else if (decl->Semantic.Name == TGSI_SEMANTIC_FOG) {
+	} else if (semantic_name == TGSI_SEMANTIC_FOG) {
 		LLVMValueRef args[4];
 
 		args[0] = uint->zero;
 		args[1] = attr_number;
-		args[2] = params;
+		args[2] = prim_mask;
 		args[3] = interp_param;
-		radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
-			lp_build_intrinsic(gallivm->builder, intr_name,
+		result[0] = lp_build_intrinsic(gallivm->builder, intr_name,
 					input_type, args, args[3] ? 4 : 3,
 					LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
-		radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
-		radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
-			lp_build_const_float(gallivm, 0.0f);
-		radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
-			lp_build_const_float(gallivm, 1.0f);
+		result[1] =
+		result[2] = lp_build_const_float(gallivm, 0.0f);
+		result[3] = lp_build_const_float(gallivm, 1.0f);
 	} else {
 		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 			LLVMValueRef args[4];
 			LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
-			unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
+
 			args[0] = llvm_chan;
 			args[1] = attr_number;
-			args[2] = params;
+			args[2] = prim_mask;
 			args[3] = interp_param;
-			radeon_bld->inputs[soa_index] =
-				lp_build_intrinsic(gallivm->builder, intr_name,
+			result[chan] = lp_build_intrinsic(gallivm->builder, intr_name,
 						input_type, args, args[3] ? 4 : 3,
 						LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
 		}
 	}
 }
 
+static void declare_input_fs(
+	struct radeon_llvm_context *radeon_bld,
+	unsigned input_index,
+	const struct tgsi_full_declaration *decl)
+{
+	struct si_shader_context *si_shader_ctx =
+		si_shader_context(&radeon_bld->soa.bld_base);
+	struct si_shader *shader = si_shader_ctx->shader;
+	LLVMValueRef main_fn = radeon_bld->main_fn;
+	LLVMValueRef interp_param = NULL;
+	int interp_param_idx;
+
+	interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
+						     decl->Interp.Location);
+	if (interp_param_idx == -1)
+		return;
+	else if (interp_param_idx) {
+		interp_param_idx = select_interp_param(si_shader_ctx,
+						       interp_param_idx);
+		interp_param = LLVMGetParam(main_fn, interp_param_idx);
+	}
+
+	interp_fs_input(si_shader_ctx, input_index, decl->Semantic.Name,
+			decl->Semantic.Index, shader->selector->info.num_inputs,
+			shader->selector->info.colors_read, interp_param,
+			LLVMGetParam(main_fn, SI_PARAM_PRIM_MASK),
+			LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE),
+			&radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)]);
+}
+
 static LLVMValueRef get_sample_id(struct radeon_llvm_context *radeon_bld)
 {
 	return unpack_param(si_shader_context(&radeon_bld->soa.bld_base),

From 9483fcc7f24d7e144530084bc38e5c325013a130 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 9 Jan 2016 14:33:38 +0100
Subject: [PATCH 75/94] radeonsi: don't force gl_SampleMaskIn to 1 for
 smoothing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index d9006bc3d6a..68ce3871b7e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1057,7 +1057,6 @@ static void declare_system_value(
 	struct si_shader_context *si_shader_ctx =
 		si_shader_context(&radeon_bld->soa.bld_base);
 	struct lp_build_context *bld = &radeon_bld->soa.bld_base.base;
-	struct lp_build_context *uint_bld = &radeon_bld->soa.bld_base.uint_bld;
 	struct gallivm_state *gallivm = &radeon_bld->gallivm;
 	LLVMValueRef value = 0;
 
@@ -1133,12 +1132,10 @@ static void declare_system_value(
 	}
 
 	case TGSI_SEMANTIC_SAMPLEMASK:
-		/* Smoothing isn't MSAA in GL, but it's MSAA in hardware.
-		 * Therefore, force gl_SampleMaskIn to 1 for GL. */
-		if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
-			value = uint_bld->one;
-		else
-			value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_SAMPLE_COVERAGE);
+		/* This can only occur with the OpenGL Core profile, which
+		 * doesn't support smoothing.
+		 */
+		value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_SAMPLE_COVERAGE);
 		break;
 
 	case TGSI_SEMANTIC_TESSCOORD:

From 5a53628f45787370636b3b0a0c7d29cb80e1ada7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Wed, 6 Jan 2016 16:03:38 +0100
Subject: [PATCH 76/94] radeonsi: read SPI_PS_INPUT_ADDR from LLVM if it
 returns it
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.c        | 5 ++++-
 src/gallium/drivers/radeonsi/si_shader.h        | 1 +
 src/gallium/drivers/radeonsi/si_state_shaders.c | 3 ++-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 68ce3871b7e..85203e046fc 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3875,7 +3875,7 @@ void si_shader_binary_read_config(struct radeon_shader_binary *binary,
 			conf->spi_ps_input_ena = value;
 			break;
 		case R_0286D0_SPI_PS_INPUT_ADDR:
-			/* Not used yet, but will be in the future */
+			conf->spi_ps_input_addr = value;
 			break;
 		case R_0286E8_SPI_TMPRING_SIZE:
 		case R_00B860_COMPUTE_TMPRING_SIZE:
@@ -3895,6 +3895,9 @@ void si_shader_binary_read_config(struct radeon_shader_binary *binary,
 			}
 			break;
 		}
+
+		if (!conf->spi_ps_input_addr)
+			conf->spi_ps_input_addr = conf->spi_ps_input_ena;
 	}
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 0d6a45a319a..04b977af943 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -260,6 +260,7 @@ struct si_shader_config {
 	unsigned			num_vgprs;
 	unsigned			lds_size;
 	unsigned			spi_ps_input_ena;
+	unsigned			spi_ps_input_addr;
 	unsigned			float_mode;
 	unsigned			scratch_bytes_per_wave;
 	unsigned			rsrc1;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 59511c67ed0..6e656b07ca1 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -527,7 +527,8 @@ static void si_shader_ps(struct si_shader *shader)
 		spi_shader_col_format = V_028714_SPI_SHADER_32_R;
 
 	si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, input_ena);
-	si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR, input_ena);
+	si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR,
+		       shader->config.spi_ps_input_addr);
 
 	/* Set interpolation controls. */
 	has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena) ||

From 57271d5364bb84fd5c6b6a6baaf8d81bae8c53c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Tue, 26 Jan 2016 22:16:55 +0100
Subject: [PATCH 77/94] radeonsi: dump SPI_PS_INPUT values along with shader
 stats
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 85203e046fc..bd45d4ae8db 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4039,6 +4039,13 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
 		max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
 
 	if (r600_can_dump_shader(&sscreen->b, processor)) {
+		if (processor == TGSI_PROCESSOR_FRAGMENT) {
+			fprintf(stderr, "*** SHADER CONFIG ***\n"
+				"SPI_PS_INPUT_ADDR = 0x%04x\n"
+				"SPI_PS_INPUT_ENA  = 0x%04x\n",
+				conf->spi_ps_input_addr, conf->spi_ps_input_ena);
+		}
+
 		fprintf(stderr, "*** SHADER STATS ***\n"
 			"SGPRS: %d\n"
 			"VGPRS: %d\n"

From b6d5666fbf2a4196462db7ea82918feae883daae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Tue, 26 Jan 2016 17:27:54 +0100
Subject: [PATCH 78/94] radeonsi: remove useless code that handles
 dx10_clamp_mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

"enable-no-nans-fp-math" is the wrong attribute string, and there was
disagreement about fixing it, so remove the code that sets it.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.c        |  7 -------
 src/gallium/drivers/radeonsi/si_shader.h        |  1 -
 src/gallium/drivers/radeonsi/si_state_shaders.c | 12 ++++++------
 3 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index bd45d4ae8db..c92f07cff63 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3654,10 +3654,6 @@ static void create_function(struct si_shader_context *si_shader_ctx)
 	radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, num_params);
 	radeon_llvm_shader_type(si_shader_ctx->radeon_bld.main_fn, si_shader_ctx->type);
 
-	if (shader->dx10_clamp_mode)
-		LLVMAddTargetDependentFunctionAttr(si_shader_ctx->radeon_bld.main_fn,
-						   "enable-no-nans-fp-math", "true");
-
 	for (i = 0; i <= last_sgpr; ++i) {
 		LLVMValueRef P = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, i);
 
@@ -4341,9 +4337,6 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 	si_init_shader_ctx(&si_shader_ctx, sscreen, shader, tm,
 			   poly_stipple ? &stipple_shader_info : &sel->info);
 
-	if (sel->type != PIPE_SHADER_COMPUTE)
-		shader->dx10_clamp_mode = true;
-
 	shader->uses_instanceid = sel->info.uses_instanceid;
 
 	bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 04b977af943..e3d1f4f39a1 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -283,7 +283,6 @@ struct si_shader {
 	bool			uses_instanceid;
 	unsigned		nr_pos_exports;
 	unsigned		nr_param_exports;
-	bool			dx10_clamp_mode; /* convert NaNs to 0 */
 };
 
 static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 6e656b07ca1..f48d7ca360f 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -124,7 +124,7 @@ static void si_shader_ls(struct si_shader *shader)
 	shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
 			   S_00B528_SGPRS((num_sgprs - 1) / 8) |
 		           S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
-			   S_00B528_DX10_CLAMP(shader->dx10_clamp_mode);
+			   S_00B528_DX10_CLAMP(1);
 	shader->config.rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
 			   S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
 }
@@ -157,7 +157,7 @@ static void si_shader_hs(struct si_shader *shader)
 	si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
 		       S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
 		       S_00B428_SGPRS((num_sgprs - 1) / 8) |
-		       S_00B428_DX10_CLAMP(shader->dx10_clamp_mode));
+		       S_00B428_DX10_CLAMP(1));
 	si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
 		       S_00B42C_USER_SGPR(num_user_sgprs) |
 		       S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
@@ -203,7 +203,7 @@ static void si_shader_es(struct si_shader *shader)
 		       S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) |
 		       S_00B328_SGPRS((num_sgprs - 1) / 8) |
 		       S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
-		       S_00B328_DX10_CLAMP(shader->dx10_clamp_mode));
+		       S_00B328_DX10_CLAMP(1));
 	si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
 		       S_00B32C_USER_SGPR(num_user_sgprs) |
 		       S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
@@ -292,7 +292,7 @@ static void si_shader_gs(struct si_shader *shader)
 	si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
 		       S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) |
 		       S_00B228_SGPRS((num_sgprs - 1) / 8) |
-		       S_00B228_DX10_CLAMP(shader->dx10_clamp_mode));
+		       S_00B228_DX10_CLAMP(1));
 	si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
 		       S_00B22C_USER_SGPR(num_user_sgprs) |
 		       S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
@@ -381,7 +381,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
 		       S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) |
 		       S_00B128_SGPRS((num_sgprs - 1) / 8) |
 		       S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
-		       S_00B128_DX10_CLAMP(shader->dx10_clamp_mode));
+		       S_00B128_DX10_CLAMP(1));
 	si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS,
 		       S_00B12C_USER_SGPR(num_user_sgprs) |
 		       S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
@@ -567,7 +567,7 @@ static void si_shader_ps(struct si_shader *shader)
 	si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
 		       S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) |
 		       S_00B028_SGPRS((num_sgprs - 1) / 8) |
-		       S_00B028_DX10_CLAMP(shader->dx10_clamp_mode));
+		       S_00B028_DX10_CLAMP(1));
 	si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
 		       S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) |
 		       S_00B02C_USER_SGPR(num_user_sgprs) |

From c1041366db7a8af64db5d426f48e253796b77e84 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Tue, 26 Jan 2016 22:39:24 +0100
Subject: [PATCH 79/94] radeonsi: move a few r600_can_dump_shader calls to
 where they're needed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index c92f07cff63..7f151516b8b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4113,7 +4113,7 @@ int si_compile_llvm(struct si_screen *sscreen,
 /* Generate code for the hardware VS shader stage to go with a geometry shader */
 static int si_generate_gs_copy_shader(struct si_screen *sscreen,
 				      struct si_shader_context *si_shader_ctx,
-				      struct si_shader *gs, bool dump,
+				      struct si_shader *gs,
 				      struct pipe_debug_callback *debug)
 {
 	struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
@@ -4183,7 +4183,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
 
 	radeon_llvm_finalize_module(&si_shader_ctx->radeon_bld);
 
-	if (dump)
+	if (r600_can_dump_shader(&sscreen->b, TGSI_PROCESSOR_GEOMETRY))
 		fprintf(stderr, "Copy Vertex Shader for Geometry Shader:\n\n");
 
 	r = si_compile_llvm(sscreen, &si_shader_ctx->shader->binary,
@@ -4317,7 +4317,6 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 	int r = 0;
 	bool poly_stipple = sel->type == PIPE_SHADER_FRAGMENT &&
 			    shader->key.ps.poly_stipple;
-	bool dump = r600_can_dump_shader(&sscreen->b, sel->info.processor);
 
 	if (poly_stipple) {
 		tokens = util_pstipple_create_fragment_shader(tokens, NULL,
@@ -4328,7 +4327,8 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 
 	/* Dump TGSI code before doing TGSI->LLVM conversion in case the
 	 * conversion fails. */
-	if (dump && !(sscreen->b.debug_flags & DBG_NO_TGSI)) {
+	if (r600_can_dump_shader(&sscreen->b, sel->info.processor) &&
+	    !(sscreen->b.debug_flags & DBG_NO_TGSI)) {
 		si_dump_shader_key(sel->type, &shader->key, stderr);
 		tgsi_dump(tokens, 0);
 		si_dump_streamout(&sel->so);
@@ -4431,7 +4431,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 		shader->gs_copy_shader->selector = shader->selector;
 		si_shader_ctx.shader = shader->gs_copy_shader;
 		if ((r = si_generate_gs_copy_shader(sscreen, &si_shader_ctx,
-						    shader, dump, debug))) {
+						    shader, debug))) {
 			free(shader->gs_copy_shader);
 			shader->gs_copy_shader = NULL;
 			goto out;

From dc5fc3c2f60b4c208369e0eddbf416af059d88c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Tue, 26 Jan 2016 23:32:23 +0100
Subject: [PATCH 80/94] radeonsi: make LLVM IR dumping less messy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_compute.c |  3 ++-
 src/gallium/drivers/radeonsi/si_shader.c  | 18 +++++++++++-------
 src/gallium/drivers/radeonsi/si_shader.h  |  3 ++-
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 825fbb181ba..4d27e86b414 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -124,7 +124,8 @@ static void *si_create_compute_state(
                                                         code, header->num_bytes);
 			si_compile_llvm(sctx->screen, &program->kernels[i].binary,
 					&program->kernels[i].config, sctx->tm,
-					mod, &sctx->b.debug, TGSI_PROCESSOR_COMPUTE);
+					mod, &sctx->b.debug, TGSI_PROCESSOR_COMPUTE,
+					"Compute Shader");
 			si_shader_dump(sctx->screen, &program->kernels[i],
 				       &sctx->b.debug, TGSI_PROCESSOR_COMPUTE);
 			si_shader_binary_upload(sctx->screen, &program->kernels[i]);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 7f151516b8b..4551831dd2a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4081,7 +4081,8 @@ int si_compile_llvm(struct si_screen *sscreen,
 		    LLVMTargetMachineRef tm,
 		    LLVMModuleRef mod,
 		    struct pipe_debug_callback *debug,
-		    unsigned processor)
+		    unsigned processor,
+		    const char *name)
 {
 	int r = 0;
 	unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations);
@@ -4089,8 +4090,11 @@ int si_compile_llvm(struct si_screen *sscreen,
 	if (r600_can_dump_shader(&sscreen->b, processor)) {
 		fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
 
-		if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR)))
+		if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR))) {
+			fprintf(stderr, "%s LLVM IR:\n\n", name);
 			LLVMDumpModule(mod);
+			fprintf(stderr, "\n");
+		}
 	}
 
 	if (!si_replace_shader(count, binary)) {
@@ -4183,14 +4187,14 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
 
 	radeon_llvm_finalize_module(&si_shader_ctx->radeon_bld);
 
-	if (r600_can_dump_shader(&sscreen->b, TGSI_PROCESSOR_GEOMETRY))
-		fprintf(stderr, "Copy Vertex Shader for Geometry Shader:\n\n");
-
 	r = si_compile_llvm(sscreen, &si_shader_ctx->shader->binary,
 			    &si_shader_ctx->shader->config, si_shader_ctx->tm,
 			    bld_base->base.gallivm->module,
-			    debug, TGSI_PROCESSOR_GEOMETRY);
+			    debug, TGSI_PROCESSOR_GEOMETRY,
+			    "GS Copy Shader");
 	if (!r) {
+		if (r600_can_dump_shader(&sscreen->b, TGSI_PROCESSOR_GEOMETRY))
+			fprintf(stderr, "GS Copy Shader:\n");
 		si_shader_dump(sscreen, si_shader_ctx->shader, debug,
 			       TGSI_PROCESSOR_GEOMETRY);
 		r = si_shader_binary_upload(sscreen, si_shader_ctx->shader);
@@ -4410,7 +4414,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 	radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
 
 	r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,
-			    mod, debug, si_shader_ctx.type);
+			    mod, debug, si_shader_ctx.type, "TGSI shader");
 	if (r) {
 		fprintf(stderr, "LLVM failed to compile shader\n");
 		goto out;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index e3d1f4f39a1..c42c51e0455 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -328,7 +328,8 @@ int si_compile_llvm(struct si_screen *sscreen,
 		    LLVMTargetMachineRef tm,
 		    LLVMModuleRef mod,
 		    struct pipe_debug_callback *debug,
-		    unsigned processor);
+		    unsigned processor,
+		    const char *name);
 void si_shader_destroy(struct si_shader *shader);
 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
 int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);

From e21142087c43627a8b4bdf5aefac8efb58bb5aad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Tue, 26 Jan 2016 17:07:29 +0100
Subject: [PATCH 81/94] radeonsi: move code writing tess factors into a
 separate function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.c | 30 +++++++++++++++++-------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 4551831dd2a..15a6a049050 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1959,21 +1959,20 @@ handle_semantic:
 	}
 }
 
-/* This only writes the tessellation factor levels. */
-static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
+static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
+				  LLVMValueRef rel_patch_id,
+				  LLVMValueRef invocation_id,
+				  LLVMValueRef tcs_out_current_patch_data_offset)
 {
 	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
 	struct si_shader *shader = si_shader_ctx->shader;
 	unsigned tess_inner_index, tess_outer_index;
-	LLVMValueRef lds_base, lds_inner, lds_outer;
-	LLVMValueRef tf_base, rel_patch_id, byteoffset, buffer, rw_buffers;
-	LLVMValueRef out[6], vec0, vec1, invocation_id;
+	LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
+	LLVMValueRef out[6], vec0, vec1, rw_buffers, tf_base;
 	unsigned stride, outer_comps, inner_comps, i;
 	struct lp_build_if_state if_ctx;
 
-	invocation_id = unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 8, 5);
-
 	/* Do this only for invocation 0, because the tess levels are per-patch,
 	 * not per-vertex.
 	 *
@@ -2012,7 +2011,7 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
 	tess_inner_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSINNER, 0);
 	tess_outer_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSOUTER, 0);
 
-	lds_base = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+	lds_base = tcs_out_current_patch_data_offset;
 	lds_inner = LLVMBuildAdd(gallivm->builder, lds_base,
 				 lp_build_const_int32(gallivm,
 						      tess_inner_index * 4), "");
@@ -2041,7 +2040,6 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
 	/* Get the offset. */
 	tf_base = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
 			       SI_PARAM_TESS_FACTOR_OFFSET);
-	rel_patch_id = get_rel_patch_id(si_shader_ctx);
 	byteoffset = LLVMBuildMul(gallivm->builder, rel_patch_id,
 				  lp_build_const_int32(gallivm, 4 * stride), "");
 
@@ -2054,6 +2052,20 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
 	lp_build_endif(&if_ctx);
 }
 
+/* This only writes the tessellation factor levels. */
+static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	LLVMValueRef invocation_id;
+
+	invocation_id = unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 8, 5);
+
+	si_write_tess_factors(bld_base,
+			      get_rel_patch_id(si_shader_ctx),
+			      invocation_id,
+			      get_tcs_out_current_patch_data_offset(si_shader_ctx));
+}
+
 static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context * bld_base)
 {
 	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);

From f7a8b6fff5ae23546ed92aad4ad67470355ed919 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Thu, 28 Jan 2016 01:29:25 +0100
Subject: [PATCH 82/94] radeonsi: split out code for deleting si_shader
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 .../drivers/radeonsi/si_state_shaders.c       | 65 ++++++++++---------
 1 file changed, 36 insertions(+), 29 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index f48d7ca360f..27a981ee0dc 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1053,6 +1053,41 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
 	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
 }
 
+static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
+{
+	if (shader->pm4) {
+		switch (shader->selector->type) {
+		case PIPE_SHADER_VERTEX:
+			if (shader->key.vs.as_ls)
+				si_pm4_delete_state(sctx, ls, shader->pm4);
+			else if (shader->key.vs.as_es)
+				si_pm4_delete_state(sctx, es, shader->pm4);
+			else
+				si_pm4_delete_state(sctx, vs, shader->pm4);
+			break;
+		case PIPE_SHADER_TESS_CTRL:
+			si_pm4_delete_state(sctx, hs, shader->pm4);
+			break;
+		case PIPE_SHADER_TESS_EVAL:
+			if (shader->key.tes.as_es)
+				si_pm4_delete_state(sctx, es, shader->pm4);
+			else
+				si_pm4_delete_state(sctx, vs, shader->pm4);
+			break;
+		case PIPE_SHADER_GEOMETRY:
+			si_pm4_delete_state(sctx, gs, shader->pm4);
+			si_pm4_delete_state(sctx, vs, shader->gs_copy_shader->pm4);
+			break;
+		case PIPE_SHADER_FRAGMENT:
+			si_pm4_delete_state(sctx, ps, shader->pm4);
+			break;
+		}
+	}
+
+	si_shader_destroy(shader);
+	free(shader);
+}
+
 static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
@@ -1073,35 +1108,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
 
 	while (p) {
 		c = p->next_variant;
-		switch (sel->type) {
-		case PIPE_SHADER_VERTEX:
-			if (p->key.vs.as_ls)
-				si_pm4_delete_state(sctx, ls, p->pm4);
-			else if (p->key.vs.as_es)
-				si_pm4_delete_state(sctx, es, p->pm4);
-			else
-				si_pm4_delete_state(sctx, vs, p->pm4);
-			break;
-		case PIPE_SHADER_TESS_CTRL:
-			si_pm4_delete_state(sctx, hs, p->pm4);
-			break;
-		case PIPE_SHADER_TESS_EVAL:
-			if (p->key.tes.as_es)
-				si_pm4_delete_state(sctx, es, p->pm4);
-			else
-				si_pm4_delete_state(sctx, vs, p->pm4);
-			break;
-		case PIPE_SHADER_GEOMETRY:
-			si_pm4_delete_state(sctx, gs, p->pm4);
-			si_pm4_delete_state(sctx, vs, p->gs_copy_shader->pm4);
-			break;
-		case PIPE_SHADER_FRAGMENT:
-			si_pm4_delete_state(sctx, ps, p->pm4);
-			break;
-		}
-
-		si_shader_destroy(p);
-		free(p);
+		si_delete_shader(sctx, p);
 		p = c;
 	}
 

From de2e28366a4b43b7c47373d3bbe17243a4dbb3ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Thu, 28 Jan 2016 02:26:59 +0100
Subject: [PATCH 83/94] radeonsi: compile geometry shaders immediately
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Geometry shaders have only 1 variant.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 27a981ee0dc..ce795c02c32 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -916,7 +916,8 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 	}
 
 	/* Pre-compilation. */
-	if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
+	if (sel->type == PIPE_SHADER_GEOMETRY ||
+	    sscreen->b.debug_flags & DBG_PRECOMPILE) {
 		struct si_shader_ctx_state state = {sel};
 		union si_shader_key key;
 

From d611fce23dce77e674a3fca6e7ed555570efbedb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Wed, 6 Jan 2016 21:21:07 +0100
Subject: [PATCH 84/94] gallium/radeon: add a function for adding llvm function
 attributes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This will be used for setting the new InitialPSInputAddr attribute.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeon/radeon_llvm_emit.c | 13 +++++++++----
 src/gallium/drivers/radeon/radeon_llvm_emit.h |  1 +
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c
index 3d0987624a6..474154e52ff 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
@@ -55,6 +55,14 @@ enum radeon_llvm_shader_type {
 	RADEON_LLVM_SHADER_CS = 3,
 };
 
+void radeon_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
+{
+	char str[16];
+
+	snprintf(str, sizeof(str), "%i", value);
+	LLVMAddTargetDependentFunctionAttr(F, name, str);
+}
+
 /**
  * Set the shader type we want to compile
  *
@@ -62,7 +70,6 @@ enum radeon_llvm_shader_type {
  */
 void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
 {
-	char Str[2];
 	enum radeon_llvm_shader_type llvm_type;
 
 	switch (type) {
@@ -84,9 +91,7 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
 		assert(0);
 	}
 
-	sprintf(Str, "%1d", llvm_type);
-
-	LLVMAddTargetDependentFunctionAttr(F, "ShaderType", Str);
+	radeon_llvm_add_attribute(F, "ShaderType", llvm_type);
 }
 
 static void init_r600_target()
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.h b/src/gallium/drivers/radeon/radeon_llvm_emit.h
index 45f05a9e0e1..84dbd2584a1 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.h
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.h
@@ -34,6 +34,7 @@
 struct pipe_debug_callback;
 struct radeon_shader_binary;
 
+void radeon_llvm_add_attribute(LLVMValueRef F, const char *name, int value);
 void radeon_llvm_shader_type(LLVMValueRef F, unsigned type);
 
 LLVMTargetRef radeon_llvm_get_r600_target(const char *triple);

From 17fe3fa312d26db58b1c441519a92cd029e03727 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 6 Feb 2016 17:13:07 +0100
Subject: [PATCH 85/94] gallium: pass the robust buffer access context flag to
 drivers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

radeonsi will not do bounds checking for loads if this is not set.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/include/pipe/p_defines.h | 6 ++++++
 src/mesa/state_tracker/st_manager.c  | 6 +++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 800f16cd250..b01f6ea3dcb 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -348,6 +348,12 @@ enum pipe_flush_flags
  */
 #define PIPE_CONTEXT_DEBUG             (1 << 1)
 
+/**
+ * Whether out-of-bounds shader loads must return zero and out-of-bounds
+ * shader stores must be dropped.
+ */
+#define PIPE_CONTEXT_ROBUST_BUFFER_ACCESS (1 << 2)
+
 /**
  * Flags for pipe_context::memory_barrier.
  */
diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c
index c16fa0b0c72..84b65369d80 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -636,6 +636,7 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi,
    struct pipe_context *pipe;
    struct gl_config mode;
    gl_api api;
+   unsigned ctx_flags = 0;
 
    if (!(stapi->profile_mask & (1 << attribs->profile)))
       return NULL;
@@ -659,7 +660,10 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi,
       break;
    }
 
-   pipe = smapi->screen->context_create(smapi->screen, NULL, 0);
+   if (attribs->flags & ST_CONTEXT_FLAG_ROBUST_ACCESS)
+      ctx_flags |= PIPE_CONTEXT_ROBUST_BUFFER_ACCESS;
+
+   pipe = smapi->screen->context_create(smapi->screen, NULL, ctx_flags);
    if (!pipe) {
       *error = ST_CONTEXT_ERROR_NO_MEMORY;
       return NULL;

From 329181ae3329dc7d6f0aac62a86c4209444d5725 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Fri, 5 Feb 2016 22:49:12 +0100
Subject: [PATCH 86/94] radeonsi: enable denorms for 64-bit and 16-bit floats
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes FP16 conversion instructions for VI, which has 16-bit floats,
but not SI & CI, which can't disable denorms for those instructions.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.c       | 14 ++++++++++++++
 .../drivers/radeonsi/si_state_shaders.c        | 18 ++++++++++++------
 src/gallium/drivers/radeonsi/sid.h             |  3 +++
 3 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 15a6a049050..c1d3edc7143 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4119,6 +4119,20 @@ int si_compile_llvm(struct si_screen *sscreen,
 
 	si_shader_binary_read_config(binary, conf, 0);
 
+	/* Enable 64-bit and 16-bit denormals, because there is no performance
+	 * cost.
+	 *
+	 * If denormals are enabled, all floating-point output modifiers are
+	 * ignored.
+	 *
+	 * Don't enable denormals for 32-bit floats, because:
+	 * - Floating-point output modifiers would be ignored by the hw.
+	 * - Some opcodes don't support denormals, such as v_mad_f32. We would
+	 *   have to stop using those.
+	 * - SI & CI would be very slow.
+	 */
+	conf->float_mode |= V_00B028_FP_64_DENORMS;
+
 	FREE(binary->config);
 	FREE(binary->global_symbol_offsets);
 	binary->config = NULL;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index ce795c02c32..77a4e47c809 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -124,7 +124,8 @@ static void si_shader_ls(struct si_shader *shader)
 	shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
 			   S_00B528_SGPRS((num_sgprs - 1) / 8) |
 		           S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
-			   S_00B528_DX10_CLAMP(1);
+			   S_00B528_DX10_CLAMP(1) |
+			   S_00B528_FLOAT_MODE(shader->config.float_mode);
 	shader->config.rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
 			   S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
 }
@@ -157,7 +158,8 @@ static void si_shader_hs(struct si_shader *shader)
 	si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
 		       S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
 		       S_00B428_SGPRS((num_sgprs - 1) / 8) |
-		       S_00B428_DX10_CLAMP(1));
+		       S_00B428_DX10_CLAMP(1) |
+		       S_00B428_FLOAT_MODE(shader->config.float_mode));
 	si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
 		       S_00B42C_USER_SGPR(num_user_sgprs) |
 		       S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
@@ -203,7 +205,8 @@ static void si_shader_es(struct si_shader *shader)
 		       S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) |
 		       S_00B328_SGPRS((num_sgprs - 1) / 8) |
 		       S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
-		       S_00B328_DX10_CLAMP(1));
+		       S_00B328_DX10_CLAMP(1) |
+		       S_00B328_FLOAT_MODE(shader->config.float_mode));
 	si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
 		       S_00B32C_USER_SGPR(num_user_sgprs) |
 		       S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
@@ -292,7 +295,8 @@ static void si_shader_gs(struct si_shader *shader)
 	si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
 		       S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) |
 		       S_00B228_SGPRS((num_sgprs - 1) / 8) |
-		       S_00B228_DX10_CLAMP(1));
+		       S_00B228_DX10_CLAMP(1) |
+		       S_00B228_FLOAT_MODE(shader->config.float_mode));
 	si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
 		       S_00B22C_USER_SGPR(num_user_sgprs) |
 		       S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
@@ -381,7 +385,8 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
 		       S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) |
 		       S_00B128_SGPRS((num_sgprs - 1) / 8) |
 		       S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
-		       S_00B128_DX10_CLAMP(1));
+		       S_00B128_DX10_CLAMP(1) |
+		       S_00B128_FLOAT_MODE(shader->config.float_mode));
 	si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS,
 		       S_00B12C_USER_SGPR(num_user_sgprs) |
 		       S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
@@ -567,7 +572,8 @@ static void si_shader_ps(struct si_shader *shader)
 	si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
 		       S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) |
 		       S_00B028_SGPRS((num_sgprs - 1) / 8) |
-		       S_00B028_DX10_CLAMP(1));
+		       S_00B028_DX10_CLAMP(1) |
+		       S_00B028_FLOAT_MODE(shader->config.float_mode));
 	si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
 		       S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) |
 		       S_00B02C_USER_SGPR(num_user_sgprs) |
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index 9e1e158219f..892084707d2 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -2845,6 +2845,9 @@
 #define   S_00B028_FLOAT_MODE(x)                                      (((x) & 0xFF) << 12)
 #define   G_00B028_FLOAT_MODE(x)                                      (((x) >> 12) & 0xFF)
 #define   C_00B028_FLOAT_MODE                                         0xFFF00FFF
+#define     V_00B028_FP_32_DENORMS					0x30
+#define     V_00B028_FP_64_DENORMS					0xc0
+#define     V_00B028_FP_ALL_DENORMS					0xf0
 #define   S_00B028_PRIV(x)                                            (((x) & 0x1) << 20)
 #define   G_00B028_PRIV(x)                                            (((x) >> 20) & 0x1)
 #define   C_00B028_PRIV                                               0xFFEFFFFF

From 8b0f6de73d7bf5cc2b9bb189e5a3fe4d48dd1017 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 9 Feb 2016 02:12:07 -0800
Subject: [PATCH 87/94] glsl: Disallow transform feedback varyings with compute
 shaders.

If the only stage is MESA_SHADER_COMPUTE, we should complain that
there's nothing coming out of the geometry shader stage just as
we would if the first stage were MESA_SHADER_FRAGMENT.

Also, it's valid for tessellation shaders to be the stage producing
transform feedback varyings, so mention those in the compiler error.

Found by inspection.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
---
 src/compiler/glsl/linker.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index 9dbb92698c1..bad1c1742b7 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -4446,9 +4446,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
        *     non-zero, but the program object has no vertex or geometry
        *     shader;
        */
-      if (first == MESA_SHADER_FRAGMENT) {
+      if (first >= MESA_SHADER_FRAGMENT) {
          linker_error(prog, "Transform feedback varyings specified, but "
-                      "no vertex or geometry shader is present.\n");
+                      "no vertex, tessellation, or geometry shader is "
+                      "present.\n");
          goto done;
       }
 

From 1df3ecc1b87d95130165283154a13ea5b9a498d4 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Wed, 13 Jan 2016 18:43:14 -0500
Subject: [PATCH 88/94] nir: const_index helpers

Direct access to intr->const_index[n], where different slots have
different meanings, is somewhat confusing.

Instead, let's put some extra info in nir_intrinsic_infos[] about which
slots map to what, and add some get/set helpers.  The helpers validate
that the field being accessed (base/writemask/etc.) is applicable for the
intrinsic opcode, for some extra safety.  And nir_print can use this to
dump out decoded const_index fields.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
---
 src/compiler/nir/nir.h            |  64 ++++++++++-
 src/compiler/nir/nir_intrinsics.c |  10 +-
 src/compiler/nir/nir_intrinsics.h | 177 +++++++++++++++---------------
 src/compiler/nir/nir_print.c      |  40 +++++--
 4 files changed, 191 insertions(+), 100 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index a4dbfde21d0..16203af9cfb 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -768,7 +768,7 @@ typedef struct {
 } nir_call_instr;
 
 #define INTRINSIC(name, num_srcs, src_components, has_dest, dest_components, \
-                  num_variables, num_indices, flags) \
+                  num_variables, num_indices, idx0, idx1, idx2, flags) \
    nir_intrinsic_##name,
 
 #define LAST_INTRINSIC(name) nir_last_intrinsic = nir_intrinsic_##name,
@@ -781,6 +781,8 @@ typedef enum {
 #undef INTRINSIC
 #undef LAST_INTRINSIC
 
+#define NIR_INTRINSIC_MAX_CONST_INDEX 3
+
 /** Represents an intrinsic
  *
  * An intrinsic is an instruction type for handling things that are
@@ -824,7 +826,7 @@ typedef struct {
     */
    uint8_t num_components;
 
-   int const_index[3];
+   int const_index[NIR_INTRINSIC_MAX_CONST_INDEX];
 
    nir_deref_var *variables[2];
 
@@ -853,6 +855,39 @@ typedef enum {
    NIR_INTRINSIC_CAN_REORDER = (1 << 1),
 } nir_intrinsic_semantic_flag;
 
+/**
+ * \name NIR intrinsics const-index flag
+ *
+ * Indicates the usage of a const_index slot.
+ *
+ * \sa nir_intrinsic_info::index_map
+ */
+typedef enum {
+   /**
+    * Generally instructions that take a offset src argument, can encode
+    * a constant 'base' value which is added to the offset.
+    */
+   NIR_INTRINSIC_BASE = 1,
+
+   /**
+    * For store instructions, a writemask for the store.
+    */
+   NIR_INTRINSIC_WRMASK = 2,
+
+   /**
+    * The stream-id for GS emit_vertex/end_primitive intrinsics.
+    */
+   NIR_INTRINSIC_STREAM_ID = 3,
+
+   /**
+    * The clip-plane id for load_user_clip_plane intrinsic.
+    */
+   NIR_INTRINSIC_UCP_ID = 4,
+
+   NIR_INTRINSIC_NUM_INDEX_FLAGS,
+
+} nir_intrinsic_index_flag;
+
 #define NIR_INTRINSIC_MAX_INPUTS 4
 
 typedef struct {
@@ -882,12 +917,37 @@ typedef struct {
    /** the number of constant indices used by the intrinsic */
    unsigned num_indices;
 
+   /** indicates the usage of intr->const_index[n] */
+   unsigned index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS];
+
    /** semantic flags for calls to this intrinsic */
    nir_intrinsic_semantic_flag flags;
 } nir_intrinsic_info;
 
 extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
 
+
+#define INTRINSIC_IDX_ACCESSORS(name, flag, type)                             \
+static inline type                                                            \
+nir_intrinsic_##name(nir_intrinsic_instr *instr)                              \
+{                                                                             \
+   const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];   \
+   assert(info->index_map[NIR_INTRINSIC_##flag] > 0);                         \
+   return instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1];      \
+}                                                                             \
+static inline void                                                            \
+nir_intrinsic_set_##name(nir_intrinsic_instr *instr, type val)                \
+{                                                                             \
+   const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];   \
+   assert(info->index_map[NIR_INTRINSIC_##flag] > 0);                         \
+   instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1] = val;       \
+}
+
+INTRINSIC_IDX_ACCESSORS(write_mask, WRMASK, unsigned)
+INTRINSIC_IDX_ACCESSORS(base, BASE, int)
+INTRINSIC_IDX_ACCESSORS(stream_id, STREAM_ID, unsigned)
+INTRINSIC_IDX_ACCESSORS(ucp_id, UCP_ID, unsigned)
+
 /**
  * \group texture information
  *
diff --git a/src/compiler/nir/nir_intrinsics.c b/src/compiler/nir/nir_intrinsics.c
index a7c868c39af..0257b19b348 100644
--- a/src/compiler/nir/nir_intrinsics.c
+++ b/src/compiler/nir/nir_intrinsics.c
@@ -30,7 +30,8 @@
 #define OPCODE(name) nir_intrinsic_##name
 
 #define INTRINSIC(_name, _num_srcs, _src_components, _has_dest, \
-                  _dest_components, _num_variables, _num_indices, _flags) \
+                  _dest_components, _num_variables, _num_indices, \
+                  idx0, idx1, idx2, _flags) \
 { \
    .name = #_name, \
    .num_srcs = _num_srcs, \
@@ -39,9 +40,16 @@
    .dest_components = _dest_components, \
    .num_variables = _num_variables, \
    .num_indices = _num_indices, \
+   .index_map = { \
+      [NIR_INTRINSIC_ ## idx0] = 1, \
+      [NIR_INTRINSIC_ ## idx1] = 2, \
+      [NIR_INTRINSIC_ ## idx2] = 3, \
+   }, \
    .flags = _flags \
 },
 
+#define NIR_INTRINSIC_xx 0
+
 #define LAST_INTRINSIC(name)
 
 const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics] = {
diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h
index 62eead4878a..00725562874 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -30,7 +30,7 @@
  * expands to a list of macros of the form:
  *
  * INTRINSIC(name, num_srcs, src_components, has_dest, dest_components,
- *              num_variables, num_indices, flags)
+ *              num_variables, num_indices, idx0, idx1, idx2, flags)
  *
  * Which should correspond one-to-one with the nir_intrinsic_info structure. It
  * is included in both ir.h to create the nir_intrinsic enum (with members of
@@ -42,9 +42,9 @@
 #define ARR(...) { __VA_ARGS__ }
 
 
-INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, 0)
-INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
+INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, WRMASK, xx, xx, 0)
+INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, xx, xx, xx, 0)
 
 /*
  * Interpolation of input.  The interp_var_at* intrinsics are similar to the
@@ -54,25 +54,25 @@ INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
  * respectively.
  */
 
-INTRINSIC(interp_var_at_centroid, 0, ARR(0), true, 0, 1, 0,
+INTRINSIC(interp_var_at_centroid, 0, ARR(0), true, 0, 1, 0, xx, xx, xx,
           NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-INTRINSIC(interp_var_at_sample, 1, ARR(1), true, 0, 1, 0,
+INTRINSIC(interp_var_at_sample, 1, ARR(1), true, 0, 1, 0, xx, xx, xx,
           NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0,
+INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0, xx, xx, xx,
           NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
 /*
  * Ask the driver for the size of a given buffer. It takes the buffer index
  * as source.
  */
-INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0,
+INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0, xx, xx, xx,
           NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
 /*
  * a barrier is an intrinsic with no inputs/outputs but which can't be moved
  * around/optimized in general
  */
-#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, 0)
+#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, xx, xx, xx, 0)
 
 BARRIER(barrier)
 BARRIER(discard)
@@ -89,7 +89,7 @@ BARRIER(memory_barrier)
  * The latter can be used as code motion barrier, which is currently not
  * feasible with NIR.
  */
-INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 
 /*
  * Memory barrier with semantics analogous to the compute shader
@@ -103,7 +103,7 @@ BARRIER(memory_barrier_image)
 BARRIER(memory_barrier_shared)
 
 /** A conditional discard, with a single boolean source. */
-INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)
+INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)
 
 /**
  * Basic Geometry Shader intrinsics.
@@ -113,8 +113,8 @@ INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)
  *
  * end_primitive implements GLSL's EndPrimitive() built-in.
  */
-INTRINSIC(emit_vertex,   0, ARR(), false, 0, 0, 1, 0)
-INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0)
+INTRINSIC(emit_vertex,   0, ARR(), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
+INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
 
 /**
  * Geometry Shader intrinsics with a vertex count.
@@ -125,9 +125,9 @@ INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0)
  * These maintain a count of the number of vertices emitted, as an additional
  * unsigned integer source.
  */
-INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
-INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
-INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0)
+INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
+INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
+INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)
 
 /*
  * Atomic counters
@@ -137,8 +137,8 @@ INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0)
  */
 
 #define ATOMIC(name, flags) \
-   INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, flags) \
-   INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, flags)
+   INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, xx, xx, xx, flags) \
+   INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, BASE, xx, xx, flags)
 
 ATOMIC(inc, 0)
 ATOMIC(dec, 0)
@@ -159,20 +159,20 @@ ATOMIC(read, NIR_INTRINSIC_CAN_ELIMINATE)
  * either one or two additional scalar arguments with the same meaning as in
  * the ARB_shader_image_load_store specification.
  */
-INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0,
+INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0, xx, xx, xx,
           NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, 0)
-INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
-INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
-INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
-INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
-INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
-INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
-INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
-INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0)
-INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0,
+INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, xx, xx, xx, 0)
+INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
+INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
+INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
+INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
+INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
+INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
+INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
+INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
+INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0, xx, xx, xx,
           NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0,
+INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, xx, xx, xx,
           NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
 /*
@@ -192,16 +192,16 @@ INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0,
  *    in ssbo_atomic_add, etc).
  * 3: For CompSwap only: the second data parameter.
  */
-INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
-INTRINSIC(ssbo_atomic_imin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
-INTRINSIC(ssbo_atomic_umin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
-INTRINSIC(ssbo_atomic_imax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
-INTRINSIC(ssbo_atomic_umax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
-INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
-INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
-INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
-INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
-INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(ssbo_atomic_imin, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(ssbo_atomic_umin, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(ssbo_atomic_imax, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(ssbo_atomic_umax, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
 
 /*
  * CS shared variable atomic intrinsics
@@ -219,42 +219,43 @@ INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0)
  *    in shared_atomic_add, etc).
  * 2: For CompSwap only: the second data parameter.
  */
-INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, 0)
-INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, 0)
-INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, 0)
-INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, 0)
-INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, 0)
-INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, 0)
-INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, 0)
-INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, 0)
-INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, 0)
-INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
 
-#define SYSTEM_VALUE(name, components, num_indices) \
+#define SYSTEM_VALUE(name, components, num_indices, idx0, idx1, idx2) \
    INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \
+   idx0, idx1, idx2, \
    NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
-SYSTEM_VALUE(front_face, 1, 0)
-SYSTEM_VALUE(vertex_id, 1, 0)
-SYSTEM_VALUE(vertex_id_zero_base, 1, 0)
-SYSTEM_VALUE(base_vertex, 1, 0)
-SYSTEM_VALUE(instance_id, 1, 0)
-SYSTEM_VALUE(base_instance, 1, 0)
-SYSTEM_VALUE(draw_id, 1, 0)
-SYSTEM_VALUE(sample_id, 1, 0)
-SYSTEM_VALUE(sample_pos, 2, 0)
-SYSTEM_VALUE(sample_mask_in, 1, 0)
-SYSTEM_VALUE(primitive_id, 1, 0)
-SYSTEM_VALUE(invocation_id, 1, 0)
-SYSTEM_VALUE(tess_coord, 3, 0)
-SYSTEM_VALUE(tess_level_outer, 4, 0)
-SYSTEM_VALUE(tess_level_inner, 2, 0)
-SYSTEM_VALUE(patch_vertices_in, 1, 0)
-SYSTEM_VALUE(local_invocation_id, 3, 0)
-SYSTEM_VALUE(work_group_id, 3, 0)
-SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */
-SYSTEM_VALUE(num_work_groups, 3, 0)
-SYSTEM_VALUE(helper_invocation, 1, 0)
+SYSTEM_VALUE(front_face, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(vertex_id, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(vertex_id_zero_base, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(base_vertex, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(instance_id, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(base_instance, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(draw_id, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(sample_id, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(sample_pos, 2, 0, xx, xx, xx)
+SYSTEM_VALUE(sample_mask_in, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(primitive_id, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(invocation_id, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(tess_coord, 3, 0, xx, xx, xx)
+SYSTEM_VALUE(tess_level_outer, 4, 0, xx, xx, xx)
+SYSTEM_VALUE(tess_level_inner, 2, 0, xx, xx, xx)
+SYSTEM_VALUE(patch_vertices_in, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(local_invocation_id, 3, 0, xx, xx, xx)
+SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx)
+SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx)
+SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx)
+SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
 
 /*
  * Load operations pull data from some piece of GPU memory.  All load
@@ -274,25 +275,25 @@ SYSTEM_VALUE(helper_invocation, 1, 0)
  * offsets are always in bytes.
  */
 
-#define LOAD(name, srcs, indices, flags) \
-   INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, indices, flags)
+#define LOAD(name, srcs, num_indices, idx0, idx1, idx2, flags) \
+   INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, num_indices, idx0, idx1, idx2, flags)
 
 /* src[] = { offset }. const_index[] = { base } */
-LOAD(uniform, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+LOAD(uniform, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 /* src[] = { buffer_index, offset }. No const_index */
-LOAD(ubo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+LOAD(ubo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 /* src[] = { offset }. const_index[] = { base } */
-LOAD(input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+LOAD(input, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 /* src[] = { vertex, offset }. const_index[] = { base } */
-LOAD(per_vertex_input, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+LOAD(per_vertex_input, 2, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 /* src[] = { buffer_index, offset }. No const_index */
-LOAD(ssbo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+LOAD(ssbo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 /* src[] = { offset }. const_index[] = { base } */
-LOAD(output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+LOAD(output, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 /* src[] = { vertex, offset }. const_index[] = { base } */
-LOAD(per_vertex_output, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+LOAD(per_vertex_output, 2, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 /* src[] = { offset }. const_index[] = { base } */
-LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+LOAD(shared, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 
 /*
  * Stores work the same way as loads, except now the first source is the value
@@ -301,16 +302,16 @@ LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
  * const_index[0].
  */
 
-#define STORE(name, srcs, indices, flags) \
-   INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices, flags)
+#define STORE(name, srcs, num_indices, idx0, idx1, idx2, flags) \
+   INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, num_indices, idx0, idx1, idx2, flags)
 
 /* src[] = { value, offset }. const_index[] = { base, write_mask } */
-STORE(output, 2, 2, 0)
+STORE(output, 2, 2, BASE, WRMASK, xx, 0)
 /* src[] = { value, vertex, offset }. const_index[] = { base, write_mask } */
-STORE(per_vertex_output, 3, 2, 0)
+STORE(per_vertex_output, 3, 2, BASE, WRMASK, xx, 0)
 /* src[] = { value, block_index, offset }. const_index[] = { write_mask } */
-STORE(ssbo, 3, 1, 0)
+STORE(ssbo, 3, 1, WRMASK, xx, xx, 0)
 /* src[] = { value, offset }. const_index[] = { base, write_mask } */
-STORE(shared, 2, 2, 0)
+STORE(shared, 2, 2, BASE, WRMASK, xx, 0)
 
 LAST_INTRINSIC(store_shared)
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 48ecb48a620..101688eb00f 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -444,15 +444,16 @@ print_deref(nir_deref_var *deref, print_state *state)
 static void
 print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
 {
-   unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
+   const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
+   unsigned num_srcs = info->num_srcs;
    FILE *fp = state->fp;
 
-   if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
+   if (info->has_dest) {
       print_dest(&instr->dest, state);
       fprintf(fp, " = ");
    }
 
-   fprintf(fp, "intrinsic %s (", nir_intrinsic_infos[instr->intrinsic].name);
+   fprintf(fp, "intrinsic %s (", info->name);
 
    for (unsigned i = 0; i < num_srcs; i++) {
       if (i != 0)
@@ -463,9 +464,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
 
    fprintf(fp, ") (");
 
-   unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
-
-   for (unsigned i = 0; i < num_vars; i++) {
+   for (unsigned i = 0; i < info->num_variables; i++) {
       if (i != 0)
          fprintf(fp, ", ");
 
@@ -474,9 +473,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
 
    fprintf(fp, ") (");
 
-   unsigned num_indices = nir_intrinsic_infos[instr->intrinsic].num_indices;
-
-   for (unsigned i = 0; i < num_indices; i++) {
+   for (unsigned i = 0; i < info->num_indices; i++) {
       if (i != 0)
          fprintf(fp, ", ");
 
@@ -485,6 +482,31 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
 
    fprintf(fp, ")");
 
+   static const char *index_name[NIR_INTRINSIC_NUM_INDEX_FLAGS] = {
+      [NIR_INTRINSIC_BASE] = "base",
+      [NIR_INTRINSIC_WRMASK] = "wrmask",
+      [NIR_INTRINSIC_STREAM_ID] = "stream-id",
+      [NIR_INTRINSIC_UCP_ID] = "ucp-id",
+   };
+   for (unsigned idx = 1; idx < NIR_INTRINSIC_NUM_INDEX_FLAGS; idx++) {
+      if (!info->index_map[idx])
+         continue;
+      fprintf(fp, " /*");
+      if (idx == NIR_INTRINSIC_WRMASK) {
+         /* special case wrmask to show it as a writemask.. */
+         unsigned wrmask = nir_intrinsic_write_mask(instr);
+         fprintf(fp, " wrmask=");
+         for (unsigned i = 0; i < 4; i++)
+            if ((wrmask >> i) & 1)
+               fprintf(fp, "%c", "xyzw"[i]);
+      } else {
+         unsigned off = info->index_map[idx] - 1;
+         assert(index_name[idx]);  /* forgot to update index_name table? */
+         fprintf(fp, " %s=%d", index_name[idx], instr->const_index[off]);
+      }
+      fprintf(fp, " */");
+   }
+
    if (!state->shader)
       return;
 

From b6cf98bc82a7842f8391a8e33e5318f6976d21b3 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Thu, 21 Jan 2016 13:32:09 -0500
Subject: [PATCH 89/94] gtn: use const_index helpers

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
---
 src/compiler/nir/glsl_to_nir.cpp | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp
index 3db27751289..68b7aebbc37 100644
--- a/src/compiler/nir/glsl_to_nir.cpp
+++ b/src/compiler/nir/glsl_to_nir.cpp
@@ -585,7 +585,7 @@ nir_visitor::visit(ir_emit_vertex *ir)
 {
    nir_intrinsic_instr *instr =
       nir_intrinsic_instr_create(this->shader, nir_intrinsic_emit_vertex);
-   instr->const_index[0] = ir->stream_id();
+   nir_intrinsic_set_stream_id(instr, ir->stream_id());
    nir_builder_instr_insert(&b, &instr->instr);
 }
 
@@ -594,7 +594,7 @@ nir_visitor::visit(ir_end_primitive *ir)
 {
    nir_intrinsic_instr *instr =
       nir_intrinsic_instr_create(this->shader, nir_intrinsic_end_primitive);
-   instr->const_index[0] = ir->stream_id();
+   nir_intrinsic_set_stream_id(instr, ir->stream_id());
    nir_builder_instr_insert(&b, &instr->instr);
 }
 
@@ -874,7 +874,7 @@ nir_visitor::visit(ir_call *ir)
          instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val));
          instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block));
          instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset));
-         instr->const_index[0] = write_mask->value.u[0];
+         nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]);
          instr->num_components = val->type->vector_elements;
 
          nir_builder_instr_insert(&b, &instr->instr);
@@ -972,7 +972,7 @@ nir_visitor::visit(ir_call *ir)
          exec_node *param = ir->actual_parameters.get_head();
          ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
 
-         instr->const_index[0] = 0;
+         nir_intrinsic_set_base(instr, 0);
          instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset));
 
          const glsl_type *type = ir->return_deref->var->type;
@@ -996,10 +996,10 @@ nir_visitor::visit(ir_call *ir)
          ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
          assert(write_mask);
 
-         instr->const_index[0] = 0;
+         nir_intrinsic_set_base(instr, 0);
          instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset));
 
-         instr->const_index[1] = write_mask->value.u[0];
+         nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]);
 
          instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val));
          instr->num_components = val->type->vector_elements;
@@ -1054,7 +1054,8 @@ nir_visitor::visit(ir_call *ir)
          nir_intrinsic_instr *store_instr =
             nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
          store_instr->num_components = ir->return_deref->type->vector_elements;
-         store_instr->const_index[0] = (1 << store_instr->num_components) - 1;
+         nir_intrinsic_set_write_mask(store_instr,
+                                      (1 << store_instr->num_components) - 1);
 
          store_instr->variables[0] =
             evaluate_deref(&store_instr->instr, ir->return_deref);
@@ -1132,7 +1133,7 @@ nir_visitor::visit(ir_assignment *ir)
    nir_intrinsic_instr *store =
       nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
    store->num_components = ir->lhs->type->vector_elements;
-   store->const_index[0] = ir->write_mask;
+   nir_intrinsic_set_write_mask(store, ir->write_mask);
    nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref);
    store->variables[0] = nir_deref_as_var(store_deref);
    store->src[0] = nir_src_for_ssa(src);

From b1770235ed74814a4f5d03fe3e93e59c38a3686b Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Thu, 21 Jan 2016 13:32:37 -0500
Subject: [PATCH 90/94] ttn: small logic cleanup

dim is non-NULL only when op == load_ubo, so the two checks are
equivalent; testing op == load_ubo directly is less confusing.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/auxiliary/nir/tgsi_to_nir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 3e7d69f73ed..89c16650b0a 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -614,7 +614,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
       }
 
       nir_ssa_def *offset;
-      if (dim) {
+      if (op == nir_intrinsic_load_ubo) {
          /* UBO loads don't have a const_index[0] base offset. */
          offset = nir_imm_int(b, index);
          if (indirect) {

From ead05e8670c4626a1d4ea03a6a60b5019188e1e2 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Thu, 21 Jan 2016 13:36:37 -0500
Subject: [PATCH 91/94] ttn: use const_index helpers

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
---
 src/gallium/auxiliary/nir/tgsi_to_nir.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 89c16650b0a..61ff0a74379 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -615,7 +615,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
 
       nir_ssa_def *offset;
       if (op == nir_intrinsic_load_ubo) {
-         /* UBO loads don't have a const_index[0] base offset. */
+         /* UBO loads don't have a base offset. */
          offset = nir_imm_int(b, index);
          if (indirect) {
             offset = nir_iadd(b, offset, ttn_src_for_indirect(c, indirect));
@@ -623,7 +623,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
          /* UBO offsets are in bytes, but TGSI gives them to us in vec4's */
          offset = nir_ishl(b, offset, nir_imm_int(b, 4));
       } else {
-         load->const_index[0] = index;
+         nir_intrinsic_set_base(load, index);
          if (indirect) {
             offset = ttn_src_for_indirect(c, indirect);
          } else {
@@ -1875,7 +1875,7 @@ ttn_emit_instruction(struct ttn_compile *c)
                                            &tgsi_dst->Indirect : NULL;
 
       store->num_components = 4;
-      store->const_index[0] = dest.write_mask;
+      nir_intrinsic_set_write_mask(store, dest.write_mask);
       store->variables[0] = ttn_array_deref(c, store, var, offset, indirect);
       store->src[0] = nir_src_for_reg(dest.dest.reg.reg);
 
@@ -1907,8 +1907,8 @@ ttn_add_output_stores(struct ttn_compile *c)
          store->num_components = 4;
          store->src[0].reg.reg = c->output_regs[loc].reg;
          store->src[0].reg.base_offset = c->output_regs[loc].offset;
-         store->const_index[0] = loc;
-         store->const_index[1] = 0xf;  /* writemask */
+         nir_intrinsic_set_base(store, loc);
+         nir_intrinsic_set_write_mask(store, 0xf);
          store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
          nir_builder_instr_insert(b, &store->instr);
       }

From 6921762de6490fb7fa791e3fa6aff9f9f27bf40f Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Thu, 21 Jan 2016 13:49:07 -0500
Subject: [PATCH 92/94] ptn: use const_index helpers

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
---
 src/mesa/program/prog_to_nir.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c
index ebcc5288c2e..29e5d3075c9 100644
--- a/src/mesa/program/prog_to_nir.c
+++ b/src/mesa/program/prog_to_nir.c
@@ -927,7 +927,7 @@ ptn_add_output_stores(struct ptn_compile *c)
       nir_intrinsic_instr *store =
          nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
       store->num_components = glsl_get_vector_elements(var->type);
-      store->const_index[0] = (1 << store->num_components) - 1;
+      nir_intrinsic_set_write_mask(store, (1 << store->num_components) - 1);
       store->variables[0] =
          nir_deref_var_create(store, c->output_vars[var->data.location]);
 
@@ -998,7 +998,7 @@ setup_registers_and_variables(struct ptn_compile *c)
             nir_intrinsic_instr *store =
                nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
             store->num_components = 4;
-            store->const_index[0] = WRITEMASK_XYZW;
+            nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
             store->variables[0] = nir_deref_var_create(store, fullvar);
             store->src[0] = nir_src_for_ssa(f001);
             nir_builder_instr_insert(b, &store->instr);

From ced8d3e7730777ea8a264d22f83b43f7b3a5c433 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Thu, 21 Jan 2016 14:12:58 -0500
Subject: [PATCH 93/94] nir: use const_index helpers

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
---
 src/compiler/nir/nir_builder.h               |  2 +-
 src/compiler/nir/nir_lower_atomics.c         |  4 ++--
 src/compiler/nir/nir_lower_clip.c            |  8 ++++----
 src/compiler/nir/nir_lower_gs_intrinsics.c   |  4 ++--
 src/compiler/nir/nir_lower_io.c              | 12 +++++-------
 src/compiler/nir/nir_lower_locals_to_regs.c  |  2 +-
 src/compiler/nir/nir_lower_two_sided_color.c |  4 ++--
 src/compiler/nir/nir_lower_var_copies.c      |  2 +-
 src/compiler/nir/nir_lower_vars_to_ssa.c     |  5 +++--
 src/compiler/nir/nir_print.c                 |  2 +-
 src/compiler/nir/nir_validate.c              |  2 +-
 11 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index 88ba3a1c269..d546e41b5fe 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -343,7 +343,7 @@ nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value,
    nir_intrinsic_instr *store =
       nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var);
    store->num_components = num_components;
-   store->const_index[0] = writemask;
+   nir_intrinsic_set_write_mask(store, writemask);
    store->variables[0] = nir_deref_var_create(store, var);
    store->src[0] = nir_src_for_ssa(value);
    nir_builder_instr_insert(build, &store->instr);
diff --git a/src/compiler/nir/nir_lower_atomics.c b/src/compiler/nir/nir_lower_atomics.c
index 1a4458d4f84..1935a527c6f 100644
--- a/src/compiler/nir/nir_lower_atomics.c
+++ b/src/compiler/nir/nir_lower_atomics.c
@@ -70,8 +70,8 @@ lower_instr(nir_intrinsic_instr *instr,
    unsigned uniform_loc = instr->variables[0]->var->data.location;
 
    nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
-   new_instr->const_index[0] =
-      state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index;
+   nir_intrinsic_set_base(new_instr,
+      state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index);
 
    nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
    offset_const->value.u[0] = instr->variables[0]->var->data.offset;
diff --git a/src/compiler/nir/nir_lower_clip.c b/src/compiler/nir/nir_lower_clip.c
index 0ca6a289396..bcbad536874 100644
--- a/src/compiler/nir/nir_lower_clip.c
+++ b/src/compiler/nir/nir_lower_clip.c
@@ -71,8 +71,8 @@ store_clipdist_output(nir_builder *b, nir_variable *out, nir_ssa_def **val)
 
    store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
    store->num_components = 4;
-   store->const_index[0] = out->data.driver_location;
-   store->const_index[1] = 0xf;   /* wrmask */
+   nir_intrinsic_set_base(store, out->data.driver_location);
+   nir_intrinsic_set_write_mask(store, 0xf);
    store->src[0].ssa = nir_vec4(b, val[0], val[1], val[2], val[3]);
    store->src[0].is_ssa = true;
    store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
@@ -86,7 +86,7 @@ load_clipdist_input(nir_builder *b, nir_variable *in, nir_ssa_def **val)
 
    load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
    load->num_components = 4;
-   load->const_index[0] = in->data.driver_location;
+   nir_intrinsic_set_base(load, in->data.driver_location);
    load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
    nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
    nir_builder_instr_insert(b, &load->instr);
@@ -112,7 +112,7 @@ find_output_in_block(nir_block *block, void *void_state)
       if (instr->type == nir_instr_type_intrinsic) {
          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
          if ((intr->intrinsic == nir_intrinsic_store_output) &&
-             intr->const_index[0] == state->drvloc) {
+             nir_intrinsic_base(intr) == state->drvloc) {
             assert(state->def == NULL);
             assert(intr->src[0].is_ssa);
             assert(nir_src_as_const_value(intr->src[1]));
diff --git a/src/compiler/nir/nir_lower_gs_intrinsics.c b/src/compiler/nir/nir_lower_gs_intrinsics.c
index fdff1656b4d..14abfe3f509 100644
--- a/src/compiler/nir/nir_lower_gs_intrinsics.c
+++ b/src/compiler/nir/nir_lower_gs_intrinsics.c
@@ -93,7 +93,7 @@ rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct state *state)
    nir_intrinsic_instr *lowered =
       nir_intrinsic_instr_create(b->shader,
                                  nir_intrinsic_emit_vertex_with_counter);
-   lowered->const_index[0] = intrin->const_index[0];
+   nir_intrinsic_set_stream_id(lowered, nir_intrinsic_stream_id(intrin));
    lowered->src[0] = nir_src_for_ssa(count);
    nir_builder_instr_insert(b, &lowered->instr);
 
@@ -121,7 +121,7 @@ rewrite_end_primitive(nir_intrinsic_instr *intrin, struct state *state)
    nir_intrinsic_instr *lowered =
       nir_intrinsic_instr_create(b->shader,
                                  nir_intrinsic_end_primitive_with_counter);
-   lowered->const_index[0] = intrin->const_index[0];
+   nir_intrinsic_set_stream_id(lowered, nir_intrinsic_stream_id(intrin));
    lowered->src[0] = nir_src_for_ssa(count);
    nir_builder_instr_insert(b, &lowered->instr);
 
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index 80c5151f0ea..11fb973a237 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -213,8 +213,8 @@ nir_lower_io_block(nir_block *block, void *void_state)
                                        load_op(state, mode, per_vertex));
          load->num_components = intrin->num_components;
 
-         load->const_index[0] =
-            intrin->variables[0]->var->data.driver_location;
+         nir_intrinsic_set_base(load,
+            intrin->variables[0]->var->data.driver_location);
 
          if (per_vertex)
             load->src[0] = nir_src_for_ssa(vertex_index);
@@ -258,11 +258,9 @@ nir_lower_io_block(nir_block *block, void *void_state)
 
          nir_src_copy(&store->src[0], &intrin->src[0], store);
 
-         store->const_index[0] =
-            intrin->variables[0]->var->data.driver_location;
-
-         /* Copy the writemask */
-         store->const_index[1] = intrin->const_index[0];
+         nir_intrinsic_set_base(store,
+            intrin->variables[0]->var->data.driver_location);
+         nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));
 
          if (per_vertex)
             store->src[1] = nir_src_for_ssa(vertex_index);
diff --git a/src/compiler/nir/nir_lower_locals_to_regs.c b/src/compiler/nir/nir_lower_locals_to_regs.c
index 51b0fa733f2..45036fa7787 100644
--- a/src/compiler/nir/nir_lower_locals_to_regs.c
+++ b/src/compiler/nir/nir_lower_locals_to_regs.c
@@ -243,7 +243,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
 
          nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov);
          nir_src_copy(&mov->src[0].src, &intrin->src[0], mov);
-         mov->dest.write_mask = intrin->const_index[0];
+         mov->dest.write_mask = nir_intrinsic_write_mask(intrin);
          mov->dest.dest.is_ssa = false;
          mov->dest.dest.reg.reg = reg_src.reg.reg;
          mov->dest.dest.reg.base_offset = reg_src.reg.base_offset;
diff --git a/src/compiler/nir/nir_lower_two_sided_color.c b/src/compiler/nir/nir_lower_two_sided_color.c
index 1294cb89004..fe3507cb7a3 100644
--- a/src/compiler/nir/nir_lower_two_sided_color.c
+++ b/src/compiler/nir/nir_lower_two_sided_color.c
@@ -72,7 +72,7 @@ load_input(nir_builder *b, nir_variable *in)
 
    load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
    load->num_components = 4;
-   load->const_index[0] = in->data.driver_location;
+   nir_intrinsic_set_base(load, in->data.driver_location);
    load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
    nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
    nir_builder_instr_insert(b, &load->instr);
@@ -151,7 +151,7 @@ nir_lower_two_sided_color_block(nir_block *block, void *void_state)
       for (idx = 0; idx < state->colors_count; idx++) {
          unsigned drvloc =
             state->colors[idx].front->data.driver_location;
-         if (intr->const_index[0] == drvloc) {
+         if (nir_intrinsic_base(intr) == drvloc) {
             assert(nir_src_as_const_value(intr->src[0]));
             break;
          }
diff --git a/src/compiler/nir/nir_lower_var_copies.c b/src/compiler/nir/nir_lower_var_copies.c
index 8cb3edd0a84..7db9839c369 100644
--- a/src/compiler/nir/nir_lower_var_copies.c
+++ b/src/compiler/nir/nir_lower_var_copies.c
@@ -128,7 +128,7 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
       nir_intrinsic_instr *store =
          nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var);
       store->num_components = num_components;
-      store->const_index[0] = (1 << num_components) - 1;
+      nir_intrinsic_set_write_mask(store, (1 << num_components) - 1);
       store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest_head->deref));
 
       store->src[0].is_ssa = true;
diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c b/src/compiler/nir/nir_lower_vars_to_ssa.c
index 75d31ff60af..5e81f237c1a 100644
--- a/src/compiler/nir/nir_lower_vars_to_ssa.c
+++ b/src/compiler/nir/nir_lower_vars_to_ssa.c
@@ -682,7 +682,8 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
             nir_ssa_def *new_def;
             b.cursor = nir_before_instr(&intrin->instr);
 
-            if (intrin->const_index[0] == (1 << intrin->num_components) - 1) {
+            unsigned wrmask = nir_intrinsic_write_mask(intrin);
+            if (wrmask == (1 << intrin->num_components) - 1) {
                /* Whole variable store - just copy the source.  Note that
                 * intrin->num_components and intrin->src[0].ssa->num_components
                 * may differ.
@@ -701,7 +702,7 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
                 */
                nir_ssa_def *srcs[4];
                for (unsigned i = 0; i < intrin->num_components; i++) {
-                  if (intrin->const_index[0] & (1 << i)) {
+                  if (wrmask & (1 << i)) {
                      srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
                   } else {
                      srcs[i] = nir_channel(&b, old_def, i);
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 101688eb00f..4ec32cf48da 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -530,7 +530,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
    }
 
    nir_foreach_variable(var, var_list) {
-      if ((var->data.driver_location == instr->const_index[0]) &&
+      if ((var->data.driver_location == nir_intrinsic_base(instr)) &&
           var->name) {
          fprintf(fp, "\t/* %s */", var->name);
          break;
diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c
index e4db68db3c0..920d6102340 100644
--- a/src/compiler/nir/nir_validate.c
+++ b/src/compiler/nir/nir_validate.c
@@ -417,7 +417,7 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
       assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
              instr->variables[0]->var->data.mode != nir_var_uniform &&
              instr->variables[0]->var->data.mode != nir_var_shader_storage);
-      assert((instr->const_index[0] & ~((1 << instr->num_components) - 1)) == 0);
+      assert((nir_intrinsic_write_mask(instr) & ~((1 << instr->num_components) - 1)) == 0);
       break;
    }
    case nir_intrinsic_copy_var:

From 8b0fb1c152fe191768953aa8c77b89034a377f83 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Thu, 21 Jan 2016 15:15:56 -0500
Subject: [PATCH 94/94] freedreno/ir3: use const_index helpers

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 6eb6a2d52ef..f38dc8643b4 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1004,7 +1004,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
 	nir_const_value *const_offset;
 	/* UBO addresses are the first driver params: */
 	unsigned ubo = regid(ctx->so->first_driver_param + IR3_UBOS_OFF, 0);
-	int off = intr->const_index[0];
+	int off = 0;
 
 	/* First src is ubo index, which could either be an immed or not: */
 	src0 = get_src(ctx, &intr->src[0])[0];
@@ -1092,7 +1092,7 @@ emit_intrinsic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
 	nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
 	struct ir3_array *arr = get_var(ctx, dvar->var);
 	struct ir3_instruction *addr, **src;
-	unsigned wrmask = intr->const_index[0];
+	unsigned wrmask = nir_intrinsic_write_mask(intr);
 
 	compile_assert(ctx, dvar->deref.child &&
 		(dvar->deref.child->deref_type == nir_deref_type_array));
@@ -1145,8 +1145,8 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
 	const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
 	struct ir3_instruction **dst, **src;
 	struct ir3_block *b = ctx->block;
-	int idx = intr->const_index[0];
 	nir_const_value *const_offset;
+	int idx;
 
 	if (info->has_dest) {
 		dst = get_dst(ctx, &intr->dest, intr->num_components);
@@ -1156,6 +1156,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
 
 	switch (intr->intrinsic) {
 	case nir_intrinsic_load_uniform:
+		idx = nir_intrinsic_base(intr);
 		const_offset = nir_src_as_const_value(intr->src[0]);
 		if (const_offset) {
 			idx += const_offset->u[0];
@@ -1182,6 +1183,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
 		emit_intrinsic_load_ubo(ctx, intr, dst);
 		break;
 	case nir_intrinsic_load_input:
+		idx = nir_intrinsic_base(intr);
 		const_offset = nir_src_as_const_value(intr->src[0]);
 		if (const_offset) {
 			idx += const_offset->u[0];
@@ -1208,6 +1210,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
 		emit_intrinsic_store_var(ctx, intr);
 		break;
 	case nir_intrinsic_store_output:
+		idx = nir_intrinsic_base(intr);
 		const_offset = nir_src_as_const_value(intr->src[1]);
 		compile_assert(ctx, const_offset != NULL);
 		idx += const_offset->u[0];
@@ -1243,6 +1246,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
 		dst[0] = ctx->instance_id;
 		break;
 	case nir_intrinsic_load_user_clip_plane:
+		idx = nir_intrinsic_ucp_id(intr);
 		for (int i = 0; i < intr->num_components; i++) {
 			unsigned n = idx * 4 + i;
 			dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n);