From 69271bba068c21a07b0698bf579becc20767b158 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Sat, 14 Nov 2015 17:20:09 +0100
Subject: [PATCH 001/335] nvc0: reduce the number of GPR used when reading MP
 perf counters

There is no need to allocate more GPRs than are used by the compute
kernel which reads MP performance counters on Fermi.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index 44b222e5134..7962143d45a 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -1014,14 +1014,15 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
       struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
       prog->type = PIPE_SHADER_COMPUTE;
       prog->translated = true;
-      prog->num_gprs = 14;
       prog->parm_size = 12;
       if (is_nve4) {
          prog->code = (uint32_t *)nve4_read_hw_sm_counters_code;
          prog->code_size = sizeof(nve4_read_hw_sm_counters_code);
+         prog->num_gprs = 14;
       } else {
          prog->code = (uint32_t *)nvc0_read_hw_sm_counters_code;
          prog->code_size = sizeof(nvc0_read_hw_sm_counters_code);
+         prog->num_gprs = 12;
       }
       screen->pm.prog = prog;
    }

From 7167a058baa2b524e31e51d8924fe5d0ea661b70 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Sat, 14 Nov 2015 22:57:59 +0100
Subject: [PATCH 002/335] nv50: free interpolation parameters in
 nv50_program_destroy()

As for nvc0, we need to free the memory allocated for interpolation
parameters. This fixes a memory leak spotted by valgrind.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/nv50/nv50_program.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index 89e7a338283..707bf7a8ae3 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -489,7 +489,7 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
    FREE(p->code);
 
    FREE(p->fixups);
-
+   FREE(p->interps);
    FREE(p->so);
 
    memset(p, 0, sizeof(*p));

From ff72440b40211326eda118232fabd53965410afd Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Wed, 14 Oct 2015 21:42:41 +0200
Subject: [PATCH 003/335] nv50: implement a basic compute support

This adds the ability to launch simple compute kernels like the one I
will use to read out MP performance counters in the upcoming patch.

This compute support is based on the work of Francisco Jerez (aka curro)
that he did as part of his EVoC project in 2011/2012 to get OpenCL
working on Tesla. His original work can be found here:
https://github.com/curro/mesa/commits/nv50-compute

I made some improvements to the original code, like fixing simultaneous
use of 3D and COMPUTE, improving global buffers binding, and making
the code closer to what nvc0 already does. This compute support has been
tested by Pierre Moreau and myself with some compute kernels. This is a
step towards OpenCL.

Speaking of which, it seems that compute programs overlap fragment
programs when both are in use. To fix this, we need to re-validate
fragment programs when binding compute programs and vice versa.

Note that textures, samplers and surfaces still need to be implemented.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tested-by: Pierre Moreau <pierre.morrow@free.fr>
Acked-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/Makefile.sources  |   1 +
 .../drivers/nouveau/nv50/nv50_compute.c       | 320 +++++++++++++
 .../drivers/nouveau/nv50/nv50_compute.xml.h   | 444 ++++++++++++++++++
 .../drivers/nouveau/nv50/nv50_context.c       |  30 +-
 .../drivers/nouveau/nv50/nv50_context.h       |  23 +-
 .../drivers/nouveau/nv50/nv50_program.c       |  22 +-
 .../drivers/nouveau/nv50/nv50_program.h       |   7 +
 .../drivers/nouveau/nv50/nv50_screen.c        |  61 ++-
 .../drivers/nouveau/nv50/nv50_screen.h        |   8 +
 src/gallium/drivers/nouveau/nv50/nv50_state.c |  99 ++++
 10 files changed, 1006 insertions(+), 9 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_compute.c
 create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h

diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources
index 83f81135590..c2ff8e9b46e 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -64,6 +64,7 @@ NV50_C_SOURCES := \
 	nv50/nv50_3ddefs.xml.h \
 	nv50/nv50_3d.xml.h \
 	nv50/nv50_blit.h \
+	nv50/nv50_compute.c \
 	nv50/nv50_context.c \
 	nv50/nv50_context.h \
 	nv50/nv50_defs.xml.h \
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
new file mode 100644
index 00000000000..6d23fd66945
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright 2012 Francisco Jerez
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_compute.xml.h"
+
+#include "codegen/nv50_ir_driver.h"
+
+int
+nv50_screen_compute_setup(struct nv50_screen *screen,
+                          struct nouveau_pushbuf *push)
+{
+   struct nouveau_device *dev = screen->base.device;
+   struct nouveau_object *chan = screen->base.channel;
+   struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data;
+   unsigned obj_class;
+   int i, ret;
+
+   switch (dev->chipset & 0xf0) {
+   case 0x50:
+   case 0x80:
+   case 0x90:
+      obj_class = NV50_COMPUTE_CLASS;
+      break;
+   case 0xa0:
+      switch (dev->chipset) {
+      case 0xa3:
+      case 0xa5:
+      case 0xa8:
+         obj_class = NVA3_COMPUTE_CLASS;
+         break;
+      default:
+         obj_class = NV50_COMPUTE_CLASS;
+         break;
+      }
+      break;
+   default:
+      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
+      return -1;
+   }
+
+   ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0,
+                            &screen->compute);
+   if (ret)
+      return ret;
+
+   BEGIN_NV04(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
+   PUSH_DATA (push, screen->compute->handle);
+
+   BEGIN_NV04(push, NV50_COMPUTE(UNK02A0), 1);
+   PUSH_DATA (push, 1);
+   BEGIN_NV04(push, NV50_COMPUTE(DMA_STACK), 1);
+   PUSH_DATA (push, fifo->vram);
+   BEGIN_NV04(push, NV50_COMPUTE(STACK_ADDRESS_HIGH), 2);
+   PUSH_DATAh(push, screen->stack_bo->offset);
+   PUSH_DATA (push, screen->stack_bo->offset);
+   BEGIN_NV04(push, NV50_COMPUTE(STACK_SIZE_LOG), 1);
+   PUSH_DATA (push, 4);
+
+   BEGIN_NV04(push, NV50_COMPUTE(UNK0290), 1);
+   PUSH_DATA (push, 1);
+   BEGIN_NV04(push, NV50_COMPUTE(LANES32_ENABLE), 1);
+   PUSH_DATA (push, 1);
+   BEGIN_NV04(push, NV50_COMPUTE(REG_MODE), 1);
+   PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED);
+   BEGIN_NV04(push, NV50_COMPUTE(UNK0384), 1);
+   PUSH_DATA (push, 0x100);
+   BEGIN_NV04(push, NV50_COMPUTE(DMA_GLOBAL), 1);
+   PUSH_DATA (push, fifo->vram);
+
+   for (i = 0; i < 15; i++) {
+      BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(i)), 2);
+      PUSH_DATA (push, 0);
+      PUSH_DATA (push, 0);
+      BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(i)), 1);
+      PUSH_DATA (push, 0);
+      BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(i)), 1);
+      PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
+   }
+
+   BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(15)), 2);
+   PUSH_DATA (push, 0);
+   PUSH_DATA (push, 0);
+   BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(15)), 1);
+   PUSH_DATA (push, ~0);
+   BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(15)), 1);
+   PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
+
+   BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_LOG_ALLOC), 1);
+   PUSH_DATA (push, 7);
+   BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_NO_CLAMP), 1);
+   PUSH_DATA (push, 1);
+   BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_LOG_ALLOC), 1);
+   PUSH_DATA (push, 7);
+   BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_NO_CLAMP), 1);
+   PUSH_DATA (push, 1);
+   BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
+   PUSH_DATA (push, 0);
+
+   BEGIN_NV04(push, NV50_COMPUTE(DMA_TEXTURE), 1);
+   PUSH_DATA (push, fifo->vram);
+   BEGIN_NV04(push, NV50_COMPUTE(TEX_LIMITS), 1);
+   PUSH_DATA (push, 0x54);
+   BEGIN_NV04(push, NV50_COMPUTE(LINKED_TSC), 1);
+   PUSH_DATA (push, 0);
+
+   BEGIN_NV04(push, NV50_COMPUTE(DMA_TIC), 1);
+   PUSH_DATA (push, fifo->vram);
+   BEGIN_NV04(push, NV50_COMPUTE(TIC_ADDRESS_HIGH), 3);
+   PUSH_DATAh(push, screen->txc->offset);
+   PUSH_DATA (push, screen->txc->offset);
+   PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);
+
+   BEGIN_NV04(push, NV50_COMPUTE(DMA_TSC), 1);
+   PUSH_DATA (push, fifo->vram);
+   BEGIN_NV04(push, NV50_COMPUTE(TSC_ADDRESS_HIGH), 3);
+   PUSH_DATAh(push, screen->txc->offset + 65536);
+   PUSH_DATA (push, screen->txc->offset + 65536);
+   PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);
+
+   BEGIN_NV04(push, NV50_COMPUTE(DMA_CODE_CB), 1);
+   PUSH_DATA (push, fifo->vram);
+
+   BEGIN_NV04(push, NV50_COMPUTE(DMA_LOCAL), 1);
+   PUSH_DATA (push, fifo->vram);
+   BEGIN_NV04(push, NV50_COMPUTE(LOCAL_ADDRESS_HIGH), 2);
+   PUSH_DATAh(push, screen->tls_bo->offset + 65536);
+   PUSH_DATA (push, screen->tls_bo->offset + 65536);
+   BEGIN_NV04(push, NV50_COMPUTE(LOCAL_SIZE_LOG), 1);
+   PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2));
+
+   return 0;
+}
+
+static bool
+nv50_compute_validate_program(struct nv50_context *nv50)
+{
+   struct nv50_program *prog = nv50->compprog;
+
+   if (prog->mem)
+      return true;
+
+   if (!prog->translated) {
+      prog->translated = nv50_program_translate(
+         prog, nv50->screen->base.device->chipset, &nv50->base.debug);
+      if (!prog->translated)
+         return false;
+   }
+   if (unlikely(!prog->code_size))
+      return false;
+
+   if (likely(prog->code_size)) {
+      if (nv50_program_upload_code(nv50, prog)) {
+         struct nouveau_pushbuf *push = nv50->base.pushbuf;
+         BEGIN_NV04(push, NV50_COMPUTE(CODE_CB_FLUSH), 1);
+         PUSH_DATA (push, 0);
+         return true;
+      }
+   }
+   return false;
+}
+
+static void
+nv50_compute_validate_globals(struct nv50_context *nv50)
+{
+   unsigned i;
+
+   for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *);
+        ++i) {
+      struct pipe_resource *res = *util_dynarray_element(
+         &nv50->global_residents, struct pipe_resource *, i);
+      if (res)
+         nv50_add_bufctx_resident(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL,
+                                  nv04_resource(res), NOUVEAU_BO_RDWR);
+   }
+}
+
+static bool
+nv50_compute_state_validate(struct nv50_context *nv50)
+{
+   if (!nv50_compute_validate_program(nv50))
+      return false;
+
+   if (nv50->dirty_cp & NV50_NEW_CP_GLOBALS)
+      nv50_compute_validate_globals(nv50);
+
+   /* TODO: validate textures, samplers, surfaces */
+
+   nv50_bufctx_fence(nv50->bufctx_cp, false);
+
+   nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_cp);
+   if (unlikely(nouveau_pushbuf_validate(nv50->base.pushbuf)))
+      return false;
+   if (unlikely(nv50->state.flushed))
+      nv50_bufctx_fence(nv50->bufctx_cp, true);
+
+   return true;
+}
+
+static void
+nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
+{
+   struct nv50_screen *screen = nv50->screen;
+   struct nouveau_pushbuf *push = screen->base.pushbuf;
+   unsigned size = align(nv50->compprog->parm_size, 0x4);
+
+   BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
+   PUSH_DATA (push, (size / 4) << 8);
+
+   if (size) {
+      struct nouveau_mm_allocation *mm;
+      struct nouveau_bo *bo = NULL;
+      unsigned offset;
+
+      mm = nouveau_mm_allocate(screen->base.mm_GART, size, &bo, &offset);
+      assert(mm);
+
+      nouveau_bo_map(bo, 0, screen->base.client);
+      memcpy(bo->map + offset, input, size);
+
+      nouveau_bufctx_refn(nv50->bufctx, 0, bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+      nouveau_pushbuf_bufctx(push, nv50->bufctx);
+      nouveau_pushbuf_validate(push);
+
+      BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM(0)), size / 4);
+      nouveau_pushbuf_data(push, bo, offset, size);
+
+      nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm);
+      nouveau_bo_ref(NULL, &bo);
+      nouveau_bufctx_reset(nv50->bufctx, 0);
+   }
+}
+
+static uint32_t
+nv50_compute_find_symbol(struct nv50_context *nv50, uint32_t label)
+{
+   struct nv50_program *prog = nv50->compprog;
+   const struct nv50_ir_prog_symbol *syms =
+      (const struct nv50_ir_prog_symbol *)prog->cp.syms;
+   unsigned i;
+
+   for (i = 0; i < prog->cp.num_syms; ++i) {
+      if (syms[i].label == label)
+         return prog->code_base + syms[i].offset;
+   }
+   return prog->code_base; /* no symbols or symbol not found */
+}
+
+void
+nv50_launch_grid(struct pipe_context *pipe,
+                 const uint *block_layout, const uint *grid_layout,
+                 uint32_t label, const void *input)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   unsigned block_size = block_layout[0] * block_layout[1] * block_layout[2];
+   struct nv50_program *cp = nv50->compprog;
+   bool ret;
+
+   ret = !nv50_compute_state_validate(nv50);
+   if (ret) {
+      NOUVEAU_ERR("Failed to launch grid !\n");
+      return;
+   }
+
+   nv50_compute_upload_input(nv50, input);
+
+   BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1);
+   PUSH_DATA (push, nv50_compute_find_symbol(nv50, label));
+
+   BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1);
+   PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40));
+   BEGIN_NV04(push, NV50_COMPUTE(CP_REG_ALLOC_TEMP), 1);
+   PUSH_DATA (push, cp->max_gpr);
+
+   /* grid/block setup */
+   BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2);
+   PUSH_DATA (push, block_layout[1] << 16 | block_layout[0]);
+   PUSH_DATA (push, block_layout[2]);
+   BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1);
+   PUSH_DATA (push, 1 << 16 | block_size);
+   BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1);
+   PUSH_DATA (push, 1);
+   BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1);
+   PUSH_DATA (push, grid_layout[1] << 16 | grid_layout[0]);
+   BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1);
+   PUSH_DATA (push, 1);
+
+   /* kernel launching */
+   BEGIN_NV04(push, NV50_COMPUTE(LAUNCH), 1);
+   PUSH_DATA (push, 0);
+   BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
+   PUSH_DATA (push, 0);
+
+   /* binding a compute shader clobbers fragment shader state */
+   nv50->dirty |= NV50_NEW_FRAGPROG;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h
new file mode 100644
index 00000000000..268d11253b6
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h
@@ -0,0 +1,444 @@
+#ifndef NV50_COMPUTE_XML
+#define NV50_COMPUTE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://github.com/envytools/envytools/
+git clone https://github.com/envytools/envytools.git
+
+The rules-ng-ng source files this header was generated from are:
+- rnndb/graph/g80_compute.xml (  14027 bytes, from 2015-02-14 02:01:36)
+- rnndb/copyright.xml         (   6456 bytes, from 2015-02-14 02:01:36)
+- rnndb/nvchipsets.xml        (   2833 bytes, from 2015-04-28 16:28:33)
+- rnndb/fifo/nv_object.xml    (  15390 bytes, from 2015-04-22 20:36:09)
+- rnndb/g80_defs.xml          (  18210 bytes, from 2015-10-19 20:49:59)
+
+Copyright (C) 2006-2015 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro)
+- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NV50_COMPUTE_DMA_NOTIFY					0x00000180
+
+#define NV50_COMPUTE_DMA_GLOBAL					0x000001a0
+
+#define NV50_COMPUTE_DMA_QUERY					0x000001a4
+
+#define NV50_COMPUTE_DMA_LOCAL					0x000001b8
+
+#define NV50_COMPUTE_DMA_STACK					0x000001bc
+
+#define NV50_COMPUTE_DMA_CODE_CB					0x000001c0
+
+#define NV50_COMPUTE_DMA_TSC					0x000001c4
+
+#define NV50_COMPUTE_DMA_TIC					0x000001c8
+
+#define NV50_COMPUTE_DMA_TEXTURE					0x000001cc
+
+#define NV50_COMPUTE_UNK0200					0x00000200
+#define NV50_COMPUTE_UNK0200_UNK1__MASK				0x0000ffff
+#define NV50_COMPUTE_UNK0200_UNK1__SHIFT				0
+#define NV50_COMPUTE_UNK0200_UNK2__MASK				0x00ff0000
+#define NV50_COMPUTE_UNK0200_UNK2__SHIFT				16
+
+#define NV50_COMPUTE_UNK0204					0x00000204
+
+#define NV50_COMPUTE_UNK0208					0x00000208
+
+#define NV50_COMPUTE_UNK020C					0x0000020c
+
+#define NV50_COMPUTE_CP_ADDRESS_HIGH				0x00000210
+
+#define NV50_COMPUTE_CP_ADDRESS_LOW				0x00000214
+
+#define NV50_COMPUTE_STACK_ADDRESS_HIGH				0x00000218
+
+#define NV50_COMPUTE_STACK_ADDRESS_LOW				0x0000021c
+
+#define NV50_COMPUTE_STACK_SIZE_LOG				0x00000220
+
+#define NV50_COMPUTE_CALL_LIMIT_LOG				0x00000224
+
+#define NV50_COMPUTE_UNK0228					0x00000228
+#define NV50_COMPUTE_UNK0228_UNK0				0x00000001
+#define NV50_COMPUTE_UNK0228_UNK4__MASK				0x00000ff0
+#define NV50_COMPUTE_UNK0228_UNK4__SHIFT				4
+#define NV50_COMPUTE_UNK0228_UNK12__MASK				0x000ff000
+#define NV50_COMPUTE_UNK0228_UNK12__SHIFT			12
+
+#define NV50_COMPUTE_TSC_ADDRESS_HIGH				0x0000022c
+
+#define NV50_COMPUTE_TSC_ADDRESS_LOW				0x00000230
+#define NV50_COMPUTE_TSC_ADDRESS_LOW__ALIGN			0x00000020
+
+#define NV50_COMPUTE_TSC_LIMIT					0x00000234
+#define NV50_COMPUTE_TSC_LIMIT__MAX				0x00001fff
+
+#define NV50_COMPUTE_CB_ADDR					0x00000238
+#define NV50_COMPUTE_CB_ADDR_ID__MASK				0x003fff00
+#define NV50_COMPUTE_CB_ADDR_ID__SHIFT				8
+#define NV50_COMPUTE_CB_ADDR_BUFFER__MASK			0x0000007f
+#define NV50_COMPUTE_CB_ADDR_BUFFER__SHIFT			0
+
+#define NV50_COMPUTE_CB_DATA(i0)				       (0x0000023c + 0x4*(i0))
+#define NV50_COMPUTE_CB_DATA__ESIZE				0x00000004
+#define NV50_COMPUTE_CB_DATA__LEN				0x00000010
+
+#define NV50_COMPUTE_TSC_FLUSH					0x0000027c
+#define NV50_COMPUTE_TSC_FLUSH_SPECIFIC				0x00000001
+#define NV50_COMPUTE_TSC_FLUSH_ENTRY__MASK			0x03fffff0
+#define NV50_COMPUTE_TSC_FLUSH_ENTRY__SHIFT			4
+
+#define NV50_COMPUTE_TIC_FLUSH					0x00000280
+#define NV50_COMPUTE_TIC_FLUSH_SPECIFIC				0x00000001
+#define NV50_COMPUTE_TIC_FLUSH_ENTRY__MASK			0x03fffff0
+#define NV50_COMPUTE_TIC_FLUSH_ENTRY__SHIFT			4
+
+#define NV50_COMPUTE_DELAY1					0x00000284
+
+#define NV50_COMPUTE_WATCHDOG_TIMER				0x00000288
+
+#define NV50_COMPUTE_DELAY2					0x0000028c
+
+#define NV50_COMPUTE_UNK0290					0x00000290
+
+#define NV50_COMPUTE_LOCAL_ADDRESS_HIGH				0x00000294
+
+#define NV50_COMPUTE_LOCAL_ADDRESS_LOW				0x00000298
+#define NV50_COMPUTE_LOCAL_ADDRESS_LOW__ALIGN			0x00000100
+
+#define NV50_COMPUTE_LOCAL_SIZE_LOG				0x0000029c
+
+#define NV50_COMPUTE_UNK02A0					0x000002a0
+
+#define NV50_COMPUTE_CB_DEF_ADDRESS_HIGH				0x000002a4
+
+#define NV50_COMPUTE_CB_DEF_ADDRESS_LOW				0x000002a8
+
+#define NV50_COMPUTE_CB_DEF_SET					0x000002ac
+#define NV50_COMPUTE_CB_DEF_SET_SIZE__MASK			0x0000ffff
+#define NV50_COMPUTE_CB_DEF_SET_SIZE__SHIFT			0
+#define NV50_COMPUTE_CB_DEF_SET_BUFFER__MASK			0x007f0000
+#define NV50_COMPUTE_CB_DEF_SET_BUFFER__SHIFT			16
+
+#define NV50_COMPUTE_UNK02B0					0x000002b0
+
+#define NV50_COMPUTE_BLOCK_ALLOC					0x000002b4
+#define NV50_COMPUTE_BLOCK_ALLOC_THREADS__MASK			0x0000ffff
+#define NV50_COMPUTE_BLOCK_ALLOC_THREADS__SHIFT			0
+#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS__MASK			0x00ff0000
+#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS__SHIFT			16
+
+#define NV50_COMPUTE_LANES32_ENABLE				0x000002b8
+
+#define NV50_COMPUTE_UNK02BC					0x000002bc
+#define NV50_COMPUTE_UNK02BC_UNK1__MASK				0x00000007
+#define NV50_COMPUTE_UNK02BC_UNK1__SHIFT				0
+#define NV50_COMPUTE_UNK02BC_UNK2__MASK				0x00000070
+#define NV50_COMPUTE_UNK02BC_UNK2__SHIFT				4
+
+#define NV50_COMPUTE_CP_REG_ALLOC_TEMP				0x000002c0
+
+#define NV50_COMPUTE_TIC_ADDRESS_HIGH				0x000002c4
+
+#define NV50_COMPUTE_TIC_ADDRESS_LOW				0x000002c8
+
+#define NV50_COMPUTE_TIC_LIMIT					0x000002cc
+
+#define NV50_COMPUTE_MP_PM_SET(i0)			       (0x000002d0 + 0x4*(i0))
+#define NV50_COMPUTE_MP_PM_SET__ESIZE				0x00000004
+#define NV50_COMPUTE_MP_PM_SET__LEN				0x00000004
+
+#define NV50_COMPUTE_MP_PM_CONTROL(i0)			       (0x000002e0 + 0x4*(i0))
+#define NV50_COMPUTE_MP_PM_CONTROL__ESIZE			0x00000004
+#define NV50_COMPUTE_MP_PM_CONTROL__LEN				0x00000004
+#define NV50_COMPUTE_MP_PM_CONTROL_MODE__MASK			0x00000001
+#define NV50_COMPUTE_MP_PM_CONTROL_MODE__SHIFT			0
+#define NV50_COMPUTE_MP_PM_CONTROL_MODE_LOGOP			0x00000000
+#define NV50_COMPUTE_MP_PM_CONTROL_MODE_LOGOP_PULSE		0x00000001
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT__MASK			0x00000070
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT__SHIFT			4
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK0			0x00000000
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK1			0x00000010
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK2			0x00000020
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK3			0x00000030
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK4			0x00000040
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK5			0x00000050
+#define NV50_COMPUTE_MP_PM_CONTROL_FUNC__MASK			0x00ffff00
+#define NV50_COMPUTE_MP_PM_CONTROL_FUNC__SHIFT			8
+#define NV50_COMPUTE_MP_PM_CONTROL_SIG__MASK			0xff000000
+#define NV50_COMPUTE_MP_PM_CONTROL_SIG__SHIFT			24
+
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE			0x000002f0
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_0		0x00000001
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_1		0x00000002
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_2		0x00000004
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_3		0x00000008
+
+#define NV50_COMPUTE_UNK02F4					0x000002f4
+
+#define NV50_COMPUTE_BLOCKDIM_LATCH				0x000002f8
+
+#define NV50_COMPUTE_LOCAL_WARPS_LOG_ALLOC			0x000002fc
+
+#define NV50_COMPUTE_LOCAL_WARPS_NO_CLAMP			0x00000300
+
+#define NV50_COMPUTE_STACK_WARPS_LOG_ALLOC			0x00000304
+
+#define NV50_COMPUTE_STACK_WARPS_NO_CLAMP			0x00000308
+
+#define NV50_COMPUTE_UNK030C					0x0000030c
+
+#define NV50_COMPUTE_QUERY_ADDRESS_HIGH				0x00000310
+
+#define NV50_COMPUTE_QUERY_ADDRESS_LOW				0x00000314
+
+#define NV50_COMPUTE_QUERY_SEQUENCE				0x00000318
+
+#define NV50_COMPUTE_QUERY_GET					0x0000031c
+#define NV50_COMPUTE_QUERY_GET_INTR				0x00000200
+#define NV50_COMPUTE_QUERY_GET_SHORT				0x00008000
+
+#define NV50_COMPUTE_COND_ADDRESS_HIGH				0x00000320
+
+#define NV50_COMPUTE_COND_ADDRESS_LOW				0x00000324
+
+#define NV50_COMPUTE_COND_MODE					0x00000328
+#define NV50_COMPUTE_COND_MODE_NEVER				0x00000000
+#define NV50_COMPUTE_COND_MODE_ALWAYS				0x00000001
+#define NV50_COMPUTE_COND_MODE_RES_NON_ZERO			0x00000002
+#define NV50_COMPUTE_COND_MODE_EQUAL				0x00000003
+#define NV50_COMPUTE_COND_MODE_NOT_EQUAL				0x00000004
+
+#define NV50_COMPUTE_UNK032C					0x0000032c
+
+#define NV50_COMPUTE_UNK0330					0x00000330
+
+#define NV50_COMPUTE_UNK0334(i0)				       (0x00000334 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0334__ESIZE				0x00000004
+#define NV50_COMPUTE_UNK0334__LEN				0x00000003
+
+#define NV50_COMPUTE_UNK0340(i0)				       (0x00000340 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0340__ESIZE				0x00000004
+#define NV50_COMPUTE_UNK0340__LEN				0x00000002
+
+#define NV50_COMPUTE_UNK0348(i0)				       (0x00000348 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0348__ESIZE				0x00000004
+#define NV50_COMPUTE_UNK0348__LEN				0x00000002
+
+#define NV50_COMPUTE_UNK0350(i0)				       (0x00000350 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0350__ESIZE				0x00000004
+#define NV50_COMPUTE_UNK0350__LEN				0x00000002
+
+#define NV50_COMPUTE_UNK0358					0x00000358
+
+#define NV50_COMPUTE_UNK035C					0x0000035c
+
+#define NV50_COMPUTE_UNK0360					0x00000360
+#define NV50_COMPUTE_UNK0360_UNK0__MASK				0x000000f0
+#define NV50_COMPUTE_UNK0360_UNK0__SHIFT				4
+#define NV50_COMPUTE_UNK0360_UNK1__MASK				0x00000f00
+#define NV50_COMPUTE_UNK0360_UNK1__SHIFT				8
+
+#define NV50_COMPUTE_UNK0364					0x00000364
+
+#define NV50_COMPUTE_LAUNCH					0x00000368
+
+#define NV50_COMPUTE_UNK036C					0x0000036c
+
+#define NV50_COMPUTE_UNK0370					0x00000370
+
+#define NV50_COMPUTE_USER_PARAM_COUNT				0x00000374
+#define NV50_COMPUTE_USER_PARAM_COUNT_UNK0__MASK			0x000000ff
+#define NV50_COMPUTE_USER_PARAM_COUNT_UNK0__SHIFT		0
+#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__MASK		0x0000ff00
+#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__SHIFT		8
+#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__MAX			0x00000040
+
+#define NV50_COMPUTE_LINKED_TSC					0x00000378
+
+#define NV50_COMPUTE_UNK037C					0x0000037c
+#define NV50_COMPUTE_UNK037C_ALWAYS_DERIV			0x00000001
+#define NV50_COMPUTE_UNK037C_UNK16				0x00010000
+
+#define NV50_COMPUTE_CODE_CB_FLUSH				0x00000380
+
+#define NV50_COMPUTE_UNK0384					0x00000384
+
+#define NV50_COMPUTE_GRIDID					0x00000388
+
+#define NV50_COMPUTE_UNK038C(i0)				       (0x0000038c + 0x4*(i0))
+#define NV50_COMPUTE_UNK038C__ESIZE				0x00000004
+#define NV50_COMPUTE_UNK038C__LEN				0x00000003
+
+#define NV50_COMPUTE_WRCACHE_FLUSH				0x00000398
+
+#define NV50_COMPUTE_UNK039C(i0)				       (0x0000039c + 0x4*(i0))
+#define NV50_COMPUTE_UNK039C__ESIZE				0x00000004
+#define NV50_COMPUTE_UNK039C__LEN				0x00000002
+
+#define NV50_COMPUTE_GRIDDIM					0x000003a4
+#define NV50_COMPUTE_GRIDDIM_X__MASK				0x0000ffff
+#define NV50_COMPUTE_GRIDDIM_X__SHIFT				0
+#define NV50_COMPUTE_GRIDDIM_Y__MASK				0xffff0000
+#define NV50_COMPUTE_GRIDDIM_Y__SHIFT				16
+
+#define NV50_COMPUTE_SHARED_SIZE					0x000003a8
+#define NV50_COMPUTE_SHARED_SIZE__MAX				0x00004000
+#define NV50_COMPUTE_SHARED_SIZE__ALIGN				0x00000040
+
+#define NV50_COMPUTE_BLOCKDIM_XY					0x000003ac
+#define NV50_COMPUTE_BLOCKDIM_XY_X__MASK				0x0000ffff
+#define NV50_COMPUTE_BLOCKDIM_XY_X__SHIFT			0
+#define NV50_COMPUTE_BLOCKDIM_XY_Y__MASK				0xffff0000
+#define NV50_COMPUTE_BLOCKDIM_XY_Y__SHIFT			16
+
+#define NV50_COMPUTE_BLOCKDIM_Z					0x000003b0
+#define NV50_COMPUTE_BLOCKDIM_Z__MIN				0x00000001
+#define NV50_COMPUTE_BLOCKDIM_Z__MAX				0x00000040
+
+#define NV50_COMPUTE_CP_START_ID					0x000003b4
+
+#define NV50_COMPUTE_REG_MODE					0x000003b8
+#define NV50_COMPUTE_REG_MODE_PACKED				0x00000001
+#define NV50_COMPUTE_REG_MODE_STRIPED				0x00000002
+
+#define NV50_COMPUTE_TEX_LIMITS					0x000003bc
+#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MASK		0x0000000f
+#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__SHIFT		0
+#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MIN		0x00000000
+#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MAX		0x00000004
+#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MASK		0x000000f0
+#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__SHIFT		4
+#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MIN		0x00000000
+#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MAX		0x00000007
+
+#define NV50_COMPUTE_BIND_TSC					0x000003c0
+#define NV50_COMPUTE_BIND_TSC_VALID				0x00000001
+#define NV50_COMPUTE_BIND_TSC_SAMPLER__MASK			0x000000f0
+#define NV50_COMPUTE_BIND_TSC_SAMPLER__SHIFT			4
+#define NV50_COMPUTE_BIND_TSC_TSC__MASK				0x001ff000
+#define NV50_COMPUTE_BIND_TSC_TSC__SHIFT				12
+
+#define NV50_COMPUTE_BIND_TIC					0x000003c4
+#define NV50_COMPUTE_BIND_TIC_VALID				0x00000001
+#define NV50_COMPUTE_BIND_TIC_TEXTURE__MASK			0x000001fe
+#define NV50_COMPUTE_BIND_TIC_TEXTURE__SHIFT			1
+#define NV50_COMPUTE_BIND_TIC_TIC__MASK				0x7ffffe00
+#define NV50_COMPUTE_BIND_TIC_TIC__SHIFT				9
+
+#define NV50_COMPUTE_SET_PROGRAM_CB				0x000003c8
+#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX__MASK			0x00000f00
+#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX__SHIFT			8
+#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER__MASK			0x0007f000
+#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER__SHIFT		12
+#define NV50_COMPUTE_SET_PROGRAM_CB_VALID			0x000000ff
+
+#define NV50_COMPUTE_UNK03CC					0x000003cc
+
+#define NV50_COMPUTE_TEX_CACHE_CTL				0x000003d0
+#define NV50_COMPUTE_TEX_CACHE_CTL_UNK1__MASK			0x00000030
+#define NV50_COMPUTE_TEX_CACHE_CTL_UNK1__SHIFT			4
+
+#define NV50_COMPUTE_UNK03D4					0x000003d4
+
+#define NV50_COMPUTE_UNK03D8					0x000003d8
+
+#define NV50_COMPUTE_UNK03DC					0x000003dc
+
+#define NV50_COMPUTE_UNK03E0					0x000003e0
+
+#define NV50_COMPUTE_UNK03E4					0x000003e4
+
+#define NVA3_COMPUTE_TEX_MISC					0x000003e8
+#define NVA3_COMPUTE_TEX_MISC_UNK1				0x00000001
+#define NVA3_COMPUTE_TEX_MISC_SEAMLESS_CUBE_MAP		0x00000002
+
+#define NV50_COMPUTE_GLOBAL(i0)				       (0x00000400 + 0x20*(i0))
+#define NV50_COMPUTE_GLOBAL__ESIZE				0x00000020
+#define NV50_COMPUTE_GLOBAL__LEN					0x00000010
+
+#define NV50_COMPUTE_GLOBAL_ADDRESS_HIGH(i0)		       (0x00000400 + 0x20*(i0))
+
+#define NV50_COMPUTE_GLOBAL_ADDRESS_LOW(i0)		       (0x00000404 + 0x20*(i0))
+
+#define NV50_COMPUTE_GLOBAL_PITCH(i0)			       (0x00000408 + 0x20*(i0))
+#define NV50_COMPUTE_GLOBAL_PITCH__MAX				0x00800000
+#define NV50_COMPUTE_GLOBAL_PITCH__ALIGN				0x00000100
+
+#define NV50_COMPUTE_GLOBAL_LIMIT(i0)			       (0x0000040c + 0x20*(i0))
+
+#define NV50_COMPUTE_GLOBAL_MODE(i0)			       (0x00000410 + 0x20*(i0))
+#define NV50_COMPUTE_GLOBAL_MODE_LINEAR				0x00000001
+#define NV50_COMPUTE_GLOBAL_MODE_UNK1__MASK			0x000000f0
+#define NV50_COMPUTE_GLOBAL_MODE_UNK1__SHIFT			4
+#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE__MASK			0x00000f00
+#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE__SHIFT		8
+
+#define NV50_COMPUTE_USER_PARAM(i0)			       (0x00000600 + 0x4*(i0))
+#define NV50_COMPUTE_USER_PARAM__ESIZE				0x00000004
+#define NV50_COMPUTE_USER_PARAM__LEN				0x00000040
+
+#define NV50_COMPUTE_UNK0700(i0)				       (0x00000700 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0700__ESIZE				0x00000004
+#define NV50_COMPUTE_UNK0700__LEN				0x00000010
+
+
+#endif /* NV50_COMPUTE_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index 7867c2df7f3..f645a4d4e6b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -113,6 +113,7 @@ nv50_context_unreference_resources(struct nv50_context *nv50)
 
    nouveau_bufctx_del(&nv50->bufctx_3d);
    nouveau_bufctx_del(&nv50->bufctx);
+   nouveau_bufctx_del(&nv50->bufctx_cp);
 
    util_unreference_framebuffer_state(&nv50->framebuffer);
 
@@ -131,6 +132,14 @@ nv50_context_unreference_resources(struct nv50_context *nv50)
          if (!nv50->constbuf[s][i].user)
             pipe_resource_reference(&nv50->constbuf[s][i].u.buf, NULL);
    }
+
+   for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *);
+        ++i) {
+      struct pipe_resource **res = util_dynarray_element(
+         &nv50->global_residents, struct pipe_resource *, i);
+      pipe_resource_reference(res, NULL);
+   }
+   util_dynarray_fini(&nv50->global_residents);
 }
 
 static void
@@ -263,10 +272,13 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
    nv50->base.pushbuf = screen->base.pushbuf;
    nv50->base.client = screen->base.client;
 
-   ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_COUNT,
-                            &nv50->bufctx_3d);
+   ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx);
    if (!ret)
-      ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx);
+      ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_3D_COUNT,
+                               &nv50->bufctx_3d);
+   if (!ret)
+      ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_CP_COUNT,
+                               &nv50->bufctx_cp);
    if (ret)
       goto out_err;
 
@@ -290,6 +302,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
 
    pipe->draw_vbo = nv50_draw_vbo;
    pipe->clear = nv50_clear;
+   pipe->launch_grid = nv50_launch_grid;
 
    pipe->flush = nv50_flush;
    pipe->texture_barrier = nv50_texture_barrier;
@@ -335,19 +348,30 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
    BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->uniforms);
    BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->txc);
    BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->stack_bo);
+   if (screen->compute) {
+      BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->code);
+      BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->txc);
+      BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->stack_bo);
+   }
 
    flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR;
 
    BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->fence.bo);
    BCTX_REFN_bo(nv50->bufctx, FENCE, flags, screen->fence.bo);
+   if (screen->compute)
+      BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->fence.bo);
 
    nv50->base.scratch.bo_size = 2 << 20;
 
+   util_dynarray_init(&nv50->global_residents);
+
    return pipe;
 
 out_err:
    if (nv50->bufctx_3d)
       nouveau_bufctx_del(&nv50->bufctx_3d);
+   if (nv50->bufctx_cp)
+      nouveau_bufctx_del(&nv50->bufctx_cp);
    if (nv50->bufctx)
       nouveau_bufctx_del(&nv50->bufctx);
    FREE(nv50->blit);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index fb74a9748a3..fbafe029948 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -49,6 +49,10 @@
 #define NV50_NEW_MIN_SAMPLES  (1 << 22)
 #define NV50_NEW_CONTEXT      (1 << 31)
 
+#define NV50_NEW_CP_PROGRAM   (1 << 0)
+#define NV50_NEW_CP_GLOBALS   (1 << 1)
+
+/* 3d bufctx (during draw_vbo, blit_3d) */
 #define NV50_BIND_FB          0
 #define NV50_BIND_VERTEX      1
 #define NV50_BIND_VERTEX_TMP  2
@@ -58,7 +62,14 @@
 #define NV50_BIND_SO         53
 #define NV50_BIND_SCREEN     54
 #define NV50_BIND_TLS        55
-#define NV50_BIND_COUNT      56
+#define NV50_BIND_3D_COUNT   56
+
+/* compute bufctx (during launch_grid) */
+#define NV50_BIND_CP_GLOBAL   0
+#define NV50_BIND_CP_SCREEN   1
+#define NV50_BIND_CP_COUNT    2
+
+/* bufctx for other operations */
 #define NV50_BIND_2D          0
 #define NV50_BIND_M2MF        0
 #define NV50_BIND_FENCE       1
@@ -101,8 +112,10 @@ struct nv50_context {
 
    struct nouveau_bufctx *bufctx_3d;
    struct nouveau_bufctx *bufctx;
+   struct nouveau_bufctx *bufctx_cp;
 
    uint32_t dirty;
+   uint32_t dirty_cp; /* dirty flags for compute state */
    bool cb_dirty;
 
    struct nv50_graph_state state;
@@ -115,6 +128,7 @@ struct nv50_context {
    struct nv50_program *vertprog;
    struct nv50_program *gmtyprog;
    struct nv50_program *fragprog;
+   struct nv50_program *compprog;
 
    struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS];
    uint16_t constbuf_dirty[3];
@@ -163,6 +177,8 @@ struct nv50_context {
    uint32_t cond_condmode; /* the calculated condition */
 
    struct nv50_blitctx *blit;
+
+   struct util_dynarray global_residents;
 };
 
 static inline struct nv50_context *
@@ -302,4 +318,9 @@ struct pipe_video_buffer *
 nv98_video_buffer_create(struct pipe_context *pipe,
                          const struct pipe_video_buffer *template);
 
+/* nv50_compute.c */
+void
+nv50_launch_grid(struct pipe_context *, const uint *, const uint *,
+                 uint32_t, const void *);
+
 #endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index 707bf7a8ae3..48057d20f4e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -259,6 +259,8 @@ nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info)
       return nv50_vertprog_assign_slots(info);
    case PIPE_SHADER_FRAGMENT:
       return nv50_fragprog_assign_slots(info);
+   case PIPE_SHADER_COMPUTE:
+      return 0;
    default:
       return -1;
    }
@@ -355,6 +357,9 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
    prog->gp.has_layer = 0;
    prog->gp.has_viewport = 0;
 
+   if (prog->type == PIPE_SHADER_COMPUTE)
+      info->prop.cp.inputOffset = 0x10;
+
    info->driverPriv = prog;
 
 #ifdef DEBUG
@@ -401,6 +406,10 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
          break;
       }
       prog->gp.vert_count = info->prop.gp.maxVertices;
+   } else
+   if (prog->type == PIPE_SHADER_COMPUTE) {
+      prog->cp.syms = info->bin.syms;
+      prog->cp.num_syms = info->bin.numSyms;
    }
 
    if (prog->pipe.stream_output.num_outputs)
@@ -423,11 +432,13 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
    struct nouveau_heap *heap;
    int ret;
    uint32_t size = align(prog->code_size, 0x40);
+   uint8_t prog_type;
 
    switch (prog->type) {
    case PIPE_SHADER_VERTEX:   heap = nv50->screen->vp_code_heap; break;
    case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break;
    case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break;
+   case PIPE_SHADER_COMPUTE:  heap = nv50->screen->fp_code_heap; break;
    default:
       assert(!"invalid program type");
       return false;
@@ -450,7 +461,14 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
          return false;
       }
    }
-   prog->code_base = prog->mem->start;
+
+   if (prog->type == PIPE_SHADER_COMPUTE) {
+      /* CP code must be uploaded in FP code segment. */
+      prog_type = 1;
+   } else {
+      prog->code_base = prog->mem->start;
+      prog_type = prog->type;
+   }
 
    ret = nv50_tls_realloc(nv50->screen, prog->tls_space);
    if (ret < 0) {
@@ -468,7 +486,7 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
                             false /* flatshade */);
 
    nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
-                       (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
+                       (prog_type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
                        NOUVEAU_BO_VRAM, prog->code_size, prog->code);
 
    BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h
index 7a33eb11d6d..f0016707163 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -98,6 +98,13 @@ struct nv50_program {
       ubyte viewportid; /* hw value of viewport index output */
    } gp;
 
+   struct {
+      uint32_t lmem_size; /* local memory (TGSI PRIVATE resource) size */
+      uint32_t smem_size; /* shared memory (TGSI LOCAL resource) size */
+      void *syms;
+      unsigned num_syms;
+   } cp;
+
    void *fixups; /* relocation records */
    void *interps; /* interpolation records */
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index f47e998ab1e..0142e86ba20 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -41,8 +41,6 @@
 
 #define THREADS_IN_WARP 32
 
-#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float))
-
 static boolean
 nv50_screen_is_format_supported(struct pipe_screen *pscreen,
                                 enum pipe_format format,
@@ -183,6 +181,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
    case PIPE_CAP_SHAREABLE_SHADERS:
    case PIPE_CAP_CLEAR_TEXTURE:
+   case PIPE_CAP_COMPUTE:
       return 1;
    case PIPE_CAP_SEAMLESS_CUBE_MAP:
       return 1; /* class_3d >= NVA0_3D_CLASS; */
@@ -212,7 +211,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_FAKE_SW_MSAA:
    case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
    case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
-   case PIPE_CAP_COMPUTE:
    case PIPE_CAP_DRAW_INDIRECT:
    case PIPE_CAP_VERTEXID_NOBASE:
    case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */
@@ -251,6 +249,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
    case PIPE_SHADER_VERTEX:
    case PIPE_SHADER_GEOMETRY:
    case PIPE_SHADER_FRAGMENT:
+   case PIPE_SHADER_COMPUTE:
       break;
    default:
       return 0;
@@ -336,6 +335,52 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
    return 0.0f;
 }
 
+static int
+nv50_screen_get_compute_param(struct pipe_screen *pscreen,
+                              enum pipe_compute_cap param, void *data)
+{
+   struct nv50_screen *screen = nv50_screen(pscreen);
+
+#define RET(x) do {                  \
+   if (data)                         \
+      memcpy(data, x, sizeof(x));    \
+   return sizeof(x);                 \
+} while (0)
+
+   switch (param) {
+   case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+      RET((uint64_t []) { 2 });
+   case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+      RET(((uint64_t []) { 65535, 65535 }));
+   case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+      RET(((uint64_t []) { 512, 512, 64 }));
+   case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+      RET((uint64_t []) { 512 });
+   case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g0-15[] */
+      RET((uint64_t []) { 1ULL << 32 });
+   case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
+      RET((uint64_t []) { 16 << 10 });
+   case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
+      RET((uint64_t []) { 16 << 10 });
+   case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
+      RET((uint64_t []) { 4096 });
+   case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+      RET((uint32_t []) { 32 });
+   case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
+      RET((uint64_t []) { 1ULL << 40 });
+   case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+      RET((uint32_t []) { 0 });
+   case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+      RET((uint32_t []) { screen->mp_count });
+   case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+      RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */
+   default:
+      return 0;
+   }
+
+#undef RET
+}
+
 static void
 nv50_screen_destroy(struct pipe_screen *pscreen)
 {
@@ -377,6 +422,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
    nouveau_object_del(&screen->tesla);
    nouveau_object_del(&screen->eng2d);
    nouveau_object_del(&screen->m2mf);
+   nouveau_object_del(&screen->compute);
    nouveau_object_del(&screen->sync);
 
    nouveau_screen_fini(&screen->base);
@@ -742,6 +788,7 @@ nv50_screen_create(struct nouveau_device *dev)
    pscreen->get_param = nv50_screen_get_param;
    pscreen->get_shader_param = nv50_screen_get_shader_param;
    pscreen->get_paramf = nv50_screen_get_paramf;
+   pscreen->get_compute_param = nv50_screen_get_compute_param;
 
    nv50_screen_init_resource_functions(pscreen);
 
@@ -851,6 +898,8 @@ nv50_screen_create(struct nouveau_device *dev)
    screen->TPs = util_bitcount(value & 0xffff);
    screen->MPsInTP = util_bitcount((value >> 24) & 0xf);
 
+   screen->mp_count = screen->TPs * screen->MPsInTP;
+
    stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP *
          STACK_WARPS_ALLOC * 64 * 8;
 
@@ -902,6 +951,12 @@ nv50_screen_create(struct nouveau_device *dev)
 
    nv50_screen_init_hwctx(screen);
 
+   ret = nv50_screen_compute_setup(screen, screen->base.pushbuf);
+   if (ret) {
+      NOUVEAU_ERR("Failed to init compute context: %d\n", ret);
+      goto fail;
+   }
+
    nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
 
    return pscreen;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index ce51f0fc254..153ceea7a4f 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -23,6 +23,10 @@ struct nv50_context;
 
 #define NV50_MAX_VIEWPORTS 16
 
+#define NV50_MAX_GLOBALS 16
+
+#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float))
+
 struct nv50_blitter;
 
 struct nv50_graph_state {
@@ -66,6 +70,7 @@ struct nv50_screen {
    unsigned MPsInTP;
    unsigned max_tls_space;
    unsigned cur_tls_space;
+   unsigned mp_count;
 
    struct nouveau_heap *vp_code_heap;
    struct nouveau_heap *gp_code_heap;
@@ -93,6 +98,7 @@ struct nv50_screen {
    struct nouveau_object *sync;
 
    struct nouveau_object *tesla;
+   struct nouveau_object *compute;
    struct nouveau_object *eng2d;
    struct nouveau_object *m2mf;
 };
@@ -109,6 +115,8 @@ void nv50_blitter_destroy(struct nv50_screen *);
 int nv50_screen_tic_alloc(struct nv50_screen *, void *);
 int nv50_screen_tsc_alloc(struct nv50_screen *, void *);
 
+int nv50_screen_compute_setup(struct nv50_screen *, struct nouveau_pushbuf *);
+
 static inline void
 nv50_resource_fence(struct nv04_resource *res, uint32_t flags)
 {
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index d27f12ca94b..b4ea08d4d13 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -792,6 +792,35 @@ nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso)
     nv50->dirty |= NV50_NEW_GMTYPROG;
 }
 
+static void *
+nv50_cp_state_create(struct pipe_context *pipe,
+                     const struct pipe_compute_state *cso)
+{
+   struct nv50_program *prog;
+
+   prog = CALLOC_STRUCT(nv50_program);
+   if (!prog)
+      return NULL;
+   prog->type = PIPE_SHADER_COMPUTE;
+
+   prog->cp.smem_size = cso->req_local_mem;
+   prog->cp.lmem_size = cso->req_private_mem;
+   prog->parm_size = cso->req_input_mem;
+
+   prog->pipe.tokens = tgsi_dup_tokens((const struct tgsi_token *)cso->prog);
+
+   return (void *)prog;
+}
+
+static void
+nv50_cp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+
+   nv50->compprog = hwcso;
+   nv50->dirty_cp |= NV50_NEW_CP_PROGRAM;
+}
+
 static void
 nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
                          struct pipe_constant_buffer *cb)
@@ -1134,6 +1163,70 @@ nv50_set_stream_output_targets(struct pipe_context *pipe,
       nv50->dirty |= NV50_NEW_STRMOUT;
 }
 
+static void
+nv50_set_compute_resources(struct pipe_context *pipe,
+                           unsigned start, unsigned nr,
+                           struct pipe_surface **resources)
+{
+   /* TODO: bind surfaces */
+}
+
+static inline void
+nv50_set_global_handle(uint32_t *phandle, struct pipe_resource *res)
+{
+   struct nv04_resource *buf = nv04_resource(res);
+   if (buf) {
+      uint64_t limit = (buf->address + buf->base.width0) - 1; /* end addr */
+      if (limit < (1ULL << 32)) { /* end address fits in 32 bits */
+         *phandle = (uint32_t)buf->address;
+      } else {
+         NOUVEAU_ERR("Cannot map into TGSI_RESOURCE_GLOBAL: "
+                     "resource not contained within 32-bit address space !\n");
+         *phandle = 0; /* out of range: hand back a zero handle */
+      }
+   } else {
+      *phandle = 0; /* no buffer: hand back a zero handle */
+   }
+}
+
+/* Bind (resources != NULL) or unbind nr global buffers from slot start on. */
+static void
+nv50_set_global_bindings(struct pipe_context *pipe,
+                         unsigned start, unsigned nr,
+                         struct pipe_resource **resources,
+                         uint32_t **handles)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct pipe_resource **ptr;
+   unsigned i;
+   const unsigned end = start + nr;
+
+   if (nv50->global_residents.size <= (end * sizeof(struct pipe_resource *))) {
+      const unsigned old_size = nv50->global_residents.size;
+      const unsigned req_size = end * sizeof(struct pipe_resource *);
+      util_dynarray_resize(&nv50->global_residents, req_size);
+      memset((uint8_t *)nv50->global_residents.data + old_size, 0,
+             req_size - old_size); /* zero the newly added slots */
+   }
+
+   ptr = util_dynarray_element(
+      &nv50->global_residents, struct pipe_resource *, start);
+   if (resources) {
+      for (i = 0; i < nr; ++i) {
+         pipe_resource_reference(&ptr[i], resources[i]);
+         nv50_set_global_handle(handles[i], resources[i]);
+      }
+   } else {
+      for (i = 0; i < nr; ++i)
+         pipe_resource_reference(&ptr[i], NULL);
+   }
+
+   nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL);
+
+   /* OR the flag in: a plain store would drop a pending CP_PROGRAM. */
+   nv50->dirty_cp |= NV50_NEW_CP_GLOBALS;
+}
+
 void
 nv50_init_state_functions(struct nv50_context *nv50)
 {
@@ -1162,12 +1255,15 @@ nv50_init_state_functions(struct nv50_context *nv50)
    pipe->create_vs_state = nv50_vp_state_create;
    pipe->create_fs_state = nv50_fp_state_create;
    pipe->create_gs_state = nv50_gp_state_create;
+   pipe->create_compute_state = nv50_cp_state_create;
    pipe->bind_vs_state = nv50_vp_state_bind;
    pipe->bind_fs_state = nv50_fp_state_bind;
    pipe->bind_gs_state = nv50_gp_state_bind;
+   pipe->bind_compute_state = nv50_cp_state_bind;
    pipe->delete_vs_state = nv50_sp_state_delete;
    pipe->delete_fs_state = nv50_sp_state_delete;
    pipe->delete_gs_state = nv50_sp_state_delete;
+   pipe->delete_compute_state = nv50_sp_state_delete;
 
    pipe->set_blend_color = nv50_set_blend_color;
    pipe->set_stencil_ref = nv50_set_stencil_ref;
@@ -1191,6 +1287,9 @@ nv50_init_state_functions(struct nv50_context *nv50)
    pipe->stream_output_target_destroy = nv50_so_target_destroy;
    pipe->set_stream_output_targets = nv50_set_stream_output_targets;
 
+   pipe->set_global_binding = nv50_set_global_bindings;
+   pipe->set_compute_resources = nv50_set_compute_resources;
+
    nv50->sample_mask = ~0;
    nv50->min_samples = 1;
 }

From 6a9c151dbb87a10b6d51c451a5a277d646d08857 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Tue, 10 Nov 2015 01:27:15 +0100
Subject: [PATCH 004/335] nv50: add compute-related MP perf counters on G84+

These compute-related MP performance counters have been reverse
engineered using CUPTI which is part of NVIDIA CUDA.

As for nvc0, we use a compute kernel to read out those performance
counters, and the command stream to configure them. Note that Tesla
only exposes 4 MP performance counters, while Fermi has 8.

Only G84+ is supported because G80 is an old and weird card.

Tested on G84, G96, G200, MCP79 and GT218 with glxgears, glxspheres64,
xonotic-glx, heaven and valley.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tested-by: Pierre Moreau <pierre.morrow@free.fr>
Acked-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/Makefile.sources  |   2 +
 .../drivers/nouveau/nv50/nv50_context.h       |   3 +-
 src/gallium/drivers/nouveau/nv50/nv50_query.c |  23 +
 .../drivers/nouveau/nv50/nv50_query_hw.c      |  34 +-
 .../drivers/nouveau/nv50/nv50_query_hw.h      |  16 +
 .../drivers/nouveau/nv50/nv50_query_hw_sm.c   | 417 ++++++++++++++++++
 .../drivers/nouveau/nv50/nv50_query_hw_sm.h   |  45 ++
 .../drivers/nouveau/nv50/nv50_screen.c        |   1 +
 .../drivers/nouveau/nv50/nv50_screen.h        |   9 +
 9 files changed, 548 insertions(+), 2 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
 create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h

diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources
index c2ff8e9b46e..a1aa13587a1 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -77,6 +77,8 @@ NV50_C_SOURCES := \
 	nv50/nv50_query.h \
 	nv50/nv50_query_hw.c \
 	nv50/nv50_query_hw.h \
+	nv50/nv50_query_hw_sm.c \
+	nv50/nv50_query_hw_sm.h \
 	nv50/nv50_resource.c \
 	nv50/nv50_resource.h \
 	nv50/nv50_screen.c \
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index fbafe029948..2cebcd99423 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -67,7 +67,8 @@
 /* compute bufctx (during launch_grid) */
 #define NV50_BIND_CP_GLOBAL   0
 #define NV50_BIND_CP_SCREEN   1
-#define NV50_BIND_CP_COUNT    2
+#define NV50_BIND_CP_QUERY    2
+#define NV50_BIND_CP_COUNT    3
 
 /* bufctx for other operations */
 #define NV50_BIND_2D          0
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index dd9b85b7208..c31bf728c61 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -153,3 +153,26 @@ nv50_init_query_functions(struct nv50_context *nv50)
    pipe->get_query_result = nv50_get_query_result;
    pipe->render_condition = nv50_render_condition;
 }
+
+int
+nv50_screen_get_driver_query_info(struct pipe_screen *pscreen,
+                                  unsigned id,
+                                  struct pipe_driver_query_info *info)
+{
+   struct nv50_screen *screen = nv50_screen(pscreen);
+   /* Only HW queries are exposed, so their count is the total. */
+   const int count = nv50_hw_get_driver_query_info(screen, 0, NULL);
+
+   /* A NULL info only asks how many queries exist. */
+   if (info == NULL)
+      return count;
+
+   /* Pre-fill info with placeholders before the HW layer describes 'id'. */
+   info->group_id = -1;
+   info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
+   info->max_value.u64 = 0;
+   info->query_type = 0xdeadd01d;
+   info->name = "this_is_not_the_query_you_are_looking_for";
+
+   return nv50_hw_get_driver_query_info(screen, id, info);
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
index 945ce7abe50..23108acbef5 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
@@ -25,6 +25,7 @@
 
 #include "nv50/nv50_context.h"
 #include "nv50/nv50_query_hw.h"
+#include "nv50/nv50_query_hw_sm.h"
 #include "nv_object.xml.h"
 
 #define NV50_HW_QUERY_STATE_READY   0
@@ -41,7 +42,7 @@
 
 #define NV50_HW_QUERY_ALLOC_SPACE 256
 
-static bool
+bool
 nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q,
                        int size)
 {
@@ -122,6 +123,9 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
    struct nouveau_pushbuf *push = nv50->base.pushbuf;
    struct nv50_hw_query *hq = nv50_hw_query(q);
 
+   if (hq->funcs && hq->funcs->begin_query)
+      return hq->funcs->begin_query(nv50, hq);
+
    /* For occlusion queries we have to change the storage, because a previous
     * query might set the initial render condition to false even *after* we re-
     * initialized it to true.
@@ -193,6 +197,11 @@ nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
    struct nouveau_pushbuf *push = nv50->base.pushbuf;
    struct nv50_hw_query *hq = nv50_hw_query(q);
 
+   if (hq->funcs && hq->funcs->end_query) {
+      hq->funcs->end_query(nv50, hq);
+      return;
+   }
+
    hq->state = NV50_HW_QUERY_STATE_ENDED;
 
    switch (q->type) {
@@ -261,6 +270,9 @@ nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
    uint64_t *data64 = (uint64_t *)hq->data;
    int i;
 
+   if (hq->funcs && hq->funcs->get_query_result)
+      return hq->funcs->get_query_result(nv50, hq, wait, result);
+
    if (hq->state != NV50_HW_QUERY_STATE_READY)
       nv50_hw_query_update(q);
 
@@ -331,6 +343,12 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
    struct nv50_hw_query *hq;
    struct nv50_query *q;
 
+   hq = nv50_hw_sm_create_query(nv50, type);
+   if (hq) {
+      hq->base.funcs = &hw_query_funcs;
+      return (struct nv50_query *)hq;
+   }
+
    hq = CALLOC_STRUCT(nv50_hw_query);
    if (!hq)
       return NULL;
@@ -375,6 +393,20 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
    return q;
 }
 
+/* HW driver queries: the count when info is NULL, else the entry for 'id'. */
+int
+nv50_hw_get_driver_query_info(struct nv50_screen *screen, unsigned id,
+                              struct pipe_driver_query_info *info)
+{
+   /* Only the MP (SM) performance counters are exposed at this level. */
+   const int sm_count = nv50_hw_sm_get_driver_query_info(screen, 0, NULL);
+
+   if (info != NULL)
+      return nv50_hw_sm_get_driver_query_info(screen, id, info);
+
+   return sm_count;
+}
+
 void
 nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
                              struct nv50_query *q, unsigned result_offset)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
index 294c67de9a4..82ec6bd2d96 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
@@ -8,8 +8,19 @@
 
 #define NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
 
+struct nv50_hw_query;
+
+struct nv50_hw_query_funcs {
+   void (*destroy_query)(struct nv50_context *, struct nv50_hw_query *);
+   boolean (*begin_query)(struct nv50_context *, struct nv50_hw_query *);
+   void (*end_query)(struct nv50_context *, struct nv50_hw_query *);
+   boolean (*get_query_result)(struct nv50_context *, struct nv50_hw_query *,
+                               boolean, union pipe_query_result *);
+};
+
 struct nv50_hw_query {
    struct nv50_query base;
+   const struct nv50_hw_query_funcs *funcs;
    uint32_t *data;
    uint32_t sequence;
    struct nouveau_bo *bo;
@@ -31,6 +42,11 @@ nv50_hw_query(struct nv50_query *q)
 
 struct nv50_query *
 nv50_hw_create_query(struct nv50_context *, unsigned, unsigned);
+int
+nv50_hw_get_driver_query_info(struct nv50_screen *, unsigned,
+                              struct pipe_driver_query_info *);
+bool
+nv50_hw_query_allocate(struct nv50_context *, struct nv50_query *, int);
 void
 nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t,
                              struct nv50_query *, unsigned);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
new file mode 100644
index 00000000000..e75b428fb12
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
@@ -0,0 +1,417 @@
+/*
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define NV50_PUSH_EXPLICIT_SPACE_CHECKING
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_query_hw_sm.h"
+
+#include "nv_object.xml.h"
+#include "nv50/nv50_compute.xml.h"
+
+/* === PERFORMANCE MONITORING COUNTERS for NV84+ === */
+
+/* NOTE: intentionally using the same names as NV */
+static const char *nv50_hw_sm_query_names[] =
+{
+   "branch",
+   "divergent_branch",
+   "instructions",
+   "prof_trigger_00",
+   "prof_trigger_01",
+   "prof_trigger_02",
+   "prof_trigger_03",
+   "prof_trigger_04",
+   "prof_trigger_05",
+   "prof_trigger_06",
+   "prof_trigger_07",
+   "sm_cta_launched",
+   "warp_serialize",
+};
+
+static const uint64_t nv50_read_hw_sm_counters_code[] =
+{
+   /* and b32 $r0 $r0 0x0000ffff
+    * add b32 $c0 $r0 $r0 $r0
+    * (lg $c0) ret
+    * mov $r0 $pm0
+    * mov $r1 $pm1
+    * mov $r2 $pm2
+    * mov $r3 $pm3
+    * mov $r4 $physid
+    * ld $r5 b32 s[0x10]
+    * ld $r6 b32 s[0x14]
+    * and b32 $r4 $r4 0x000f0000
+    * shr u32 $r4 $r4 0x10
+    * mul $r4 u24 $r4 0x14
+    * add b32 $r5 $r5 $r4
+    * st b32 g15[$r5] $r0
+    * add b32 $r5 $r5 0x04
+    * st b32 g15[$r5] $r1
+    * add b32 $r5 $r5 0x04
+    * st b32 g15[$r5] $r2
+    * add b32 $r5 $r5 0x04
+    * st b32 g15[$r5] $r3
+    * add b32 $r5 $r5 0x04
+    * exit st b32 g15[$r5] $r6 */
+   0x00000fffd03f0001ULL,
+   0x040007c020000001ULL,
+   0x0000028030000003ULL,
+   0x6001078000000001ULL,
+   0x6001478000000005ULL,
+   0x6001878000000009ULL,
+   0x6001c7800000000dULL,
+   0x6000078000000011ULL,
+   0x4400c78010000815ULL,
+   0x4400c78010000a19ULL,
+   0x0000f003d0000811ULL,
+   0xe410078030100811ULL,
+   0x0000000340540811ULL,
+   0x0401078020000a15ULL,
+   0xa0c00780d00f0a01ULL,
+   0x0000000320048a15ULL,
+   0xa0c00780d00f0a05ULL,
+   0x0000000320048a15ULL,
+   0xa0c00780d00f0a09ULL,
+   0x0000000320048a15ULL,
+   0xa0c00780d00f0a0dULL,
+   0x0000000320048a15ULL,
+   0xa0c00781d00f0a19ULL,
+};
+
+struct nv50_hw_sm_counter_cfg
+{
+   uint32_t mode : 4;    /* LOGOP, LOGOP_PULSE */
+   uint32_t unit : 8;    /* UNK[0-5] */
+   uint32_t sig  : 8;    /* signal selection */
+};
+
+struct nv50_hw_sm_query_cfg
+{
+   struct nv50_hw_sm_counter_cfg ctr[4];
+   uint8_t num_counters;
+};
+
+#define _Q(n, m, u, s) [NV50_HW_SM_QUERY_##n] = { { { NV50_COMPUTE_MP_PM_CONTROL_MODE_##m, NV50_COMPUTE_MP_PM_CONTROL_UNIT_##u, s, }, {}, {}, {} }, 1 }
+
+/* ==== Compute capability 1.1 (G84+) ==== */
+static const struct nv50_hw_sm_query_cfg sm11_hw_sm_queries[] =
+{
+   _Q(BRANCH,           LOGOP, UNK4, 0x02),
+   _Q(DIVERGENT_BRANCH, LOGOP, UNK4, 0x09),
+   _Q(INSTRUCTIONS,     LOGOP, UNK4, 0x04),
+   _Q(PROF_TRIGGER_0,   LOGOP, UNK1, 0x26),
+   _Q(PROF_TRIGGER_1,   LOGOP, UNK1, 0x27),
+   _Q(PROF_TRIGGER_2,   LOGOP, UNK1, 0x28),
+   _Q(PROF_TRIGGER_3,   LOGOP, UNK1, 0x29),
+   _Q(PROF_TRIGGER_4,   LOGOP, UNK1, 0x2a),
+   _Q(PROF_TRIGGER_5,   LOGOP, UNK1, 0x2b),
+   _Q(PROF_TRIGGER_6,   LOGOP, UNK1, 0x2c),
+   _Q(PROF_TRIGGER_7,   LOGOP, UNK1, 0x2d),
+   _Q(SM_CTA_LAUNCHED,  LOGOP, UNK1, 0x33),
+   _Q(WARP_SERIALIZE,   LOGOP, UNK0, 0x0b),
+};
+
+static inline uint16_t nv50_hw_sm_get_func(uint8_t slot)
+{
+   switch (slot) { /* one fixed 16-bit func value per counter slot 0-3 */
+   case 0: return 0xaaaa;
+   case 1: return 0xcccc;
+   case 2: return 0xf0f0;
+   case 3: return 0xff00;
+   }
+   return 0; /* slot is not in 0-3 */
+}
+
+static const struct nv50_hw_sm_query_cfg *
+nv50_hw_sm_query_get_cfg(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+   struct nv50_query *q = &hq->base; /* the nv50 argument is not consulted */
+   return &sm11_hw_sm_queries[q->type - NV50_HW_SM_QUERY(0)]; /* no range check */
+}
+
+static void
+nv50_hw_sm_destroy_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+   struct nv50_query *q = &hq->base; /* forward to the generic query vtable */
+   q->funcs->destroy_query(nv50, q); /* NOTE(review): nv50_hw_sm_create_query() does not set base.funcs; confirm every creator assigns it */
+}
+
+static boolean
+nv50_hw_sm_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+   struct nv50_screen *screen = nv50->screen;
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq);
+   const struct nv50_hw_sm_query_cfg *cfg;
+   uint16_t func;
+   int i, c;
+
+   cfg = nv50_hw_sm_query_get_cfg(nv50, hq);
+
+   /* a query needs cfg->num_counters of the 4 MP counter slots; fail early */
+   if (screen->pm.num_hw_sm_active + cfg->num_counters > 4) {
+      NOUVEAU_ERR("Not enough free MP counter slots !\n");
+      return false;
+   }
+
+   assert(cfg->num_counters <= 4);
+   PUSH_SPACE(push, 4 * 4); /* worst case: 4 counters, 2 methods + 2 data each */
+
+   /* zero each MP's sequence word: records are 0x14 bytes, sequence at 0x10 */
+   for (i = 0; i < screen->MPsInTP; ++i) {
+      const unsigned b = (0x14 / 4) * i;
+      hq->data[b + 4] = 0; /* dword 4, the word nv50_hw_sm_query_read_data() polls */
+   }
+   hq->sequence++;
+
+   for (i = 0; i < cfg->num_counters; i++) {
+      screen->pm.num_hw_sm_active++;
+
+      /* take the first free slot and remember it in hsq->ctr[i] */
+      for (c = 0; c < 4; ++c) {
+         if (!screen->pm.mp_counter[c]) {
+            hsq->ctr[i] = c;
+            screen->pm.mp_counter[c] = hsq;
+            break;
+         }
+      }
+
+      /* select func to aggregate counters */
+      func = nv50_hw_sm_get_func(c);
+
+      /* configure and reset the counter(s) */
+      BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1);
+      PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8)
+                        | cfg->ctr[i].unit | cfg->ctr[i].mode);
+      BEGIN_NV04(push, NV50_COMPUTE(MP_PM_SET(c)), 1);
+      PUSH_DATA (push, 0);
+   }
+   return true;
+}
+
+static void
+nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+   struct nv50_screen *screen = nv50->screen;
+   struct pipe_context *pipe = &nv50->base.pipe;
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq);
+   uint32_t mask;
+   uint32_t input[3]; /* only input[0] and input[1] are filled in below */
+   const uint block[3] = { 32, 1, 1 };
+   const uint grid[3] = { screen->MPsInTP, screen->TPs, 1 };
+   int c;
+
+   if (unlikely(!screen->pm.prog)) { /* first use: set up the readback program */
+      struct nv50_program *prog = CALLOC_STRUCT(nv50_program); /* NOTE(review): not NULL-checked */
+      prog->type = PIPE_SHADER_COMPUTE;
+      prog->translated = true;
+      prog->max_gpr = 7;
+      prog->parm_size = 8;
+      prog->code = (uint32_t *)nv50_read_hw_sm_counters_code;
+      prog->code_size = sizeof(nv50_read_hw_sm_counters_code);
+      screen->pm.prog = prog;
+   }
+
+   /* disable all counting */
+   PUSH_SPACE(push, 8);
+   for (c = 0; c < 4; c++) {
+      if (screen->pm.mp_counter[c]) {
+         BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1);
+         PUSH_DATA (push, 0);
+      }
+   }
+
+   /* release counters for this query */
+   for (c = 0; c < 4; c++) {
+      if (screen->pm.mp_counter[c] == hsq) {
+         screen->pm.num_hw_sm_active--;
+         screen->pm.mp_counter[c] = NULL;
+      }
+   }
+
+   BCTX_REFN_bo(nv50->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR,
+                hq->bo);
+
+   PUSH_SPACE(push, 2);
+   BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
+   PUSH_DATA (push, 0);
+
+   pipe->bind_compute_state(pipe, screen->pm.prog);
+   input[0] = hq->bo->offset + hq->base_offset;
+   input[1] = hq->sequence;
+   pipe->launch_grid(pipe, block, grid, 0, input);
+
+   nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_QUERY);
+
+   /* re-activate counters still owned by other queries */
+   PUSH_SPACE(push, 8);
+   mask = 0;
+   for (c = 0; c < 4; c++) {
+      const struct nv50_hw_sm_query_cfg *cfg;
+      unsigned i;
+
+      hsq = screen->pm.mp_counter[c];
+      if (!hsq)
+         continue;
+
+      cfg = nv50_hw_sm_query_get_cfg(nv50, &hsq->base);
+      for (i = 0; i < cfg->num_counters; i++) {
+         uint16_t func;
+
+         if (mask & (1 << hsq->ctr[i])) /* slot already re-programmed */
+            break;
+
+         mask |= 1 << hsq->ctr[i];
+         func  = nv50_hw_sm_get_func(hsq->ctr[i]);
+
+         BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(hsq->ctr[i])), 1);
+         PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8)
+                    | cfg->ctr[i].unit | cfg->ctr[i].mode);
+      }
+   }
+}
+
+static inline bool
+nv50_hw_sm_query_read_data(uint32_t count[32][4],
+                           struct nv50_context *nv50, bool wait,
+                           struct nv50_hw_query *hq,
+                           const struct nv50_hw_sm_query_cfg *cfg,
+                           unsigned mp_count)
+{
+   struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq);
+   unsigned p, c;
+
+   for (p = 0; p < mp_count; ++p) {
+      const unsigned b = (0x14 / 4) * p; /* 5 dwords per MP record */
+
+      for (c = 0; c < cfg->num_counters; ++c) {
+         if (hq->data[b + 4] != hq->sequence) { /* this MP's result is not there yet */
+            if (!wait)
+               return false;
+            if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->base.client))
+               return false;
+         }
+         count[p][c] = hq->data[b + hsq->ctr[c]]; /* ctr[c]: slot picked in begin_query */
+      }
+   }
+   return true;
+}
+
+static boolean
+nv50_hw_sm_get_query_result(struct nv50_context *nv50, struct nv50_hw_query *hq,
+                            boolean wait, union pipe_query_result *result)
+{
+   uint32_t count[32][4];
+   uint64_t value = 0;
+   unsigned mp_count = MIN2(nv50->screen->MPsInTP, 32); /* count[] has 32 rows */
+   unsigned p, c;
+   const struct nv50_hw_sm_query_cfg *cfg;
+   bool ret;
+
+   cfg = nv50_hw_sm_query_get_cfg(nv50, hq);
+
+   ret = nv50_hw_sm_query_read_data(count, nv50, wait, hq, cfg, mp_count);
+   if (!ret)
+      return false;
+
+   for (c = 0; c < cfg->num_counters; ++c) /* sum every counter over every MP */
+      for (p = 0; p < mp_count; ++p)
+         value += count[p][c];
+
+   /* We only count a single TP, and simply multiply by the total number of
+    * TPs to compute result over all TPs. This is inaccurate, but enough! */
+   value *= nv50->screen->TPs;
+
+   *(uint64_t *)result = value; /* stored as a raw 64-bit value */
+   return true;
+}
+
+static const struct nv50_hw_query_funcs hw_sm_query_funcs = {
+   .destroy_query = nv50_hw_sm_destroy_query,
+   .begin_query = nv50_hw_sm_begin_query,
+   .end_query = nv50_hw_sm_end_query,
+   .get_query_result = nv50_hw_sm_get_query_result,
+};
+
+struct nv50_hw_query *
+nv50_hw_sm_create_query(struct nv50_context *nv50, unsigned type)
+{
+   struct nv50_hw_sm_query *hsq;
+   struct nv50_hw_query *hq;
+   unsigned space;
+
+   if (type < NV50_HW_SM_QUERY(0) || type > NV50_HW_SM_QUERY_LAST)
+      return NULL; /* not an SM counter query type */
+
+   hsq = CALLOC_STRUCT(nv50_hw_sm_query);
+   if (!hsq)
+      return NULL;
+
+   hq = &hsq->base;
+   hq->funcs = &hw_sm_query_funcs;
+   hq->base.type = type; /* note: hq->base.funcs is not assigned here */
+
+   /*
+    * for each MP:
+    * [00] = MP.C0
+    * [04] = MP.C1
+    * [08] = MP.C2
+    * [0c] = MP.C3
+    * [10] = MP.sequence
+    */
+   space = (4 + 1) * nv50->screen->MPsInTP * sizeof(uint32_t);
+
+   if (!nv50_hw_query_allocate(nv50, &hq->base, space)) {
+      FREE(hq); /* hq is hsq's first member, so this is the hsq allocation */
+      return NULL;
+   }
+
+   return hq;
+}
+
+int
+nv50_hw_sm_get_driver_query_info(struct nv50_screen *screen, unsigned id,
+                                 struct pipe_driver_query_info *info)
+{
+   int count = 0; /* stays 0 unless a compute object exists on NV84+ */
+
+   if (screen->compute)
+      if (screen->base.class_3d >= NV84_3D_CLASS)
+         count += NV50_HW_SM_QUERY_COUNT;
+
+   if (!info)
+      return count; /* NULL info: caller only wants the number of queries */
+
+   if (id < count) {
+      if (screen->compute) {
+         if (screen->base.class_3d >= NV84_3D_CLASS) {
+            info->name = nv50_hw_sm_query_names[id];
+            info->query_type = NV50_HW_SM_QUERY(id);
+            info->group_id = -1;
+            return 1; /* info was filled in */
+         }
+      }
+   }
+   return 0; /* id is out of range */
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h
new file mode 100644
index 00000000000..c1a1cd175e3
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h
@@ -0,0 +1,45 @@
+#ifndef __NV50_QUERY_HW_SM_H__
+#define __NV50_QUERY_HW_SM_H__
+
+#include "nv50_query_hw.h"
+
+struct nv50_hw_sm_query {
+   struct nv50_hw_query base;
+   uint8_t ctr[4];
+};
+
+static inline struct nv50_hw_sm_query *
+nv50_hw_sm_query(struct nv50_hw_query *hq)
+{
+   return (struct nv50_hw_sm_query *)hq;
+}
+
+/*
+ * Performance counter queries:
+ */
+#define NV50_HW_SM_QUERY(i)    (PIPE_QUERY_DRIVER_SPECIFIC + (i))
+#define NV50_HW_SM_QUERY_LAST   NV50_HW_SM_QUERY(NV50_HW_SM_QUERY_COUNT - 1)
+enum nv50_hw_sm_queries
+{
+   NV50_HW_SM_QUERY_BRANCH = 0,
+   NV50_HW_SM_QUERY_DIVERGENT_BRANCH,
+   NV50_HW_SM_QUERY_INSTRUCTIONS,
+   NV50_HW_SM_QUERY_PROF_TRIGGER_0,
+   NV50_HW_SM_QUERY_PROF_TRIGGER_1,
+   NV50_HW_SM_QUERY_PROF_TRIGGER_2,
+   NV50_HW_SM_QUERY_PROF_TRIGGER_3,
+   NV50_HW_SM_QUERY_PROF_TRIGGER_4,
+   NV50_HW_SM_QUERY_PROF_TRIGGER_5,
+   NV50_HW_SM_QUERY_PROF_TRIGGER_6,
+   NV50_HW_SM_QUERY_PROF_TRIGGER_7,
+   NV50_HW_SM_QUERY_SM_CTA_LAUNCHED,
+   NV50_HW_SM_QUERY_WARP_SERIALIZE,
+   NV50_HW_SM_QUERY_COUNT,
+};
+
+struct nv50_hw_query *
+nv50_hw_sm_create_query(struct nv50_context *, unsigned);
+int
+nv50_hw_sm_get_driver_query_info(struct nv50_screen *, unsigned,
+                                 struct pipe_driver_query_info *);
+#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 0142e86ba20..4e7201d7dd9 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -789,6 +789,7 @@ nv50_screen_create(struct nouveau_device *dev)
    pscreen->get_shader_param = nv50_screen_get_shader_param;
    pscreen->get_paramf = nv50_screen_get_paramf;
    pscreen->get_compute_param = nv50_screen_get_compute_param;
+   pscreen->get_driver_query_info = nv50_screen_get_driver_query_info;
 
    nv50_screen_init_resource_functions(pscreen);
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index 153ceea7a4f..c2a16d8bd1d 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -95,6 +95,12 @@ struct nv50_screen {
       struct nouveau_bo *bo;
    } fence;
 
+   struct {
+      struct nv50_program *prog; /* compute state object to read MP counters */
+      struct nv50_hw_sm_query *mp_counter[4]; /* counter to query allocation */
+      uint8_t num_hw_sm_active;
+   } pm;
+
    struct nouveau_object *sync;
 
    struct nouveau_object *tesla;
@@ -109,6 +115,9 @@ nv50_screen(struct pipe_screen *screen)
    return (struct nv50_screen *)screen;
 }
 
+int nv50_screen_get_driver_query_info(struct pipe_screen *, unsigned,
+                                      struct pipe_driver_query_info *);
+
 bool nv50_blitter_create(struct nv50_screen *);
 void nv50_blitter_destroy(struct nv50_screen *);
 

From 848fa3101d5077b1aecfb0886c69a7d0dd7f75bc Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Thu, 12 Nov 2015 00:59:00 +0100
Subject: [PATCH 005/335] nv50: add support for performance metrics on G84+

Currently only one metric is exposed but more will be added later.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tested-by: Pierre Moreau <pierre.morrow@free.fr>
Acked-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/Makefile.sources  |   2 +
 .../drivers/nouveau/nv50/nv50_query_hw.c      |  19 +-
 .../nouveau/nv50/nv50_query_hw_metric.c       | 207 ++++++++++++++++++
 .../nouveau/nv50/nv50_query_hw_metric.h       |  34 +++
 4 files changed, 259 insertions(+), 3 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c
 create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h

diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources
index a1aa13587a1..12821a670cd 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -77,6 +77,8 @@ NV50_C_SOURCES := \
 	nv50/nv50_query.h \
 	nv50/nv50_query_hw.c \
 	nv50/nv50_query_hw.h \
+	nv50/nv50_query_hw_metric.c \
+	nv50/nv50_query_hw_metric.h \
 	nv50/nv50_query_hw_sm.c \
 	nv50/nv50_query_hw_sm.h \
 	nv50/nv50_resource.c \
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
index 23108acbef5..b6ebbbf1010 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
@@ -25,6 +25,7 @@
 
 #include "nv50/nv50_context.h"
 #include "nv50/nv50_query_hw.h"
+#include "nv50/nv50_query_hw_metric.h"
 #include "nv50/nv50_query_hw_sm.h"
 #include "nv_object.xml.h"
 
@@ -349,6 +350,12 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
       return (struct nv50_query *)hq;
    }
 
+   hq = nv50_hw_metric_create_query(nv50, type);
+   if (hq) {
+      hq->base.funcs = &hw_query_funcs;
+      return (struct nv50_query *)hq;
+   }
+
    hq = CALLOC_STRUCT(nv50_hw_query);
    if (!hq)
       return NULL;
@@ -397,14 +404,20 @@ int
 nv50_hw_get_driver_query_info(struct nv50_screen *screen, unsigned id,
                               struct pipe_driver_query_info *info)
 {
-   int num_hw_sm_queries = 0;
+   int num_hw_sm_queries = 0, num_hw_metric_queries = 0;
 
    num_hw_sm_queries = nv50_hw_sm_get_driver_query_info(screen, 0, NULL);
+   num_hw_metric_queries =
+      nv50_hw_metric_get_driver_query_info(screen, 0, NULL);
 
    if (!info)
-      return num_hw_sm_queries;
+      return num_hw_sm_queries + num_hw_metric_queries;
 
-   return nv50_hw_sm_get_driver_query_info(screen, id, info);
+   if (id < num_hw_sm_queries)
+      return nv50_hw_sm_get_driver_query_info(screen, id, info);
+
+   return nv50_hw_metric_get_driver_query_info(screen,
+                                               id - num_hw_sm_queries, info);
 }
 
 void
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c
new file mode 100644
index 00000000000..13dad30f113
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_query_hw_metric.h"
+#include "nv50/nv50_query_hw_sm.h"
+
+/* === PERFORMANCE MONITORING METRICS for NV84+ === */
+static const char *nv50_hw_metric_names[] =
+{
+   "metric-branch_efficiency",
+};
+
+struct nv50_hw_metric_query_cfg {
+   uint32_t queries[4];
+   uint32_t num_queries;
+};
+
+#define _SM(n) NV50_HW_SM_QUERY(NV50_HW_SM_QUERY_ ##n)
+#define _M(n, c) [NV50_HW_METRIC_QUERY_##n] = c
+
+/* ==== Compute capability 1.1 (G84+) ==== */
+static const struct nv50_hw_metric_query_cfg
+sm11_branch_efficiency =
+{
+   .queries[0]  = _SM(BRANCH),
+   .queries[1]  = _SM(DIVERGENT_BRANCH),
+   .num_queries = 2,
+};
+
+static const struct nv50_hw_metric_query_cfg *sm11_hw_metric_queries[] =
+{
+   _M(BRANCH_EFFICIENCY, &sm11_branch_efficiency),
+};
+
+#undef _SM
+#undef _M
+
+static const struct nv50_hw_metric_query_cfg *
+nv50_hw_metric_query_get_cfg(struct nv50_context *nv50,
+                             struct nv50_hw_query *hq)
+{
+   struct nv50_query *q = &hq->base; /* the nv50 argument is not consulted */
+   return sm11_hw_metric_queries[q->type - NV50_HW_METRIC_QUERY(0)]; /* no range check */
+}
+
+static void
+nv50_hw_metric_destroy_query(struct nv50_context *nv50,
+                             struct nv50_hw_query *hq)
+{
+   struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq);
+   unsigned i;
+
+   for (i = 0; i < hmq->num_queries; i++) /* destroy each sub-query first */
+      hmq->queries[i]->funcs->destroy_query(nv50, hmq->queries[i]);
+   FREE(hmq); /* then the metric wrapper itself */
+}
+
+static boolean
+nv50_hw_metric_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+   struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq);
+   boolean ret = false; /* stays false when there are no sub-queries */
+   unsigned i;
+
+   for (i = 0; i < hmq->num_queries; i++) {
+      ret = hmq->queries[i]->funcs->begin_query(nv50, hmq->queries[i]);
+      if (!ret)
+         return ret; /* NOTE(review): sub-queries already begun are not ended here */
+   }
+   return ret;
+}
+
+static void
+nv50_hw_metric_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+   struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq);
+   unsigned i;
+
+   for (i = 0; i < hmq->num_queries; i++) /* end every sub-query in order */
+      hmq->queries[i]->funcs->end_query(nv50, hmq->queries[i]);
+}
+
+static uint64_t
+sm11_hw_metric_calc_result(struct nv50_hw_query *hq, uint64_t res64[4])
+{
+   switch (hq->base.type - NV50_HW_METRIC_QUERY(0)) {
+   case NV50_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
+      /* (branch / (branch + divergent_branch)) * 100, truncated to integer */
+      if (res64[0] + res64[1]) /* skip the division when both counts are 0 */
+         return (res64[0] / (double)(res64[0] + res64[1])) * 100;
+      break;
+   default:
+      debug_printf("invalid metric type: %d\n",
+                   hq->base.type - NV50_HW_METRIC_QUERY(0));
+      break;
+   }
+   return 0; /* no samples, or unknown metric */
+}
+
+static boolean
+nv50_hw_metric_get_query_result(struct nv50_context *nv50,
+                                struct nv50_hw_query *hq, boolean wait,
+                                union pipe_query_result *result)
+{
+   struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq);
+   union pipe_query_result results[4] = {};
+   uint64_t res64[4] = {}; /* one slot per sub-query, unused slots stay 0 */
+   boolean ret = false;
+   unsigned i;
+
+   for (i = 0; i < hmq->num_queries; i++) {
+      ret = hmq->queries[i]->funcs->get_query_result(nv50, hmq->queries[i],
+                                                     wait, &results[i]);
+      if (!ret)
+         return ret; /* any unavailable sub-result makes the metric unavailable */
+      res64[i] = *(uint64_t *)&results[i];
+   }
+
+   *(uint64_t *)result = sm11_hw_metric_calc_result(hq, res64);
+   return ret;
+}
+
+static const struct nv50_hw_query_funcs hw_metric_query_funcs = {
+   .destroy_query = nv50_hw_metric_destroy_query,
+   .begin_query = nv50_hw_metric_begin_query,
+   .end_query = nv50_hw_metric_end_query,
+   .get_query_result = nv50_hw_metric_get_query_result,
+};
+
+struct nv50_hw_query *
+nv50_hw_metric_create_query(struct nv50_context *nv50, unsigned type)
+{
+   const struct nv50_hw_metric_query_cfg *cfg;
+   struct nv50_hw_metric_query *hmq;
+   struct nv50_hw_query *hq;
+   unsigned i;
+
+   if (type < NV50_HW_METRIC_QUERY(0) || type > NV50_HW_METRIC_QUERY_LAST)
+      return NULL; /* not a metric query type */
+
+   hmq = CALLOC_STRUCT(nv50_hw_metric_query);
+   if (!hmq)
+      return NULL;
+
+   hq = &hmq->base;
+   hq->funcs = &hw_metric_query_funcs;
+   hq->base.type = type;
+
+   cfg = nv50_hw_metric_query_get_cfg(nv50, hq);
+
+   for (i = 0; i < cfg->num_queries; i++) { /* one SM sub-query per configured counter */
+      hmq->queries[i] = nv50_hw_sm_create_query(nv50, cfg->queries[i]);
+      if (!hmq->queries[i]) {
+         nv50_hw_metric_destroy_query(nv50, hq); /* frees hmq and the sub-queries counted so far */
+         return NULL;
+      }
+      hmq->num_queries++; /* counted only after a successful create */
+   }
+
+   return hq;
+}
+
+int
+nv50_hw_metric_get_driver_query_info(struct nv50_screen *screen, unsigned id,
+                                     struct pipe_driver_query_info *info)
+{
+   int count = 0; /* stays 0 unless a compute object exists on NV84+ */
+
+   if (screen->compute)
+      if (screen->base.class_3d >= NV84_3D_CLASS)
+         count += NV50_HW_METRIC_QUERY_COUNT;
+
+   if (!info)
+      return count; /* NULL info: caller only wants the number of metrics */
+
+   if (id < count) {
+      if (screen->compute) {
+         if (screen->base.class_3d >= NV84_3D_CLASS) {
+            info->name = nv50_hw_metric_names[id];
+            info->query_type = NV50_HW_METRIC_QUERY(id);
+            info->group_id = -1;
+            return 1; /* info was filled in */
+         }
+      }
+   }
+   return 0; /* id is out of range */
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h
new file mode 100644
index 00000000000..f8cfc04084f
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h
@@ -0,0 +1,34 @@
+#ifndef __NV50_QUERY_HW_METRIC_H__
+#define __NV50_QUERY_HW_METRIC_H__
+
+#include "nv50_query_hw.h"
+
+struct nv50_hw_metric_query {
+   struct nv50_hw_query base;
+   struct nv50_hw_query *queries[4];
+   unsigned num_queries;
+};
+
+static inline struct nv50_hw_metric_query *
+nv50_hw_metric_query(struct nv50_hw_query *hq)
+{
+   return (struct nv50_hw_metric_query *)hq;
+}
+
+/*
+ * Driver metrics queries:
+ */
+#define NV50_HW_METRIC_QUERY(i)   (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i))
+#define NV50_HW_METRIC_QUERY_LAST  NV50_HW_METRIC_QUERY(NV50_HW_METRIC_QUERY_COUNT - 1)
+enum nv50_hw_metric_queries
+{
+    NV50_HW_METRIC_QUERY_BRANCH_EFFICIENCY = 0,
+    NV50_HW_METRIC_QUERY_COUNT
+};
+
+struct nv50_hw_query *
+nv50_hw_metric_create_query(struct nv50_context *, unsigned);
+int
+nv50_hw_metric_get_driver_query_info(struct nv50_screen *, unsigned,
+                                     struct pipe_driver_query_info *);
+#endif

From d2f089ba17c6b17823fc3d244e15c0a18108d5ce Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Sat, 7 Nov 2015 18:58:34 -0800
Subject: [PATCH 006/335] i965: Introduce a MOV_INDIRECT opcode.

The geometry and tessellation control shader stages both read from
multiple URB entries (one per vertex).  The thread payload contains
several URB handles which reference these separate memory segments.

In GLSL, these inputs are represented as per-vertex arrays; the
outermost array index selects which vertex's inputs to read.  This
array index does not necessarily need to be constant.

To handle that, we need to use indirect addressing on GRFs to select
which of the thread payload registers has the appropriate URB handle.
(This is before we can even think about applying the pull model!)

This patch introduces a new opcode which performs a MOV from a
source using VxH indirect addressing (which allows each of the 8
SIMD channels to select distinct data.)

Based on a patch by Jason Ekstrand.

v2: Rename from INDIRECT_THREAD_PAYLOAD_MOV to MOV_INDIRECT; make it
    a bit more generic.  Use regs_read() instead of hacking up the
    register allocator.  (Suggested by Jason Ekstrand.)

v3: Fix regs_read() to be more accurate for small unaligned regions.
    Also rebase on Matt's work.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com> [v3]
Reviewed-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com> [v1]
---
 src/mesa/drivers/dri/i965/brw_defines.h       | 10 ++++++
 src/mesa/drivers/dri/i965/brw_fs.cpp          | 28 +++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs.h            |  5 +++
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp      |  1 +
 .../drivers/dri/i965/brw_fs_generator.cpp     | 34 +++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_shader.cpp      |  2 ++
 6 files changed, 80 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 6484484ed34..0b8de63df42 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1289,6 +1289,16 @@ enum opcode {
     * Calculate the high 32-bits of a 32x32 multiply.
     */
    SHADER_OPCODE_MULH,
+
+   /**
+    * A MOV that uses VxH indirect addressing.
+    *
+    * Source 0: A register to start from (HW_REG).
+    * Source 1: An indirect offset (in bytes, UD GRF).
+    * Source 2: The length of the region that could be accessed (in bytes,
+    *           UD immediate).
+    */
+   SHADER_OPCODE_MOV_INDIRECT,
 };
 
 enum brw_urb_write_flags {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 80b8c8e1207..84b5920d4f5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -840,6 +840,34 @@ fs_inst::regs_read(int arg) const
    case SHADER_OPCODE_BARRIER:
       return 1;
 
+   case SHADER_OPCODE_MOV_INDIRECT:
+      if (arg == 0) {
+         assert(src[2].file == IMM);
+         unsigned region_length = src[2].ud;
+
+         if (src[0].file == FIXED_GRF) {
+            /* If the start of the region is not register aligned, then
+             * there's some portion of the register that's technically
+             * unread at the beginning.
+             *
+             * However, the register allocator works in terms of whole
+             * registers, and does not use subnr.  It assumes that the
+             * read starts at the beginning of the register, and extends
+             * regs_read() whole registers beyond that.
+             *
+             * To compensate, we extend the region length to include this
+             * unread portion at the beginning.
+             */
+            if (src[0].subnr)
+               region_length += src[0].subnr * type_sz(src[0].type);
+
+            return DIV_ROUND_UP(region_length, REG_SIZE);
+         } else {
+            assert(!"Invalid register file");
+         }
+      }
+      break;
+
    default:
       if (is_tex() && arg == 0 && src[0].file == VGRF)
          return mlen;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index f40e58b8ca0..cbfc07f68bc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -527,6 +527,11 @@ private:
                                  struct brw_reg offset,
                                  struct brw_reg value);
 
+   void generate_mov_indirect(fs_inst *inst,
+                              struct brw_reg dst,
+                              struct brw_reg reg,
+                              struct brw_reg indirect_byte_offset);
+
    bool patch_discard_jumps_to_fb_writes();
 
    const struct brw_compiler *compiler;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 8c67caff6e0..3c40fcd4fd2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -78,6 +78,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst)
    case FS_OPCODE_LINTERP:
    case SHADER_OPCODE_FIND_LIVE_CHANNEL:
    case SHADER_OPCODE_BROADCAST:
+   case SHADER_OPCODE_MOV_INDIRECT:
       return true;
    case SHADER_OPCODE_RCP:
    case SHADER_OPCODE_RSQ:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 139cda3ca59..e5a286a763b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -371,6 +371,36 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
    }
 }
 
+void
+fs_generator::generate_mov_indirect(fs_inst *inst,
+                                    struct brw_reg dst,
+                                    struct brw_reg reg,
+                                    struct brw_reg indirect_byte_offset)
+{
+   assert(indirect_byte_offset.type == BRW_REGISTER_TYPE_UD);
+   assert(indirect_byte_offset.file == BRW_GENERAL_REGISTER_FILE);
+
+   unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr;
+
+   /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
+   struct brw_reg addr = vec8(brw_address_reg(0));
+
+   /* The destination stride of an instruction (in bytes) must be greater
+    * than or equal to the size of the rest of the instruction.  Since the
+    * address register is of type UW, we can't use a D-type instruction.
+    * In order to get around this, we re-type to UW and use a stride.
+    */
+   indirect_byte_offset =
+      retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
+
+   /* Prior to Broadwell, there are only 8 address registers. */
+   assert(inst->exec_size == 8 || devinfo->gen >= 8);
+
+   brw_MOV(p, addr, indirect_byte_offset);
+   brw_inst_set_mask_control(devinfo, brw_last_inst, BRW_MASK_DISABLE);
+   brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
+}
+
 void
 fs_generator::generate_urb_read(fs_inst *inst,
                                 struct brw_reg dst,
@@ -2079,6 +2109,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          fill_count++;
 	 break;
 
+      case SHADER_OPCODE_MOV_INDIRECT:
+         generate_mov_indirect(inst, dst, src[0], src[1]);
+         break;
+
       case SHADER_OPCODE_URB_READ_SIMD8:
       case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
          generate_urb_read(inst, dst, src[0]);
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index a438e1881d5..50b288b5c51 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -554,6 +554,8 @@ brw_instruction_name(enum opcode op)
       return "barrier";
    case SHADER_OPCODE_MULH:
       return "mulh";
+   case SHADER_OPCODE_MOV_INDIRECT:
+      return "mov_indirect";
    }
 
    unreachable("not reached");

From ff17b3ccf4f8d9f989cc975cd0e11716ff48bc1d Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 14 Nov 2015 20:14:07 -0500
Subject: [PATCH 007/335] nv50,nvc0: disable render condition around clear_*
 functions

Only the regular "clear" call is supposed to respect the render
condition. The rest should ignore it.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/nv50/nv50_query.c  |  1 +
 .../drivers/nouveau/nv50/nv50_surface.c        | 18 ++++++++++++++++++
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c  |  1 +
 .../drivers/nouveau/nvc0/nvc0_surface.c        | 12 ++++++++++++
 4 files changed, 32 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index c31bf728c61..643d430f1bc 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -152,6 +152,7 @@ nv50_init_query_functions(struct nv50_context *nv50)
    pipe->end_query = nv50_end_query;
    pipe->get_query_result = nv50_get_query_result;
    pipe->render_condition = nv50_render_condition;
+   nv50->cond_condmode = NV50_3D_COND_MODE_ALWAYS;
 }
 
 int
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index 916a7d44a31..8ba19d2cc90 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -339,12 +339,18 @@ nv50_clear_render_target(struct pipe_context *pipe,
    PUSH_DATA (push, (width << 16) | dstx);
    PUSH_DATA (push, (height << 16) | dsty);
 
+   BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+   PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
+
    BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth);
    for (z = 0; z < sf->depth; ++z) {
       PUSH_DATA (push, 0x3c |
                  (z << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT));
    }
 
+   BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+   PUSH_DATA (push, nv50->cond_condmode);
+
    nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
 }
 
@@ -415,12 +421,18 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
    PUSH_DATA (push, (width << 16) | dstx);
    PUSH_DATA (push, (height << 16) | dsty);
 
+   BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+   PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
+
    BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth);
    for (z = 0; z < sf->depth; ++z) {
       PUSH_DATA (push, mode |
                  (z << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT));
    }
 
+   BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+   PUSH_DATA (push, nv50->cond_condmode);
+
    nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
 }
 
@@ -673,6 +685,9 @@ nv50_clear_buffer(struct pipe_context *pipe,
    PUSH_DATA (push, (width << 16));
    PUSH_DATA (push, (height << 16));
 
+   BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+   PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
+
    BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1);
    PUSH_DATA (push, 0x3c);
 
@@ -690,6 +705,9 @@ nv50_clear_buffer(struct pipe_context *pipe,
       PUSH_DATA (push, 0x3c);
    }
 
+   BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+   PUSH_DATA (push, nv50->cond_condmode);
+
    nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
    nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index f53921092a5..edde57eb8e2 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -260,4 +260,5 @@ nvc0_init_query_functions(struct nvc0_context *nvc0)
    pipe->end_query = nvc0_end_query;
    pipe->get_query_result = nvc0_get_query_result;
    pipe->render_condition = nvc0_render_condition;
+   nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS;
 }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index cdb1fc1145f..6a4ae5be2ab 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -341,12 +341,16 @@ nvc0_clear_render_target(struct pipe_context *pipe,
       nvc0_resource_fence(res, NOUVEAU_BO_WR);
    }
 
+   IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
+
    BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
    for (z = 0; z < sf->depth; ++z) {
       PUSH_DATA (push, 0x3c |
                  (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
    }
 
+   IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
+
    nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
 }
 
@@ -470,6 +474,8 @@ nvc0_clear_buffer(struct pipe_context *pipe,
    IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0);
    IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 0);
 
+   IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
+
    IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
 
    if (width * height != elements) {
@@ -486,6 +492,8 @@ nvc0_clear_buffer(struct pipe_context *pipe,
       IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
    }
 
+   IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
+
    nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
    nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
    nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
@@ -545,12 +553,16 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,
    PUSH_DATA (push, dst->u.tex.first_layer);
    IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode);
 
+   IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
+
    BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
    for (z = 0; z < sf->depth; ++z) {
       PUSH_DATA (push, mode |
                  (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
    }
 
+   IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
+
    nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
 }
 

From 3f34afa0aad2a9bcfc0e5469a9675eca11ea7649 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Fri, 13 Nov 2015 09:03:55 +0100
Subject: [PATCH 008/335] nir/copy_propagate: do not copy-propagate MOV srcs
 with source modifiers

If a source operand in a MOV has source modifiers, then we cannot
copy-propagate it from the parent instruction and remove the MOV.

v2: remove the check for source modifiers from is_move() (Jason)

v3: Put the check for source modifiers back into is_move() since
    this function is called from copy_prop_alu_src(). Add source
    modifiers checks to is_vec() instead.

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir_opt_copy_propagate.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/glsl/nir/nir_opt_copy_propagate.c b/src/glsl/nir/nir_opt_copy_propagate.c
index 7d8bdd7f2ca..cfc8e331128 100644
--- a/src/glsl/nir/nir_opt_copy_propagate.c
+++ b/src/glsl/nir/nir_opt_copy_propagate.c
@@ -55,10 +55,15 @@ static bool is_move(nir_alu_instr *instr)
 
 static bool is_vec(nir_alu_instr *instr)
 {
-   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
+   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
       if (!instr->src[i].src.is_ssa)
          return false;
 
+      /* we handle modifiers in a separate pass */
+      if (instr->src[i].abs || instr->src[i].negate)
+         return false;
+   }
+
    return instr->op == nir_op_vec2 ||
           instr->op == nir_op_vec3 ||
           instr->op == nir_op_vec4;

From 40c2acef5cfe28f4ac371203bd70bfc7a222ba26 Mon Sep 17 00:00:00 2001
From: "Juan A. Suarez Romero" <jasuarez@igalia.com>
Date: Fri, 6 Nov 2015 12:23:17 +0000
Subject: [PATCH 009/335] nir/glsl_to_nir: use _mesa_fls() to compute
 num_textures

Replace the current loop by a direct call to _mesa_fls() function.

It also fixes an implicit bug in the current code where num_textures
seems to be one value less than it should be when sh->Program->SamplersUsed > 0.

For instance, num_textures is 0 instead of 1 when
sh->Program->SamplersUsed is 1.

Signed-off-by: Juan A. Suarez Romero <jasuarez@igalia.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/glsl/nir/glsl_to_nir.cpp | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index b10d1923e0a..38b8390ad36 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -31,6 +31,7 @@
 #include "ir_visitor.h"
 #include "ir_hierarchical_visitor.h"
 #include "ir.h"
+#include "main/imports.h"
 
 /*
  * pass to lower GLSL IR to NIR
@@ -145,16 +146,10 @@ glsl_to_nir(const struct gl_shader_program *shader_prog,
 
    nir_lower_outputs_to_temporaries(shader);
 
-   /* TODO: Use _mesa_fls instead */
-   unsigned num_textures = 0;
-   for (unsigned i = 0; i < 8 * sizeof(sh->Program->SamplersUsed); i++)
-      if (sh->Program->SamplersUsed & (1 << i))
-         num_textures = i;
-
    shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name);
    if (shader_prog->Label)
       shader->info.label = ralloc_strdup(shader, shader_prog->Label);
-   shader->info.num_textures = num_textures;
+   shader->info.num_textures = _mesa_fls(sh->Program->SamplersUsed);
    shader->info.num_ubos = sh->NumUniformBlocks;
    shader->info.num_abos = shader_prog->NumAtomicBuffers;
    shader->info.num_ssbos = sh->NumShaderStorageBlocks;

From 1780a562bcb996828509c5e8912305751bee6144 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Mon, 16 Nov 2015 10:49:14 +0000
Subject: [PATCH 010/335] nv50: add missing header into the sources list

Otherwise it won't end up in the tarball.

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
---
 src/gallium/drivers/nouveau/Makefile.sources | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources
index 12821a670cd..31a93659647 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -65,6 +65,7 @@ NV50_C_SOURCES := \
 	nv50/nv50_3d.xml.h \
 	nv50/nv50_blit.h \
 	nv50/nv50_compute.c \
+	nv50/nv50_compute.xml.h \
 	nv50/nv50_context.c \
 	nv50/nv50_context.h \
 	nv50/nv50_defs.xml.h \

From 2ca018cb65e1001e890f3dccbe65f757ed8da3f8 Mon Sep 17 00:00:00 2001
From: Neil Roberts <neil@linux.intel.com>
Date: Mon, 16 Nov 2015 14:35:46 +0100
Subject: [PATCH 011/335] docs: Add 16x MSAA on i965 to the release notes

Signed-off-by: Neil Roberts <neil@linux.intel.com>
---
 docs/relnotes/11.1.0.html | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 82ee3c4037b..6654311a3ac 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -63,6 +63,7 @@ Note: some of the new features are only available with certain drivers.
 <li>EGL_KHR_create_context on softpipe, llvmpipe</li>
 <li>EGL_KHR_gl_colorspace on softpipe, llvmpipe</li>
 <li>new virgl gallium driver for qemu virtio-gpu</li>
+<li>16x multisampling on i965 (gen9+)</li>
 </ul>
 
 <h2>Bug fixes</h2>

From 5b37d8b50cfc9a390f8320557a332a3c75b91953 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 13 Nov 2015 16:21:09 -0500
Subject: [PATCH 012/335] radeonsi: use proper GRBM_GFX_INDEX offset for CI+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The offset is different on CI and newer.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/radeonsi/si_state.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 93847d5ec2f..f0f87da46ea 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3256,21 +3256,29 @@ si_write_harvested_raster_configs(struct si_context *sctx,
 			}
 		}
 
-		/* GRBM_GFX_INDEX is privileged on VI */
-		if (sctx->b.chip_class <= CIK)
+		/* GRBM_GFX_INDEX has a different offset on SI and CI+ */
+		if (sctx->b.chip_class < CIK)
 			si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
 				       SE_INDEX(se) | SH_BROADCAST_WRITES |
 				       INSTANCE_BROADCAST_WRITES);
+		else
+			si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
+				       S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) |
+				       S_030800_INSTANCE_BROADCAST_WRITES(1));
 		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
 		if (sctx->b.chip_class >= CIK)
 			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
 	}
 
-	/* GRBM_GFX_INDEX is privileged on VI */
-	if (sctx->b.chip_class <= CIK)
+	/* GRBM_GFX_INDEX has a different offset on SI and CI+ */
+	if (sctx->b.chip_class < CIK)
 		si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
 			       SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
 			       INSTANCE_BROADCAST_WRITES);
+	else
+		si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
+			       S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
+			       S_030800_INSTANCE_BROADCAST_WRITES(1));
 }
 
 static void si_init_config(struct si_context *sctx)

From 00f554abba8c0f3b65af94365c15109c3b858486 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 13 Nov 2015 13:00:30 -0500
Subject: [PATCH 013/335] radeonsi: enable optimal raster config setting for
 fiji (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Requires proper kernel tiling configuration so check the tiling
config registers.

v2: send the right version of the patch

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: mesa-stable@lists.freedesktop.org
---
 src/gallium/drivers/radeonsi/si_state.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index f0f87da46ea..209b940aa11 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3283,6 +3283,7 @@ si_write_harvested_raster_configs(struct si_context *sctx,
 
 static void si_init_config(struct si_context *sctx)
 {
+	struct si_screen *sscreen = sctx->screen;
 	unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
 	unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
 	unsigned raster_config, raster_config_1;
@@ -3353,9 +3354,14 @@ static void si_init_config(struct si_context *sctx)
 		raster_config_1 = 0x0000002e;
 		break;
 	case CHIP_FIJI:
-		/* Fiji should be same as Hawaii, but that causes corruption in some cases */
-		raster_config = 0x16000012; /* 0x3a00161a */
-		raster_config_1 = 0x0000002a; /* 0x0000002e */
+		if (sscreen->b.info.cik_macrotile_mode_array[0] == 0x000000e8) {
+			/* old kernels with old tiling config */
+			raster_config = 0x16000012;
+			raster_config_1 = 0x0000002a;
+		} else {
+			raster_config = 0x3a00161a;
+			raster_config_1 = 0x0000002e;
+		}
 		break;
 	case CHIP_TONGA:
 		raster_config = 0x16000012;

From d564b5b58e8955f807e330364ff534ce57c99d8c Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Sat, 14 Nov 2015 17:47:33 -0800
Subject: [PATCH 014/335] nir/glsl: Fix copy-n-paste mistakes from commit
 213f864.

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
---
 src/glsl/nir/glsl_to_nir.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 38b8390ad36..6d24341ce01 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -1533,9 +1533,9 @@ nir_visitor::visit(ir_expression *ir)
       result = supports_ints ? nir_ior(&b, srcs[0], srcs[1])
                              : nir_for(&b, srcs[0], srcs[1]);
       break;
-   case ir_binop_logic_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break;
-      result = supports_ints ? nir_ior(&b, srcs[0], srcs[1])
-                             : nir_for(&b, srcs[0], srcs[1]);
+   case ir_binop_logic_xor:
+      result = supports_ints ? nir_ixor(&b, srcs[0], srcs[1])
+                             : nir_fxor(&b, srcs[0], srcs[1]);
       break;
    case ir_binop_lshift: result = nir_ishl(&b, srcs[0], srcs[1]); break;
    case ir_binop_rshift:

From c40a88b6c5a698e5297957e28cccf2ce23820caa Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Fri, 13 Nov 2015 11:58:41 -0800
Subject: [PATCH 015/335] meta/generate_mipmap: Only modify the draw
 framebuffer binding in fallback_required

Previously GL_FRAMEBUFFER was used.  However, if GL_EXT_framebuffer_blit
is supported (note: it is supported by every Mesa driver), this is
*sometimes* an alias for GL_DRAW_FRAMEBUFFER (getters) and *sometimes*
an alias for *both* GL_DRAW_FRAMEBUFFER and GL_READ_FRAMEBUFFER
(setters).  As a result, the code saved one binding but modified both.
If the bindings were different, the GL_READ_FRAMEBUFFER would be
incorrect on exit.

Fixes the piglit fbo-generatemipmap-versus-READ_FRAMEBUFFER test.

Ideally this function would use DSA functions and not modify the binding
at all.  However, that would be a much more intrusive change because
_mesa_meta_bind_fbo_image would also need to be modified.
_mesa_meta_bind_fbo_image has a lot of callers.  Much of this code is
about to get a major rework due to bug #92363, so I don't think it
matters too much.  In fact, I discovered this bug while working on the
other bug.  Le bon temps!

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
---
 src/mesa/drivers/common/meta_generate_mipmap.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/common/meta_generate_mipmap.c b/src/mesa/drivers/common/meta_generate_mipmap.c
index a9da0a21ba3..ffd71b6a199 100644
--- a/src/mesa/drivers/common/meta_generate_mipmap.c
+++ b/src/mesa/drivers/common/meta_generate_mipmap.c
@@ -102,13 +102,13 @@ fallback_required(struct gl_context *ctx, GLenum target,
     */
    if (!mipmap->FBO)
       _mesa_GenFramebuffers(1, &mipmap->FBO);
-   _mesa_BindFramebuffer(GL_FRAMEBUFFER_EXT, mipmap->FBO);
+   _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, mipmap->FBO);
 
-   _mesa_meta_bind_fbo_image(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, baseImage, 0);
+   _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, baseImage, 0);
 
-   status = _mesa_CheckFramebufferStatus(GL_FRAMEBUFFER_EXT);
+   status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER);
 
-   _mesa_BindFramebuffer(GL_FRAMEBUFFER_EXT, fboSave);
+   _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fboSave);
 
    if (status != GL_FRAMEBUFFER_COMPLETE_EXT) {
       _mesa_perf_debug(ctx, MESA_DEBUG_SEVERITY_HIGH,

From 983614dbede7b94cba1bad9f3e8627fc5e14bb91 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Thu, 12 Nov 2015 19:33:14 +0100
Subject: [PATCH 016/335] radeon: fix bgrx8/xrgb8 blits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since d21320f6258b2e1780a15c1ca718963d8a15ca18 the same txformat table entries
are used for "normal" texturing as well as for blits. However, I forgot to put
in an entry for the bgrx8 (le) and xrgb8 (be) formats - the normal texturing
path can't hit them because the radeon tex format chooser will never choose
them, but we get that format from the dri buffers (at least I assume we got
it from there). This caused lots of piglit regressions (and probably lots of
trouble outside piglit too).
This fixes bug https://bugs.freedesktop.org/show_bug.cgi?id=92900.

Tested-by: Ian Romanick <ian.d.romanick@intel.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Cc: "11.0" <mesa-stable@lists.freedesktop.org>
---
 src/mesa/drivers/dri/radeon/radeon_tex.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.h b/src/mesa/drivers/dri/radeon/radeon_tex.h
index f8ec432755a..37c2fa0dc2f 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex.h
+++ b/src/mesa/drivers/dri/radeon/radeon_tex.h
@@ -63,6 +63,8 @@ static const struct tx_table tx_table[] =
    [ MESA_FORMAT_R8G8B8A8_UNORM ] = { RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
    [ MESA_FORMAT_B8G8R8A8_UNORM ] = { RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
    [ MESA_FORMAT_A8R8G8B8_UNORM ] = { RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_B8G8R8X8_UNORM ] = { RADEON_TXFORMAT_ARGB8888, 0 },
+   [ MESA_FORMAT_X8R8G8B8_UNORM ] = { RADEON_TXFORMAT_ARGB8888, 0 },
    [ MESA_FORMAT_BGR_UNORM8 ] = { RADEON_TXFORMAT_ARGB8888, 0 },
    [ MESA_FORMAT_B5G6R5_UNORM ] = { RADEON_TXFORMAT_RGB565, 0 },
    [ MESA_FORMAT_R5G6B5_UNORM ] = { RADEON_TXFORMAT_RGB565, 0 },

From a2611ffe4b5f1852c59301f086b988233a1c62f3 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Tue, 17 Nov 2015 01:04:05 +0100
Subject: [PATCH 017/335] r200: fix bgrx8/xrgb8 blits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since 779cabfc7d022de8b7b9bc7fdac0caffa8646c51 the same txformat table entries
are used for "normal" texturing as well as for blits. However, I forgot to put
in an entry for the bgrx8 (le) and xrgb8 (be) formats - the normal texturing
path can't hit them because the radeon tex format chooser will never choose
them, but we get that format from the dri buffers (at least I assume we got
it from there).
This is untested but essentially addressing the same bug as for radeon.
(I don't think that the second entry per le/be table is actually necessary,
but shouldn't hurt...)

Tested-by: Ian Romanick <ian.d.romanick@intel.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Cc: "11.0" <mesa-stable@lists.freedesktop.org>
---
 src/mesa/drivers/dri/r200/r200_tex.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/mesa/drivers/dri/r200/r200_tex.h b/src/mesa/drivers/dri/r200/r200_tex.h
index a8c31b741ed..14f5e71fadf 100644
--- a/src/mesa/drivers/dri/r200/r200_tex.h
+++ b/src/mesa/drivers/dri/r200/r200_tex.h
@@ -63,7 +63,9 @@ static const struct tx_table tx_table_be[] =
    [ MESA_FORMAT_A8B8G8R8_UNORM ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
    [ MESA_FORMAT_R8G8B8A8_UNORM ] = { R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
    [ MESA_FORMAT_B8G8R8A8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_B8G8R8X8_UNORM ] = { R200_TXFORMAT_ARGB8888, 0 },
    [ MESA_FORMAT_A8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_X8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888, 0 },
    [ MESA_FORMAT_BGR_UNORM8 ] = { 0xffffffff, 0 },
    [ MESA_FORMAT_B5G6R5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
    [ MESA_FORMAT_R5G6B5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
@@ -91,7 +93,9 @@ static const struct tx_table tx_table_le[] =
    [ MESA_FORMAT_A8B8G8R8_UNORM ] = { R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
    [ MESA_FORMAT_R8G8B8A8_UNORM ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
    [ MESA_FORMAT_B8G8R8A8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_B8G8R8X8_UNORM ] = { R200_TXFORMAT_ARGB8888, 0 },
    [ MESA_FORMAT_A8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_X8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888, 0 },
    [ MESA_FORMAT_BGR_UNORM8 ] = { R200_TXFORMAT_ARGB8888, 0 },
    [ MESA_FORMAT_B5G6R5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
    [ MESA_FORMAT_R5G6B5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },

From 44d6c0c805d2911cc5dfe853e5bc5a505f87775f Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 12 Nov 2015 13:32:13 -0800
Subject: [PATCH 018/335] i965: Convert scalar_* flags to a scalar_stage array.

I was going to add scalar_tcs and scalar_tes flags, and then thought
better of it and decided to convert this to an array.  Simpler.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
---
 src/mesa/drivers/dri/i965/brw_compiler.h      |  3 +-
 src/mesa/drivers/dri/i965/brw_context.c       |  2 +-
 src/mesa/drivers/dri/i965/brw_gs.c            |  3 +-
 src/mesa/drivers/dri/i965/brw_link.cpp        | 11 ++++---
 src/mesa/drivers/dri/i965/brw_program.c       |  3 +-
 src/mesa/drivers/dri/i965/brw_shader.cpp      | 29 +++++--------------
 src/mesa/drivers/dri/i965/brw_shader.h        |  2 --
 src/mesa/drivers/dri/i965/brw_vec4.cpp        |  4 +--
 .../drivers/dri/i965/brw_vec4_gs_visitor.cpp  |  2 +-
 src/mesa/drivers/dri/i965/brw_vs.c            |  7 +++--
 10 files changed, 27 insertions(+), 39 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index e3a26d6a353..3f546161409 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -89,8 +89,7 @@ struct brw_compiler {
    void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
    void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
 
-   bool scalar_vs;
-   bool scalar_gs;
+   bool scalar_stage[MESA_SHADER_STAGES];
    struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES];
 };
 
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index ac6045dbba9..2db99c74b8e 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -525,7 +525,7 @@ brw_initialize_context_constants(struct brw_context *brw)
       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms =
          BRW_MAX_IMAGES;
       ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms =
-         (brw->intelScreen->compiler->scalar_vs ? BRW_MAX_IMAGES : 0);
+         (brw->intelScreen->compiler->scalar_stage[MESA_SHADER_VERTEX] ? BRW_MAX_IMAGES : 0);
       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms =
          BRW_MAX_IMAGES;
       ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index ed0890f430f..ad5b242a3ab 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -87,7 +87,8 @@ brw_codegen_gs_prog(struct brw_context *brw,
    prog_data.base.base.nr_image_params = gs->NumImages;
 
    brw_nir_setup_glsl_uniforms(gp->program.Base.nir, prog, &gp->program.Base,
-                               &prog_data.base.base, compiler->scalar_gs);
+                               &prog_data.base.base,
+                               compiler->scalar_stage[MESA_SHADER_GEOMETRY]);
 
    GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;
 
diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp
index 29911732761..14421d421b6 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -66,12 +66,14 @@ brw_lower_packing_builtins(struct brw_context *brw,
                            gl_shader_stage shader_type,
                            exec_list *ir)
 {
+   const struct brw_compiler *compiler = brw->intelScreen->compiler;
+
    int ops = LOWER_PACK_SNORM_2x16
            | LOWER_UNPACK_SNORM_2x16
            | LOWER_PACK_UNORM_2x16
            | LOWER_UNPACK_UNORM_2x16;
 
-   if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) {
+   if (compiler->scalar_stage[shader_type]) {
       ops |= LOWER_UNPACK_UNORM_4x8
            | LOWER_UNPACK_SNORM_4x8
            | LOWER_PACK_UNORM_4x8
@@ -84,7 +86,7 @@ brw_lower_packing_builtins(struct brw_context *brw,
        * lowering is needed. For SOA code, the Half2x16 ops must be
        * scalarized.
        */
-      if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) {
+      if (compiler->scalar_stage[shader_type]) {
          ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
              |  LOWER_UNPACK_HALF_2x16_TO_SPLIT;
       }
@@ -103,6 +105,7 @@ process_glsl_ir(gl_shader_stage stage,
                 struct gl_shader *shader)
 {
    struct gl_context *ctx = &brw->ctx;
+   const struct brw_compiler *compiler = brw->intelScreen->compiler;
    const struct gl_shader_compiler_options *options =
       &ctx->Const.ShaderCompilerOptions[shader->Stage];
 
@@ -161,7 +164,7 @@ process_glsl_ir(gl_shader_stage stage,
    do {
       progress = false;
 
-      if (is_scalar_shader_stage(brw->intelScreen->compiler, shader->Stage)) {
+      if (compiler->scalar_stage[shader->Stage]) {
          brw_do_channel_expressions(shader->ir);
          brw_do_vector_splitting(shader->ir);
       }
@@ -252,7 +255,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
       brw_add_texrect_params(prog);
 
       prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
-                                 is_scalar_shader_stage(compiler, stage));
+                                 compiler->scalar_stage[stage]);
 
       _mesa_reference_program(ctx, &prog, NULL);
    }
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 1ccfa1b6a1d..2297fa69488 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -126,6 +126,7 @@ brwProgramStringNotify(struct gl_context *ctx,
 		       struct gl_program *prog)
 {
    struct brw_context *brw = brw_context(ctx);
+   const struct brw_compiler *compiler = brw->intelScreen->compiler;
 
    switch (target) {
    case GL_FRAGMENT_PROGRAM_ARB: {
@@ -165,7 +166,7 @@ brwProgramStringNotify(struct gl_context *ctx,
       brw_add_texrect_params(prog);
 
       prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
-                                 brw->intelScreen->compiler->scalar_vs);
+                                 compiler->scalar_stage[MESA_SHADER_VERTEX]);
 
       brw_vs_precompile(ctx, NULL, prog);
       break;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 50b288b5c51..c4a567f4cc9 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -72,22 +72,6 @@ shader_perf_log_mesa(void *data, const char *fmt, ...)
    va_end(args);
 }
 
-bool
-is_scalar_shader_stage(const struct brw_compiler *compiler, int stage)
-{
-   switch (stage) {
-   case MESA_SHADER_FRAGMENT:
-   case MESA_SHADER_COMPUTE:
-      return true;
-   case MESA_SHADER_GEOMETRY:
-      return compiler->scalar_gs;
-   case MESA_SHADER_VERTEX:
-      return compiler->scalar_vs;
-   default:
-      return false;
-   }
-}
-
 struct brw_compiler *
 brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
 {
@@ -100,11 +84,12 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
    brw_fs_alloc_reg_sets(compiler);
    brw_vec4_alloc_reg_set(compiler);
 
-   if (devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS))
-      compiler->scalar_vs = true;
-
-   if (devinfo->gen >= 8 && brw_env_var_as_boolean("INTEL_SCALAR_GS", false))
-      compiler->scalar_gs = true;
+   compiler->scalar_stage[MESA_SHADER_VERTEX] =
+      devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS);
+   compiler->scalar_stage[MESA_SHADER_GEOMETRY] =
+      devinfo->gen >= 8 && brw_env_var_as_boolean("INTEL_SCALAR_GS", false);
+   compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true;
+   compiler->scalar_stage[MESA_SHADER_COMPUTE] = true;
 
    nir_shader_compiler_options *nir_options =
       rzalloc(compiler, nir_shader_compiler_options);
@@ -137,7 +122,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
       compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
       compiler->glsl_compiler_options[i].LowerClipDistance = true;
 
-      bool is_scalar = is_scalar_shader_stage(compiler, i);
+      bool is_scalar = compiler->scalar_stage[i];
 
       compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar;
       compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index c851941021d..a4139cf3ddb 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -255,8 +255,6 @@ int type_size_scalar(const struct glsl_type *type);
 int type_size_vec4(const struct glsl_type *type);
 int type_size_vec4_times_4(const struct glsl_type *type);
 
-bool is_scalar_shader_stage(const struct brw_compiler *compiler, int stage);
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index a086b43e11a..3bcd5cbddf3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -2002,7 +2002,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
     * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode.  Empirically, in
     * vec4 mode, the hardware appears to wedge unless we read something.
     */
-   if (compiler->scalar_vs)
+   if (compiler->scalar_stage[MESA_SHADER_VERTEX])
       prog_data->base.urb_read_length = DIV_ROUND_UP(nr_attributes, 2);
    else
       prog_data->base.urb_read_length = DIV_ROUND_UP(MAX2(nr_attributes, 1), 2);
@@ -2021,7 +2021,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
    else
       prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 4);
 
-   if (compiler->scalar_vs) {
+   if (compiler->scalar_stage[MESA_SHADER_VERTEX]) {
       prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
 
       fs_visitor v(compiler, log_data, mem_ctx, key, &prog_data->base.base,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 1a09f76a20c..81353aeed7e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -819,7 +819,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
       brw_print_vue_map(stderr, &prog_data->base.vue_map);
    }
 
-   if (compiler->scalar_gs) {
+   if (compiler->scalar_stage[MESA_SHADER_GEOMETRY]) {
       /* TODO: Support instanced GS.  We have basically no tests... */
       assert(prog_data->invocations == 1);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 967448e0e41..7c783f66864 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -48,6 +48,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
                     struct brw_vertex_program *vp,
                     struct brw_vs_prog_key *key)
 {
+   const struct brw_compiler *compiler = brw->intelScreen->compiler;
    GLuint program_size;
    const GLuint *program;
    struct brw_vs_prog_data prog_data;
@@ -79,7 +80,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
     * by the state cache.
     */
    int param_count = vp->program.Base.nir->num_uniforms;
-   if (!brw->intelScreen->compiler->scalar_vs)
+   if (!compiler->scalar_stage[MESA_SHADER_VERTEX])
       param_count *= 4;
 
    if (vs)
@@ -102,7 +103,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
    if (prog) {
       brw_nir_setup_glsl_uniforms(vp->program.Base.nir, prog, &vp->program.Base,
                                   &prog_data.base.base,
-                                  brw->intelScreen->compiler->scalar_vs);
+                                  compiler->scalar_stage[MESA_SHADER_VERTEX]);
    } else {
       brw_nir_setup_arb_uniforms(vp->program.Base.nir, &vp->program.Base,
                                  &prog_data.base.base);
@@ -173,7 +174,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
    /* Emit GEN4 code.
     */
    char *error_str;
-   program = brw_compile_vs(brw->intelScreen->compiler, brw, mem_ctx, key,
+   program = brw_compile_vs(compiler, brw, mem_ctx, key,
                             &prog_data, vp->program.Base.nir,
                             brw_select_clip_planes(&brw->ctx),
                             !_mesa_is_gles3(&brw->ctx),

From 5ee5dfddeafde2e2b89f86d2a59769a61ce5d6b2 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 12 Nov 2015 13:46:16 -0800
Subject: [PATCH 019/335] i965: Clean up context constant initialization code.

This was getting pretty out of hand, and with compute partially in place
and tessellation on the way, it was only going to get worse.

This patch makes a "stage exists?" predicate and a "number of stages"
count and uses them to clean up a lot of calculations.  We can just
loop over shader stages and set things for the ones that exist.  For
combined counts, we can just multiply by the number of stages.

It also tries to organize the code a little bit.

We should probably use _mesa_has_geometry_shaders/tessellation/compute
here, but we can't because ctx->Version isn't initialized yet.  Perhaps
that could be fixed in the future.

No change in "glxinfo -l" on Broadwell.

v2: Drop stray compute shader hunk.  Mark stage_exists as const.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
---
 src/mesa/drivers/dri/i965/brw_context.c | 134 ++++++++++--------------
 1 file changed, 54 insertions(+), 80 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 2db99c74b8e..e70ad982f48 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -322,64 +322,81 @@ static void
 brw_initialize_context_constants(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
+   const struct brw_compiler *compiler = brw->intelScreen->compiler;
+
+   const bool stage_exists[MESA_SHADER_STAGES] = {
+      [MESA_SHADER_VERTEX] = true,
+      [MESA_SHADER_TESS_CTRL] = false,
+      [MESA_SHADER_TESS_EVAL] = false,
+      [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
+      [MESA_SHADER_FRAGMENT] = true,
+      [MESA_SHADER_COMPUTE] = _mesa_extension_override_enables.ARB_compute_shader,
+   };
+
+   unsigned num_stages = 0;
+   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+      if (stage_exists[i])
+         num_stages++;
+   }
 
    unsigned max_samplers =
       brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;
 
+   ctx->Const.MaxDualSourceDrawBuffers = 1;
+   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
+   ctx->Const.MaxCombinedShaderOutputResources =
+      MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
+
    ctx->Const.QueryCounterBits.Timestamp = 36;
 
+   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
+   ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
+   ctx->Const.MaxRenderbufferSize = 8192;
+   ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
+   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
+   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
+   ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
+   ctx->Const.MaxTextureMbytes = 1536;
+   ctx->Const.MaxTextureRectSize = 1 << 12;
+   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
    ctx->Const.StripTextureBorder = true;
+   if (brw->gen >= 7)
+      ctx->Const.MaxProgramTextureGatherComponents = 4;
+   else if (brw->gen == 6)
+      ctx->Const.MaxProgramTextureGatherComponents = 1;
 
    ctx->Const.MaxUniformBlockSize = 65536;
+
    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
       struct gl_program_constants *prog = &ctx->Const.Program[i];
+
+      if (!stage_exists[i])
+         continue;
+
+      prog->MaxTextureImageUnits = max_samplers;
+
       prog->MaxUniformBlocks = BRW_MAX_UBO;
       prog->MaxCombinedUniformComponents =
          prog->MaxUniformComponents +
          ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
+
+      prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
+      prog->MaxAtomicBuffers = BRW_MAX_ABO;
+      prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
+      prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
    }
 
-   ctx->Const.MaxDualSourceDrawBuffers = 1;
-   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
-   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
-   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
    ctx->Const.MaxTextureUnits =
       MIN2(ctx->Const.MaxTextureCoordUnits,
            ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
-   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers;
-   if (brw->gen >= 6)
-      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers;
-   else
-      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
-   if (_mesa_extension_override_enables.ARB_compute_shader) {
-      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
-      ctx->Const.MaxUniformBufferBindings += BRW_MAX_UBO;
-   } else {
-      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0;
-   }
-   ctx->Const.MaxCombinedTextureImageUnits =
-      ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits +
-      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits +
-      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits +
-      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
 
-   ctx->Const.MaxTextureLevels = 14; /* 8192 */
-   if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
-      ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
-   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
-   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
-   ctx->Const.MaxTextureMbytes = 1536;
+   ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
+   ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
+   ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
+   ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
+   ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
+   ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;
 
-   if (brw->gen >= 7)
-      ctx->Const.MaxArrayTextureLayers = 2048;
-   else
-      ctx->Const.MaxArrayTextureLayers = 512;
-
-   ctx->Const.MaxTextureRectSize = 1 << 12;
-
-   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
-
-   ctx->Const.MaxRenderbufferSize = 8192;
 
    /* Hardware only supports a limited number of transform feedback buffers.
     * So we need to override the Mesa default (which is based only on software
@@ -427,6 +444,7 @@ brw_initialize_context_constants(struct brw_context *brw)
    ctx->Const.MaxColorTextureSamples = max_samples;
    ctx->Const.MaxDepthTextureSamples = max_samples;
    ctx->Const.MaxIntegerSamples = max_samples;
+   ctx->Const.MaxImageSamples = 0;
 
    /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
     * to map indices of rectangular grid to sample numbers within a pixel.
@@ -436,11 +454,6 @@ brw_initialize_context_constants(struct brw_context *brw)
     */
    gen6_set_sample_maps(ctx);
 
-   if (brw->gen >= 7)
-      ctx->Const.MaxProgramTextureGatherComponents = 4;
-   else if (brw->gen == 6)
-      ctx->Const.MaxProgramTextureGatherComponents = 1;
-
    ctx->Const.MinLineWidth = 1.0;
    ctx->Const.MinLineWidthAA = 1.0;
    if (brw->gen >= 6) {
@@ -511,30 +524,6 @@ brw_initialize_context_constants(struct brw_context *brw)
    ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
    ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
 
-   if (brw->gen >= 7) {
-      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
-      ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
-      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
-      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
-      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers = BRW_MAX_ABO;
-      ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers = BRW_MAX_ABO;
-      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO;
-      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO;
-      ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO;
-
-      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms =
-         BRW_MAX_IMAGES;
-      ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms =
-         (brw->intelScreen->compiler->scalar_stage[MESA_SHADER_VERTEX] ? BRW_MAX_IMAGES : 0);
-      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms =
-         BRW_MAX_IMAGES;
-      ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
-      ctx->Const.MaxCombinedShaderOutputResources =
-         MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
-      ctx->Const.MaxImageSamples = 0;
-      ctx->Const.MaxCombinedImageUniforms = 3 * BRW_MAX_IMAGES;
-   }
-
    /* Gen6 converts quads to polygon in beginning of 3D pipeline,
     * but we're not sure how it's actually done for vertex order,
     * that affect provoking vertex decision. Always use last vertex
@@ -586,21 +575,6 @@ brw_initialize_context_constants(struct brw_context *brw)
    ctx->Const.TextureBufferOffsetAlignment = 16;
    ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
 
-   /* FIXME: Tessellation stages are not yet supported in i965, so
-    * MaxCombinedShaderStorageBlocks doesn't take them into account.
-    */
-   ctx->Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks = BRW_MAX_SSBO;
-   ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks = BRW_MAX_SSBO;
-   ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks = 0;
-   ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks = 0;
-   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks = BRW_MAX_SSBO;
-   ctx->Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks = BRW_MAX_SSBO;
-   ctx->Const.MaxCombinedShaderStorageBlocks = BRW_MAX_SSBO * 3;
-   ctx->Const.MaxShaderStorageBufferBindings = BRW_MAX_SSBO * 3;
-
-   if (_mesa_extension_override_enables.ARB_compute_shader)
-      ctx->Const.MaxShaderStorageBufferBindings += BRW_MAX_SSBO;
-
    if (brw->gen >= 6) {
       ctx->Const.MaxVarying = 32;
       ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;

From 292df1940126f267418e656b9ec33eb3f06667b8 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 13 Nov 2015 14:55:50 -0800
Subject: [PATCH 020/335] i965: Set MaxCombinedUniformBlocks properly.

Up until now, we've been letting core Mesa initialize it to 36 for us
(which is presumably BRW_MAX_UBO (12) * (VS+GS+FS stages -> 3)).

With compute and tessellation, we need to increase this.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
---
 src/mesa/drivers/dri/i965/brw_context.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index e70ad982f48..2ea0a9eca92 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -391,6 +391,7 @@ brw_initialize_context_constants(struct brw_context *brw)
            ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
 
    ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
+   ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
    ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
    ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
    ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;

From 023fd58fd685135bfb4ee401ac9bd1c3a3988e02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tapani=20P=C3=A4lli?= <tapani.palli@intel.com>
Date: Mon, 16 Nov 2015 08:44:18 +0200
Subject: [PATCH 021/335] glsl: initialize precision when adding per vertex
 record fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes issues with tessellation builtin variables since precision was
introduced to IR with commit f84bc57d7dc02fceb805803131426c791eadeff9.

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/builtin_variables.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
index b06c1bc5c12..b927d506faf 100644
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -327,6 +327,7 @@ per_vertex_accumulator::add_field(int slot, const glsl_type *type,
    this->fields[this->num_fields].centroid = 0;
    this->fields[this->num_fields].sample = 0;
    this->fields[this->num_fields].patch = 0;
+   this->fields[this->num_fields].precision = GLSL_PRECISION_NONE;
    this->num_fields++;
 }
 

From f4f30ad730b7dafaadafda63344012203543894c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tapani=20P=C3=A4lli?= <tapani.palli@intel.com>
Date: Mon, 16 Nov 2015 08:43:12 +0200
Subject: [PATCH 022/335] mesa: do runtime validation of precision varyings
 only on ES
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Precision qualifier should be ignored on desktop OpenGL.

v2: include spec quote (Samuel)

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
---
 src/mesa/main/shader_query.cpp | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index 58ba04153e6..14f849e0a94 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -1413,9 +1413,19 @@ _mesa_validate_pipeline_io(struct gl_pipeline_object *pipeline)
 
    for (idx = prev + 1; idx < ARRAY_SIZE(pipeline->CurrentProgram); idx++) {
       if (shProg[idx]) {
-         if (!validate_io(shProg[prev]->_LinkedShaders[prev],
-                          shProg[idx]->_LinkedShaders[idx]))
-            return false;
+         /* Since we now only validate precision, we can skip this step for
+          * desktop GLSL shaders, where the precision qualifier is ignored.
+          *
+          * From OpenGL 4.50 Shading Language spec, section 4.7:
+          *     "For the purposes of determining if an output from one shader
+          *     stage matches an input of the next stage, the precision
+          *     qualifier need not match."
+          */
+         if (shProg[prev]->IsES || shProg[idx]->IsES) {
+            if (!validate_io(shProg[prev]->_LinkedShaders[prev],
+                             shProg[idx]->_LinkedShaders[idx]))
+               return false;
+         }
          prev = idx;
       }
    }

From a96afaced8d4498e3f24ff1e201b4569fd03d55d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Samuel=20Iglesias=20Gons=C3=A1lvez?= <siglesias@igalia.com>
Date: Mon, 16 Nov 2015 10:23:42 +0100
Subject: [PATCH 023/335] nir: reduce memory footprint of glsl_struct_field's
 precision
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
---
 src/glsl/nir/glsl_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/nir/glsl_types.h b/src/glsl/nir/glsl_types.h
index d841a3277db..2d44059cca5 100644
--- a/src/glsl/nir/glsl_types.h
+++ b/src/glsl/nir/glsl_types.h
@@ -837,7 +837,7 @@ struct glsl_struct_field {
    /**
     * Precision qualifier
     */
-   unsigned precision;
+   unsigned precision:2;
 
    /**
     * Image qualifiers, applicable to buffer variables defined in shader

From 58954e4daa5e874771e1a33320a4009249f2429a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Samuel=20Iglesias=20Gons=C3=A1lvez?= <siglesias@igalia.com>
Date: Mon, 16 Nov 2015 11:43:20 +0100
Subject: [PATCH 024/335] glsl/nir: initialize precision field in
 glsl_struct_field constructor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
---
 src/glsl/nir/glsl_types.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/glsl/nir/glsl_types.h b/src/glsl/nir/glsl_types.h
index 2d44059cca5..d8a999ad44e 100644
--- a/src/glsl/nir/glsl_types.h
+++ b/src/glsl/nir/glsl_types.h
@@ -851,7 +851,8 @@ struct glsl_struct_field {
 
    glsl_struct_field(const struct glsl_type *_type, const char *_name)
       : type(_type), name(_name), location(-1), interpolation(0), centroid(0),
-        sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0)
+        sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0),
+        precision(GLSL_PRECISION_NONE)
    {
       /* empty */
    }

From 91eefe850531adf7e1ed527e4b5bcb4a1f8c5d63 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Samuel=20Iglesias=20Gons=C3=A1lvez?= <siglesias@igalia.com>
Date: Mon, 16 Nov 2015 11:59:18 +0100
Subject: [PATCH 025/335] glsl: initialize data.precision value in ir_variable
 constructor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
---
 src/glsl/ir.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 8933b230177..8b5ba71fbba 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -1676,6 +1676,7 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name,
    this->data.interpolation = INTERP_QUALIFIER_NONE;
    this->data.max_array_access = 0;
    this->data.atomic.offset = 0;
+   this->data.precision = GLSL_PRECISION_NONE;
    this->data.image_read_only = false;
    this->data.image_write_only = false;
    this->data.image_coherent = false;

From cfe32cfa8ed0cb2b41be8e63ddab9f68f2cc63de Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Samuel=20Iglesias=20Gons=C3=A1lvez?= <siglesias@igalia.com>
Date: Mon, 16 Nov 2015 12:01:37 +0100
Subject: [PATCH 026/335] glsl: copy each field's precision information when
 generating varying variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
---
 src/glsl/builtin_variables.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
index b927d506faf..fc7a3c3f64c 100644
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -1187,6 +1187,7 @@ builtin_variable_generator::generate_varyings()
          var->data.centroid = fields[i].centroid;
          var->data.sample = fields[i].sample;
          var->data.patch = fields[i].patch;
+         var->data.precision = fields[i].precision;
          var->init_interface_type(per_vertex_out_type);
       }
    }

From 688b58c40c2fdf99c94706d02511293d30fe2430 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Samuel=20Iglesias=20Gons=C3=A1lvez?= <siglesias@igalia.com>
Date: Mon, 16 Nov 2015 12:02:41 +0100
Subject: [PATCH 027/335] glsl: copy each field's precision information from
 the old gl_PerVertex interface block
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
---
 src/glsl/ast_to_hir.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index f5292435058..97554cbd688 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -6603,6 +6603,8 @@ ast_interface_block::hir(exec_list *instructions,
                earlier_per_vertex->fields.structure[j].sample;
             fields[i].patch =
                earlier_per_vertex->fields.structure[j].patch;
+            fields[i].precision =
+               earlier_per_vertex->fields.structure[j].precision;
          }
       }
 

From dfa60e7057ef8e5b07c49c7ba22fbddaa75e848b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Samuel=20Iglesias=20Gons=C3=A1lvez?= <siglesias@igalia.com>
Date: Mon, 16 Nov 2015 12:35:13 +0100
Subject: [PATCH 028/335] glsl: copy each field's precision information in
 glsl_types's structure constructor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
---
 src/glsl/nir/glsl_types.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/glsl/nir/glsl_types.cpp b/src/glsl/nir/glsl_types.cpp
index 975b815b0cc..9cc3715db8a 100644
--- a/src/glsl/nir/glsl_types.cpp
+++ b/src/glsl/nir/glsl_types.cpp
@@ -129,6 +129,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
       this->fields.structure[i].image_coherent = fields[i].image_coherent;
       this->fields.structure[i].image_volatile = fields[i].image_volatile;
       this->fields.structure[i].image_restrict = fields[i].image_restrict;
+      this->fields.structure[i].precision = fields[i].precision;
    }
 
    mtx_unlock(&glsl_type::mutex);

From 252b143e9e8ca0b98143c237f14cb0b548ffd510 Mon Sep 17 00:00:00 2001
From: Eduardo Lima Mitev <elima@igalia.com>
Date: Tue, 17 Nov 2015 09:49:43 +0100
Subject: [PATCH 029/335] i965: Return the correct value type from
 brw_compile_gs()

brw_compile_gs() should return a pointer to unsigned, but it is returning the
bool 'false' at some point, hence annoying us with a compiler warning:

In function 'const unsigned int* brw::brw_compile_gs(const brw_compiler*,
   void*, void*, const brw_gs_prog_key*, brw_gs_prog_data*, const nir_shader*,
   gl_shader_program*, int, unsigned int*, char**)':

brw_vec4_gs_visitor.cpp:776:14: warning: converting 'false' to pointer type
                                'const unsigned int*' [-Wconversion-null]
                                return false;
                                       ^
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
---
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 81353aeed7e..0c49865eee2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -773,7 +773,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
    if (compiler->devinfo->gen == 6)
       max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
    if (output_size_bytes > max_output_size_bytes)
-      return false;
+      return NULL;
 
 
    /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and

From 6bd9ba7d07490059024d16ba32fafa1c5bb239ee Mon Sep 17 00:00:00 2001
From: Boyan Ding <boyan.j.ding@gmail.com>
Date: Tue, 21 Jul 2015 23:43:54 +0800
Subject: [PATCH 030/335] loader: Add dri3 helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2: From Martin Peres
 - Try to fit in the 80-col limit as much as possible

v3: From Martin Peres
 - introduce loader_dri3_helper.la to avoid dragging the xcb dep everywhere (Kristian & Emil)
 - get rid of the width, height, dri_screen and is_different_gpu vfuncs (Kristian)
 - replace the create/destroy functions with init/fini for dri3 drawables
 - prefix static functions with dri3_ and exported ones with loader_dri3 (Emil)
 - keep the function definition consistent (Emil)

Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
Signed-off-by: Martin Peres <martin.peres@linux.intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
---
 configure.ac                    |    3 +-
 src/glx/Makefile.am             |    2 +
 src/loader/Makefile.am          |   15 +-
 src/loader/loader_dri3_helper.c | 1376 +++++++++++++++++++++++++++++++
 src/loader/loader_dri3_helper.h |  232 ++++++
 5 files changed, 1626 insertions(+), 2 deletions(-)
 create mode 100644 src/loader/loader_dri3_helper.c
 create mode 100644 src/loader/loader_dri3_helper.h

diff --git a/configure.ac b/configure.ac
index 3f15881a18e..9ea9ab22346 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1226,7 +1226,8 @@ xyesno)
 
             if test x"$enable_dri3" = xyes; then
                PKG_CHECK_EXISTS([xcb >= $XCB_REQUIRED], [], AC_MSG_ERROR([DRI3 requires xcb >= $XCB_REQUIRED]))
-               dri_modules="$dri_modules xcb-dri3 xcb-present xcb-sync xshmfence >= $XSHMFENCE_REQUIRED"
+               dri3_modules="xcb-dri3 xcb-present xcb-sync xshmfence >= $XSHMFENCE_REQUIRED"
+               PKG_CHECK_MODULES([XCB_DRI3], [$dri3_modules])
             fi
         fi
         if test x"$dri_platform" = xapple ; then
diff --git a/src/glx/Makefile.am b/src/glx/Makefile.am
index e64955e3b3e..d08ff7a8dd2 100644
--- a/src/glx/Makefile.am
+++ b/src/glx/Makefile.am
@@ -133,6 +133,8 @@ if HAVE_DRI3
 libglx_la_SOURCES += \
 	dri3_glx.c \
 	dri3_priv.h
+
+libglx_la_LIBADD += $(XCB_DRI3_LIBS)
 endif
 
 if HAVE_APPLEDRI
diff --git a/src/loader/Makefile.am b/src/loader/Makefile.am
index 5190f7f8a46..c0f79475361 100644
--- a/src/loader/Makefile.am
+++ b/src/loader/Makefile.am
@@ -23,7 +23,7 @@ include Makefile.sources
 
 EXTRA_DIST = SConscript
 
-noinst_LTLIBRARIES = libloader.la
+noinst_LTLIBRARIES = libloader.la libloader_dri3_helper.la
 
 libloader_la_CPPFLAGS = \
 	$(DEFINES) \
@@ -55,3 +55,16 @@ libloader_la_CPPFLAGS += \
 libloader_la_LIBADD += \
 	$(LIBDRM_LIBS)
 endif
+
+if HAVE_DRI3
+libloader_dri3_helper_la_CPPFLAGS = \
+	$(DEFINES) \
+	-I$(top_srcdir)/include \
+	-I$(top_srcdir)/src \
+	$(LIBDRM_CFLAGS)
+
+libloader_dri3_helper_la_SOURCES = \
+	loader_dri3_helper.c \
+	loader_dri3_helper.h
+libloader_dri3_helper_la_LIBADD = $(XCB_DRI3_LIBS)
+endif
diff --git a/src/loader/loader_dri3_helper.c b/src/loader/loader_dri3_helper.c
new file mode 100644
index 00000000000..9093b179317
--- /dev/null
+++ b/src/loader/loader_dri3_helper.c
@@ -0,0 +1,1376 @@
+/*
+ * Copyright © 2013 Keith Packard
+ * Copyright © 2015 Boyan Ding
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  The copyright holders make no representations
+ * about the suitability of this software for any purpose.  It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <X11/xshmfence.h>
+#include <xcb/xcb.h>
+#include <xcb/dri3.h>
+#include <xcb/present.h>
+
+#include <X11/Xlib-xcb.h>
+
+#include "loader_dri3_helper.h"
+
+/* From xmlpool/options.h, user exposed so should be stable */
+#define DRI_CONF_VBLANK_NEVER 0
+#define DRI_CONF_VBLANK_DEF_INTERVAL_0 1
+#define DRI_CONF_VBLANK_DEF_INTERVAL_1 2
+#define DRI_CONF_VBLANK_ALWAYS_SYNC 3
+
+static inline void
+dri3_fence_reset(xcb_connection_t *c, struct loader_dri3_buffer *buffer)
+{
+   xshmfence_reset(buffer->shm_fence);
+}
+
+static inline void
+dri3_fence_set(struct loader_dri3_buffer *buffer)
+{
+   xshmfence_trigger(buffer->shm_fence);
+}
+
+static inline void
+dri3_fence_trigger(xcb_connection_t *c, struct loader_dri3_buffer *buffer)
+{
+   xcb_sync_trigger_fence(c, buffer->sync_fence);
+}
+
+static inline void
+dri3_fence_await(xcb_connection_t *c, struct loader_dri3_buffer *buffer)
+{
+   xcb_flush(c);
+   xshmfence_await(buffer->shm_fence);
+}
+
+static void
+dri3_update_num_back(struct loader_dri3_drawable *draw)
+{
+   draw->num_back = 1;
+   if (draw->flipping) {
+      if (!draw->is_pixmap &&
+          !(draw->present_capabilities & XCB_PRESENT_CAPABILITY_ASYNC))
+         draw->num_back++;
+      draw->num_back++;
+   }
+   if (draw->vtable->get_swap_interval(draw) == 0)
+      draw->num_back++;
+}
+
+void
+loader_dri3_set_swap_interval(struct loader_dri3_drawable *draw, int interval)
+{
+   interval = draw->vtable->clamp_swap_interval(draw, interval);
+   draw->vtable->set_swap_interval(draw, interval);
+   dri3_update_num_back(draw);
+}
+
+/** dri3_free_render_buffer
+ *
+ * Free everything associated with one render buffer including pixmap, fence
+ * stuff and the driver image
+ */
+static void
+dri3_free_render_buffer(struct loader_dri3_drawable *draw,
+                        struct loader_dri3_buffer *buffer)
+{
+   if (buffer->own_pixmap)
+      xcb_free_pixmap(draw->conn, buffer->pixmap);
+   xcb_sync_destroy_fence(draw->conn, buffer->sync_fence);
+   xshmfence_unmap_shm(buffer->shm_fence);
+   (draw->ext->image->destroyImage)(buffer->image);
+   if (buffer->linear_buffer)
+      (draw->ext->image->destroyImage)(buffer->linear_buffer);
+   free(buffer);
+}
+
+void
+loader_dri3_drawable_fini(struct loader_dri3_drawable *draw)
+{
+   int i;
+
+   (draw->ext->core->destroyDrawable)(draw->dri_drawable);
+
+   for (i = 0; i < LOADER_DRI3_NUM_BUFFERS; i++) {
+      if (draw->buffers[i])
+         dri3_free_render_buffer(draw, draw->buffers[i]);
+   }
+
+   if (draw->special_event)
+      xcb_unregister_for_special_event(draw->conn, draw->special_event);
+}
+
+int
+loader_dri3_drawable_init(xcb_connection_t *conn,
+                          xcb_drawable_t drawable,
+                          __DRIscreen *dri_screen,
+                          bool is_different_gpu,
+                          const __DRIconfig *dri_config,
+                          struct loader_dri3_extensions *ext,
+                          struct loader_dri3_vtable *vtable,
+                          struct loader_dri3_drawable *draw)
+{
+   xcb_get_geometry_cookie_t cookie;
+   xcb_get_geometry_reply_t *reply;
+   xcb_generic_error_t *error;
+   GLint vblank_mode = DRI_CONF_VBLANK_DEF_INTERVAL_1;
+   int swap_interval;
+
+   draw->conn = conn;
+   draw->ext = ext;
+   draw->vtable = vtable;
+   draw->drawable = drawable;
+   draw->dri_screen = dri_screen;
+   draw->is_different_gpu = is_different_gpu;
+
+   draw->have_back = 0;
+   draw->have_fake_front = 0;
+   draw->first_init = true;
+
+   if (draw->ext->config)
+      draw->ext->config->configQueryi(draw->dri_screen,
+                                      "vblank_mode", &vblank_mode);
+
+   switch (vblank_mode) {
+   case DRI_CONF_VBLANK_NEVER:
+   case DRI_CONF_VBLANK_DEF_INTERVAL_0:
+      swap_interval = 0;
+      break;
+   case DRI_CONF_VBLANK_DEF_INTERVAL_1:
+   case DRI_CONF_VBLANK_ALWAYS_SYNC:
+   default:
+      swap_interval = 1;
+      break;
+   }
+   draw->vtable->set_swap_interval(draw, swap_interval);
+
+   dri3_update_num_back(draw);
+
+   /* Create a new drawable */
+   draw->dri_drawable =
+      (draw->ext->image_driver->createNewDrawable)(dri_screen,
+                                                   dri_config,
+                                                   draw);
+
+   if (!draw->dri_drawable)
+      return 1;
+
+   cookie = xcb_get_geometry(draw->conn, draw->drawable);
+   reply = xcb_get_geometry_reply(draw->conn, cookie, &error);
+   if (reply == NULL || error != NULL) {
+      draw->ext->core->destroyDrawable(draw->dri_drawable);
+      return 1;
+   }
+
+   draw->width = reply->width;
+   draw->height = reply->height;
+   draw->depth = reply->depth;
+   draw->vtable->set_drawable_size(draw, draw->width, draw->height);
+   free(reply);
+
+   /*
+    * Make sure server has the same swap interval we do for the new
+    * drawable.
+    */
+   loader_dri3_set_swap_interval(draw, swap_interval);
+
+   return 0;
+}
+
+/*
+ * Process one Present event
+ */
+static void
+dri3_handle_present_event(struct loader_dri3_drawable *draw,
+                          xcb_present_generic_event_t *ge)
+{
+   switch (ge->evtype) {
+   case XCB_PRESENT_CONFIGURE_NOTIFY: {
+      xcb_present_configure_notify_event_t *ce = (void *) ge;
+
+      draw->width = ce->width;
+      draw->height = ce->height;
+      draw->vtable->set_drawable_size(draw, draw->width, draw->height);
+      break;
+   }
+   case XCB_PRESENT_COMPLETE_NOTIFY: {
+      xcb_present_complete_notify_event_t *ce = (void *) ge;
+
+      /* Compute the processed SBC number from the received 32-bit serial number
+       * merged with the upper 32-bits of the sent 64-bit serial number while
+       * checking for wrap.
+       */
+      if (ce->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP) {
+         draw->recv_sbc = (draw->send_sbc & 0xffffffff00000000LL) | ce->serial;
+         if (draw->recv_sbc > draw->send_sbc)
+            draw->recv_sbc -= 0x100000000;
+         switch (ce->mode) {
+         case XCB_PRESENT_COMPLETE_MODE_FLIP:
+            draw->flipping = true;
+            break;
+         case XCB_PRESENT_COMPLETE_MODE_COPY:
+            draw->flipping = false;
+            break;
+         }
+         dri3_update_num_back(draw);
+
+         if (draw->vtable->show_fps)
+            draw->vtable->show_fps(draw, ce->ust);
+
+         draw->ust = ce->ust;
+         draw->msc = ce->msc;
+      } else {
+         draw->recv_msc_serial = ce->serial;
+         draw->notify_ust = ce->ust;
+         draw->notify_msc = ce->msc;
+      }
+      break;
+   }
+   case XCB_PRESENT_EVENT_IDLE_NOTIFY: {
+      xcb_present_idle_notify_event_t *ie = (void *) ge;
+      int b;
+
+      for (b = 0; b < sizeof(draw->buffers) / sizeof(draw->buffers[0]); b++) {
+         struct loader_dri3_buffer *buf = draw->buffers[b];
+
+         if (buf && buf->pixmap == ie->pixmap) {
+            buf->busy = 0;
+            if (draw->num_back <= b && b < LOADER_DRI3_MAX_BACK) {
+               dri3_free_render_buffer(draw, buf);
+               draw->buffers[b] = NULL;
+            }
+            break;
+         }
+      }
+      break;
+   }
+   }
+   free(ge);
+}
+
+static bool
+dri3_wait_for_event(struct loader_dri3_drawable *draw)
+{
+   xcb_generic_event_t *ev;
+   xcb_present_generic_event_t *ge;
+
+   xcb_flush(draw->conn);
+   ev = xcb_wait_for_special_event(draw->conn, draw->special_event);
+   if (!ev)
+      return false;
+   ge = (void *) ev;
+   dri3_handle_present_event(draw, ge);
+   return true;
+}
+
+/** loader_dri3_wait_for_msc
+ *
+ * Get the X server to send an event when the target msc/divisor/remainder is
+ * reached.
+ */
+bool
+loader_dri3_wait_for_msc(struct loader_dri3_drawable *draw,
+                         int64_t target_msc,
+                         int64_t divisor, int64_t remainder,
+                         int64_t *ust, int64_t *msc, int64_t *sbc)
+{
+   uint32_t msc_serial;
+
+   msc_serial = ++draw->send_msc_serial;
+   xcb_present_notify_msc(draw->conn,
+                          draw->drawable,
+                          msc_serial,
+                          target_msc,
+                          divisor,
+                          remainder);
+
+   xcb_flush(draw->conn);
+
+   /* Wait for the event */
+   if (draw->special_event) {
+      while ((int32_t) (msc_serial - draw->recv_msc_serial) > 0) {
+         if (!dri3_wait_for_event(draw))
+            return false;
+      }
+   }
+
+   *ust = draw->notify_ust;
+   *msc = draw->notify_msc;
+   *sbc = draw->recv_sbc;
+
+   return true;
+}
+
+/** loader_dri3_wait_for_sbc
+ *
+ * Wait for the completed swap buffer count to reach the specified
+ * target. Presumably the application knows that this will be reached with
+ * outstanding complete events, or we're going to be here awhile.
+ */
+int
+loader_dri3_wait_for_sbc(struct loader_dri3_drawable *draw,
+                         int64_t target_sbc, int64_t *ust,
+                         int64_t *msc, int64_t *sbc)
+{
+   /* From the GLX_OML_sync_control spec:
+    *
+    *     "If <target_sbc> = 0, the function will block until all previous
+    *      swaps requested with glXSwapBuffersMscOML for that window have
+    *      completed."
+    */
+   if (!target_sbc)
+      target_sbc = draw->send_sbc;
+
+   while (draw->recv_sbc < target_sbc) {
+      if (!dri3_wait_for_event(draw))
+         return 0;
+   }
+
+   *ust = draw->ust;
+   *msc = draw->msc;
+   *sbc = draw->recv_sbc;
+   return 1;
+}
+
+/** dri3_find_back
+ *
+ * Find an idle back buffer. If there isn't one, then
+ * wait for a present idle notify event from the X server
+ */
+static int
+dri3_find_back(struct loader_dri3_drawable *draw)
+{
+   int b;
+   xcb_generic_event_t *ev;
+   xcb_present_generic_event_t *ge;
+
+   for (;;) {
+      for (b = 0; b < draw->num_back; b++) {
+         int id = LOADER_DRI3_BACK_ID((b + draw->cur_back) % draw->num_back);
+         struct loader_dri3_buffer *buffer = draw->buffers[id];
+
+         if (!buffer || !buffer->busy) {
+            draw->cur_back = id;
+            return id;
+         }
+      }
+      xcb_flush(draw->conn);
+      ev = xcb_wait_for_special_event(draw->conn, draw->special_event);
+      if (!ev)
+         return -1;
+      ge = (void *) ev;
+      dri3_handle_present_event(draw, ge);
+   }
+}
+
+static xcb_gcontext_t
+dri3_drawable_gc(struct loader_dri3_drawable *draw)
+{
+   if (!draw->gc) {
+      uint32_t v = 0;
+      xcb_create_gc(draw->conn,
+                    (draw->gc = xcb_generate_id(draw->conn)),
+                    draw->drawable,
+                    XCB_GC_GRAPHICS_EXPOSURES,
+                    &v);
+   }
+   return draw->gc;
+}
+
+
+static struct loader_dri3_buffer *
+dri3_back_buffer(struct loader_dri3_drawable *draw)
+{
+   return draw->buffers[LOADER_DRI3_BACK_ID(draw->cur_back)];
+}
+
+static struct loader_dri3_buffer *
+dri3_fake_front_buffer(struct loader_dri3_drawable *draw)
+{
+   return draw->buffers[LOADER_DRI3_FRONT_ID];
+}
+
+static void
+dri3_copy_area(xcb_connection_t *c,
+               xcb_drawable_t    src_drawable,
+               xcb_drawable_t    dst_drawable,
+               xcb_gcontext_t    gc,
+               int16_t           src_x,
+               int16_t           src_y,
+               int16_t           dst_x,
+               int16_t           dst_y,
+               uint16_t          width,
+               uint16_t          height)
+{
+   xcb_void_cookie_t cookie;
+
+   cookie = xcb_copy_area_checked(c,
+                                  src_drawable,
+                                  dst_drawable,
+                                  gc,
+                                  src_x,
+                                  src_y,
+                                  dst_x,
+                                  dst_y,
+                                  width,
+                                  height);
+   xcb_discard_reply(c, cookie.sequence);
+}
+
+/**
+ * Asks the driver to flush any queued work necessary for serializing with the
+ * X command stream, and optionally the slightly more strict requirement of
+ * glFlush() equivalence (which would require flushing even if nothing had
+ * been drawn to a window system framebuffer, for example).
+ */
+void
+loader_dri3_flush(struct loader_dri3_drawable *draw,
+                  unsigned flags,
+                  enum __DRI2throttleReason throttle_reason)
+{
+   /* Only flush when the drawable's vtable hands back a non-NULL context. */
+   __DRIcontext *dri_context = draw->vtable->get_dri_context(draw);
+
+   if (dri_context) {
+      draw->ext->flush->flush_with_flags(dri_context, draw->dri_drawable,
+                                         flags, throttle_reason);
+   }
+}
+
+void
+loader_dri3_copy_sub_buffer(struct loader_dri3_drawable *draw,
+                            int x, int y,
+                            int width, int height,
+                            bool flush)
+{
+   struct loader_dri3_buffer *back;
+   unsigned flags = __DRI2_FLUSH_DRAWABLE;
+   __DRIcontext *dri_context;
+
+   dri_context = draw->vtable->get_dri_context(draw);
+
+   /* Check we have the right attachments */
+   if (!draw->have_back || draw->is_pixmap)
+      return;
+
+   if (flush)
+      flags |= __DRI2_FLUSH_CONTEXT;
+   loader_dri3_flush(draw, flags, __DRI2_THROTTLE_SWAPBUFFER);
+
+   back = dri3_back_buffer(draw);
+   y = draw->height - y - height;
+
+   if (draw->is_different_gpu && draw->vtable->in_current_context(draw)) {
+      /* Update the linear buffer part of the back buffer
+       * for the dri3_copy_area operation
+       */
+      draw->ext->image->blitImage(dri_context,
+                                  back->linear_buffer,
+                                  back->image,
+                                  0, 0, back->width,
+                                  back->height,
+                                  0, 0, back->width,
+                                  back->height, __BLIT_FLAG_FLUSH);
+      /* We use blitImage to update our fake front.
+       */
+      if (draw->have_fake_front)
+         draw->ext->image->blitImage(dri_context,
+                                     dri3_fake_front_buffer(draw)->image,
+                                     back->image,
+                                     x, y, width, height,
+                                     x, y, width, height, __BLIT_FLAG_FLUSH);
+   }
+
+   dri3_fence_reset(draw->conn, back);
+   dri3_copy_area(draw->conn,
+                  dri3_back_buffer(draw)->pixmap,
+                  draw->drawable,
+                  dri3_drawable_gc(draw),
+                  x, y, x, y, width, height);
+   dri3_fence_trigger(draw->conn, back);
+   /* Refresh the fake front (if present) after we just damaged the real
+    * front.
+    */
+   if (draw->have_fake_front && !draw->is_different_gpu) {
+      dri3_fence_reset(draw->conn, dri3_fake_front_buffer(draw));
+      dri3_copy_area(draw->conn,
+                     dri3_back_buffer(draw)->pixmap,
+                     dri3_fake_front_buffer(draw)->pixmap,
+                     dri3_drawable_gc(draw),
+                     x, y, x, y, width, height);
+      dri3_fence_trigger(draw->conn, dri3_fake_front_buffer(draw));
+      dri3_fence_await(draw->conn, dri3_fake_front_buffer(draw));
+   }
+   dri3_fence_await(draw->conn, back);
+}
+
+void
+loader_dri3_copy_drawable(struct loader_dri3_drawable *draw,
+                          xcb_drawable_t dest,
+                          xcb_drawable_t src)
+{
+   loader_dri3_flush(draw, __DRI2_FLUSH_DRAWABLE, 0);
+
+   dri3_fence_reset(draw->conn, dri3_fake_front_buffer(draw));
+   dri3_copy_area(draw->conn,
+                  src, dest,
+                  dri3_drawable_gc(draw),
+                  0, 0, 0, 0, draw->width, draw->height);
+   dri3_fence_trigger(draw->conn, dri3_fake_front_buffer(draw));
+   dri3_fence_await(draw->conn, dri3_fake_front_buffer(draw));
+}
+
+void
+loader_dri3_wait_x(struct loader_dri3_drawable *draw)
+{
+   struct loader_dri3_buffer *front;
+   __DRIcontext *dri_context;
+
+   if (draw == NULL || !draw->have_fake_front)
+      return;
+
+   front = dri3_fake_front_buffer(draw);
+   dri_context = draw->vtable->get_dri_context(draw);
+
+   loader_dri3_copy_drawable(draw, front->pixmap, draw->drawable);
+
+   /* In the draw->is_different_gpu case, the linear buffer has been updated,
+    * but not yet the tiled buffer.
+    * Copy back to the tiled buffer we use for rendering.
+    * Note that we don't need flushing.
+    */
+   if (draw->is_different_gpu && draw->vtable->in_current_context(draw))
+      draw->ext->image->blitImage(dri_context,
+                                  front->image,
+                                  front->linear_buffer,
+                                  0, 0, front->width,
+                                  front->height,
+                                  0, 0, front->width,
+                                  front->height, 0);
+}
+
+void
+loader_dri3_wait_gl(struct loader_dri3_drawable *draw)
+{
+   struct loader_dri3_buffer *front;
+   __DRIcontext *dri_context;
+
+   if (draw == NULL || !draw->have_fake_front)
+      return;
+
+   front = dri3_fake_front_buffer(draw);
+   dri_context = draw->vtable->get_dri_context(draw);
+
+   /* In the draw->is_different_gpu case, we update the linear_buffer
+    * before updating the real front.
+    */
+   if (draw->is_different_gpu && draw->vtable->in_current_context(draw))
+      draw->ext->image->blitImage(dri_context,
+                                  front->linear_buffer,
+                                  front->image,
+                                  0, 0, front->width,
+                                  front->height,
+                                  0, 0, front->width,
+                                  front->height, __BLIT_FLAG_FLUSH);
+   loader_dri3_copy_drawable(draw, draw->drawable, front->pixmap);
+}
+
+/** dri3_flush_present_events
+ *
+ * Process any present events that have been received from the X server
+ */
+static void
+dri3_flush_present_events(struct loader_dri3_drawable *draw)
+{
+   /* Check to see if any configuration changes have occurred
+    * since we were last invoked
+    */
+   if (draw->special_event) {
+      xcb_generic_event_t    *ev;
+
+      while ((ev = xcb_poll_for_special_event(draw->conn,
+                                              draw->special_event)) != NULL) {
+         xcb_present_generic_event_t *ge = (void *) ev;
+         dri3_handle_present_event(draw, ge);
+      }
+   }
+}
+
+/** loader_dri3_swap_buffers_msc
+ *
+ * Make the current back buffer visible using the present extension
+ */
+int64_t
+loader_dri3_swap_buffers_msc(struct loader_dri3_drawable *draw,
+                             int64_t target_msc, int64_t divisor,
+                             int64_t remainder, unsigned flush_flags,
+                             bool force_copy)
+{
+   struct loader_dri3_buffer *back;
+   __DRIcontext *dri_context;
+   int64_t ret = 0;
+   uint32_t options = XCB_PRESENT_OPTION_NONE;
+   int swap_interval;
+
+   dri_context = draw->vtable->get_dri_context(draw);
+   swap_interval = draw->vtable->get_swap_interval(draw);
+
+   draw->vtable->flush_drawable(draw, flush_flags);
+
+   back = draw->buffers[LOADER_DRI3_BACK_ID(draw->cur_back)];
+   if (draw->is_different_gpu && back) {
+      /* Update the linear buffer before presenting the pixmap */
+      draw->ext->image->blitImage(dri_context,
+                                  back->linear_buffer,
+                                  back->image,
+                                  0, 0, back->width,
+                                  back->height,
+                                  0, 0, back->width,
+                                  back->height, __BLIT_FLAG_FLUSH);
+      /* Update the fake front */
+      if (draw->have_fake_front)
+         draw->ext->image->blitImage(dri_context,
+                                     draw->buffers[LOADER_DRI3_FRONT_ID]->image,
+                                     back->image,
+                                     0, 0, draw->width, draw->height,
+                                     0, 0, draw->width, draw->height,
+                                     __BLIT_FLAG_FLUSH);
+   }
+
+   dri3_flush_present_events(draw);
+
+   if (back && !draw->is_pixmap) {
+      dri3_fence_reset(draw->conn, back);
+
+      /* Compute when we want the frame shown by taking the last known
+       * successful MSC and adding in a swap interval for each outstanding swap
+       * request. target_msc=divisor=remainder=0 means "Use glXSwapBuffers()
+       * semantic"
+       */
+      ++draw->send_sbc;
+      if (target_msc == 0 && divisor == 0 && remainder == 0)
+         target_msc = draw->msc + swap_interval *
+                      (draw->send_sbc - draw->recv_sbc);
+      else if (divisor == 0 && remainder > 0) {
+         /* From the GLX_OML_sync_control spec:
+          *     "If <divisor> = 0, the swap will occur when MSC becomes
+          *      greater than or equal to <target_msc>."
+          *
+          * Note that there's no mention of the remainder.  The Present
+          * extension throws BadValue for remainder != 0 with divisor == 0, so
+          * just drop the passed in value.
+          */
+         remainder = 0;
+      }
+
+      /* From the GLX_EXT_swap_control spec
+       * and the EGL 1.4 spec (page 53):
+       *
+       *     "If <interval> is set to a value of 0, buffer swaps are not
+       *      synchronized to a video frame."
+       *
+       * Implementation note: It is possible to enable triple buffering
+       * behaviour by not using XCB_PRESENT_OPTION_ASYNC, but this should not be
+       * the default.
+       */
+      if (swap_interval == 0)
+          options |= XCB_PRESENT_OPTION_ASYNC;
+      if (force_copy)
+          options |= XCB_PRESENT_OPTION_COPY;
+
+      back->busy = 1;
+      back->last_swap = draw->send_sbc;
+      xcb_present_pixmap(draw->conn,
+                         draw->drawable,
+                         back->pixmap,
+                         (uint32_t) draw->send_sbc,
+                         0,                                    /* valid */
+                         0,                                    /* update */
+                         0,                                    /* x_off */
+                         0,                                    /* y_off */
+                         None,                                 /* target_crtc */
+                         None,
+                         back->sync_fence,
+                         options,
+                         target_msc,
+                         divisor,
+                         remainder, 0, NULL);
+      ret = (int64_t) draw->send_sbc;
+
+      /* If there's a fake front, then copy the source back buffer
+       * to the fake front to keep it up to date. This needs
+       * to reset the fence and make future users block until
+       * the X server is done copying the bits
+       */
+      if (draw->have_fake_front && !draw->is_different_gpu) {
+         dri3_fence_reset(draw->conn, draw->buffers[LOADER_DRI3_FRONT_ID]);
+         dri3_copy_area(draw->conn,
+                        back->pixmap,
+                        draw->buffers[LOADER_DRI3_FRONT_ID]->pixmap,
+                        dri3_drawable_gc(draw),
+                        0, 0, 0, 0,
+                        draw->width, draw->height);
+         dri3_fence_trigger(draw->conn, draw->buffers[LOADER_DRI3_FRONT_ID]);
+      }
+      xcb_flush(draw->conn);
+      if (draw->stamp)
+         ++(*draw->stamp);
+   }
+
+   (draw->ext->flush->invalidate)(draw->dri_drawable);
+
+   return ret;
+}
+
+int
+loader_dri3_query_buffer_age(struct loader_dri3_drawable *draw)
+{
+   int back_id = LOADER_DRI3_BACK_ID(dri3_find_back(draw));
+
+   if (back_id < 0 || !draw->buffers[back_id])
+      return 0;
+
+   if (draw->buffers[back_id]->last_swap != 0)
+      return draw->send_sbc - draw->buffers[back_id]->last_swap + 1;
+   else
+      return 0;
+}
+
+/** loader_dri3_open
+ *
+ * Wrapper around xcb_dri3_open; returns a close-on-exec fd, or -1 on failure
+ */
+int
+loader_dri3_open(xcb_connection_t *conn,
+                 xcb_window_t root,
+                 uint32_t provider)
+{
+   xcb_dri3_open_cookie_t       cookie;
+   xcb_dri3_open_reply_t        *reply;
+   int                          fd;
+
+   cookie = xcb_dri3_open(conn,
+                          root,
+                          provider);
+
+   reply = xcb_dri3_open_reply(conn, cookie, NULL);
+   if (!reply)
+      return -1;
+
+   if (reply->nfd != 1) {
+      free(reply);
+      return -1;
+   }
+
+   fd = xcb_dri3_open_reply_fds(conn, reply)[0];
+   free(reply);   /* the fd has been copied out; the reply is ours to free */
+   fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
+   return fd;
+}
+
+static uint32_t
+dri3_cpp_for_format(uint32_t format) {
+   switch (format) {
+   case  __DRI_IMAGE_FORMAT_R8:
+      return 1;
+   case  __DRI_IMAGE_FORMAT_RGB565:
+   case  __DRI_IMAGE_FORMAT_GR88:
+      return 2;
+   case  __DRI_IMAGE_FORMAT_XRGB8888:
+   case  __DRI_IMAGE_FORMAT_ARGB8888:
+   case  __DRI_IMAGE_FORMAT_ABGR8888:
+   case  __DRI_IMAGE_FORMAT_XBGR8888:
+   case  __DRI_IMAGE_FORMAT_XRGB2101010:
+   case  __DRI_IMAGE_FORMAT_ARGB2101010:
+   case  __DRI_IMAGE_FORMAT_SARGB8:
+      return 4;
+   case  __DRI_IMAGE_FORMAT_NONE:
+   default:
+      return 0;
+   }
+}
+
+/** dri3_alloc_render_buffer
+ *
+ * Use the driver createImage function to construct a __DRIimage, then
+ * get a file descriptor for that and create an X pixmap from that
+ *
+ * Allocate an xshmfence for synchronization
+ */
+static struct loader_dri3_buffer *
+dri3_alloc_render_buffer(struct loader_dri3_drawable *draw, unsigned int format,
+                         int width, int height, int depth)
+{
+   struct loader_dri3_buffer *buffer;
+   __DRIimage *pixmap_buffer;
+   xcb_pixmap_t pixmap;
+   xcb_sync_fence_t sync_fence;
+   struct xshmfence *shm_fence;
+   int buffer_fd, fence_fd;
+   int stride;
+
+   /* Create an xshmfence object and
+    * prepare to send that to the X server
+    */
+
+   fence_fd = xshmfence_alloc_shm();
+   if (fence_fd < 0)
+      return NULL;
+
+   shm_fence = xshmfence_map_shm(fence_fd);
+   if (shm_fence == NULL)
+      goto no_shm_fence;
+
+   /* Allocate the image from the driver
+    */
+   buffer = calloc(1, sizeof *buffer);
+   if (!buffer)
+      goto no_buffer;
+
+   buffer->cpp = dri3_cpp_for_format(format);
+   if (!buffer->cpp)
+      goto no_image;
+
+   if (!draw->is_different_gpu) {
+      buffer->image = (draw->ext->image->createImage)(draw->dri_screen,
+                                                      width, height,
+                                                      format,
+                                                      __DRI_IMAGE_USE_SHARE |
+                                                      __DRI_IMAGE_USE_SCANOUT,
+                                                      buffer);
+      pixmap_buffer = buffer->image;
+
+      if (!buffer->image)
+         goto no_image;
+   } else {
+      buffer->image = (draw->ext->image->createImage)(draw->dri_screen,
+                                                      width, height,
+                                                      format,
+                                                      0,
+                                                      buffer);
+
+      if (!buffer->image)
+         goto no_image;
+
+      buffer->linear_buffer =
+        (draw->ext->image->createImage)(draw->dri_screen,
+                                        width, height, format,
+                                        __DRI_IMAGE_USE_SHARE |
+                                           __DRI_IMAGE_USE_LINEAR,
+                                        buffer);
+      pixmap_buffer = buffer->linear_buffer;
+
+      if (!buffer->linear_buffer)
+         goto no_linear_buffer;
+   }
+
+   /* X wants the stride, so ask the image for it
+    */
+   if (!(draw->ext->image->queryImage)(pixmap_buffer, __DRI_IMAGE_ATTRIB_STRIDE,
+                                       &stride))
+      goto no_buffer_attrib;
+
+   buffer->pitch = stride;
+
+   if (!(draw->ext->image->queryImage)(pixmap_buffer, __DRI_IMAGE_ATTRIB_FD,
+                                       &buffer_fd))
+      goto no_buffer_attrib;
+
+   xcb_dri3_pixmap_from_buffer(draw->conn,
+                               (pixmap = xcb_generate_id(draw->conn)),
+                               draw->drawable,
+                               buffer->size,
+                               width, height, buffer->pitch,
+                               depth, buffer->cpp * 8,
+                               buffer_fd);
+
+   xcb_dri3_fence_from_fd(draw->conn,
+                          pixmap,
+                          (sync_fence = xcb_generate_id(draw->conn)),
+                          false,
+                          fence_fd);
+
+   buffer->pixmap = pixmap;
+   buffer->own_pixmap = true;
+   buffer->sync_fence = sync_fence;
+   buffer->shm_fence = shm_fence;
+   buffer->width = width;
+   buffer->height = height;
+
+   /* Mark the buffer as idle
+    */
+   dri3_fence_set(buffer);
+
+   return buffer;
+
+no_buffer_attrib:
+   (draw->ext->image->destroyImage)(pixmap_buffer);
+no_linear_buffer:
+   if (draw->is_different_gpu)
+      (draw->ext->image->destroyImage)(buffer->image);
+no_image:
+   free(buffer);
+no_buffer:
+   xshmfence_unmap_shm(shm_fence);
+no_shm_fence:
+   close(fence_fd);
+   return NULL;
+}
+
+/** dri3_update_drawable
+ *
+ * Called the first time we use the drawable and then
+ * after we receive present configure notify events to
+ * track the geometry of the drawable
+ */
+static int
+dri3_update_drawable(__DRIdrawable *driDrawable,
+                     struct loader_dri3_drawable *draw)
+{
+   if (draw->first_init) {
+      xcb_get_geometry_cookie_t                 geom_cookie;
+      xcb_get_geometry_reply_t                  *geom_reply;
+      xcb_void_cookie_t                         cookie;
+      xcb_generic_error_t                       *error;
+      xcb_present_query_capabilities_cookie_t   present_capabilities_cookie;
+      xcb_present_query_capabilities_reply_t    *present_capabilities_reply;
+
+      draw->first_init = false;
+
+      /* Try to select for input on the window.
+       *
+       * If the drawable is a window, this will get our events
+       * delivered.
+       *
+       * Otherwise, we'll get a BadWindow error back from this request which
+       * will let us know that the drawable is a pixmap instead.
+       */
+
+      draw->eid = xcb_generate_id(draw->conn);
+      cookie =
+         xcb_present_select_input_checked(draw->conn, draw->eid, draw->drawable,
+                                          XCB_PRESENT_EVENT_MASK_CONFIGURE_NOTIFY |
+                                          XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY |
+                                          XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY);
+
+      present_capabilities_cookie =
+         xcb_present_query_capabilities(draw->conn, draw->drawable);
+
+      /* Create an XCB event queue to hold present events outside of the usual
+       * application event queue
+       */
+      draw->special_event = xcb_register_for_special_xge(draw->conn,
+                                                         &xcb_present_id,
+                                                         draw->eid,
+                                                         draw->stamp);
+      geom_cookie = xcb_get_geometry(draw->conn, draw->drawable);
+
+      geom_reply = xcb_get_geometry_reply(draw->conn, geom_cookie, NULL);
+
+      if (!geom_reply)
+         return false;
+
+      draw->width = geom_reply->width;
+      draw->height = geom_reply->height;
+      draw->depth = geom_reply->depth;
+      draw->vtable->set_drawable_size(draw, draw->width, draw->height);
+
+      free(geom_reply);
+
+      draw->is_pixmap = false;
+
+      /* Check to see if our select input call failed. If it failed with a
+       * BadWindow error, then assume the drawable is a pixmap. Destroy the
+       * special event queue created above and mark the drawable as a pixmap
+       */
+
+      error = xcb_request_check(draw->conn, cookie);
+
+      present_capabilities_reply =
+          xcb_present_query_capabilities_reply(draw->conn,
+                                               present_capabilities_cookie,
+                                               NULL);
+
+      if (present_capabilities_reply) {
+         draw->present_capabilities = present_capabilities_reply->capabilities;
+         free(present_capabilities_reply);
+      } else
+         draw->present_capabilities = 0;
+
+      if (error) {
+         if (error->error_code != BadWindow) {
+            free(error);
+            return false;
+         }
+         draw->is_pixmap = true;
+         xcb_unregister_for_special_event(draw->conn, draw->special_event);
+         draw->special_event = NULL;
+      }
+   }
+   dri3_flush_present_events(draw);
+   return true;
+}
+
+/* The DRIimage createImage function takes __DRI_IMAGE_FORMAT codes, while
+ * the createImageFromFds call takes __DRI_IMAGE_FOURCC codes. To avoid
+ * complete confusion, just deal in __DRI_IMAGE_FORMAT codes for now and
+ * translate to __DRI_IMAGE_FOURCC codes in the call to createImageFromFds
+ */
+static int
+image_format_to_fourcc(int format)
+{
+   /* Convert from __DRI_IMAGE_FORMAT to __DRI_IMAGE_FOURCC (sigh).
+    * Formats without a case below fall through and yield 0. */
+   switch (format) {
+   case __DRI_IMAGE_FORMAT_SARGB8: return __DRI_IMAGE_FOURCC_SARGB8888;
+   case __DRI_IMAGE_FORMAT_RGB565: return __DRI_IMAGE_FOURCC_RGB565;
+   case __DRI_IMAGE_FORMAT_XRGB8888: return __DRI_IMAGE_FOURCC_XRGB8888;
+   case __DRI_IMAGE_FORMAT_ARGB8888: return __DRI_IMAGE_FOURCC_ARGB8888;
+   case __DRI_IMAGE_FORMAT_ABGR8888: return __DRI_IMAGE_FOURCC_ABGR8888;
+   case __DRI_IMAGE_FORMAT_XBGR8888: return __DRI_IMAGE_FOURCC_XBGR8888;
+   }
+   return 0;
+}
+
+/** dri3_get_pixmap_buffer
+ *
+ * Get the DRM object for a pixmap from the X server and wrap it in a
+ * __DRIimage via createImageFromFds. Cached in draw->buffers[]; NULL on error.
+ */
+static struct loader_dri3_buffer *
+dri3_get_pixmap_buffer(__DRIdrawable *driDrawable, unsigned int format,
+                       enum loader_dri3_buffer_type buffer_type,
+                       struct loader_dri3_drawable *draw)
+{
+   int                                  buf_id = loader_dri3_pixmap_buf_id(buffer_type);
+   struct loader_dri3_buffer            *buffer = draw->buffers[buf_id];
+   xcb_drawable_t                       pixmap;
+   xcb_dri3_buffer_from_pixmap_cookie_t bp_cookie;
+   xcb_dri3_buffer_from_pixmap_reply_t  *bp_reply;
+   int                                  *fds;
+   xcb_sync_fence_t                     sync_fence;
+   struct xshmfence                     *shm_fence;
+   int                                  fence_fd;
+   __DRIimage                           *image_planar;
+   int                                  stride, offset;
+
+   if (buffer)
+      return buffer;
+
+   pixmap = draw->drawable;
+
+   buffer = calloc(1, sizeof *buffer);
+   if (!buffer)
+      goto no_buffer;
+
+   fence_fd = xshmfence_alloc_shm();
+   if (fence_fd < 0)
+      goto no_fence;
+   shm_fence = xshmfence_map_shm(fence_fd);
+   if (shm_fence == NULL) {
+      close (fence_fd);
+      goto no_fence;
+   }
+
+   xcb_dri3_fence_from_fd(draw->conn,
+                          pixmap,
+                          (sync_fence = xcb_generate_id(draw->conn)),
+                          false,
+                          fence_fd);
+
+   /* Get an FD for the pixmap object */
+   bp_cookie = xcb_dri3_buffer_from_pixmap(draw->conn, pixmap);
+   bp_reply = xcb_dri3_buffer_from_pixmap_reply(draw->conn, bp_cookie, NULL);
+   if (!bp_reply)
+      goto no_image;
+   fds = xcb_dri3_buffer_from_pixmap_reply_fds(draw->conn, bp_reply);
+
+   stride = bp_reply->stride;
+   offset = 0;
+
+   /* createImageFromFds creates a wrapper __DRIimage structure which
+    * can deal with multiple planes for things like Yuv images. So, once
+    * we've gotten the planar wrapper, pull the single plane out of it and
+    * discard the wrapper.
+    */
+   image_planar =
+      (draw->ext->image->createImageFromFds)(draw->dri_screen, bp_reply->width,
+                                             bp_reply->height,
+                                             image_format_to_fourcc(format),
+                                             fds, 1, &stride, &offset, buffer);
+   close(fds[0]);
+   if (!image_planar)
+      goto no_image;
+
+   buffer->image = (draw->ext->image->fromPlanar)(image_planar, 0, buffer);
+   (draw->ext->image->destroyImage)(image_planar);
+
+   if (!buffer->image)
+      goto no_image;
+
+   buffer->pixmap = pixmap;
+   buffer->own_pixmap = false;
+   buffer->width = bp_reply->width;
+   buffer->height = bp_reply->height;
+   buffer->buffer_type = buffer_type;
+   buffer->shm_fence = shm_fence;
+   buffer->sync_fence = sync_fence;
+
+   /* The reply was allocated by xcb on our behalf; release it */
+   free(bp_reply);
+   draw->buffers[buf_id] = buffer;
+   return buffer;
+
+no_image:
+   free(bp_reply); /* no-op when the reply itself failed (NULL) */
+   xcb_sync_destroy_fence(draw->conn, sync_fence);
+   xshmfence_unmap_shm(shm_fence);
+no_fence:
+   free(buffer);
+no_buffer:
+   return NULL;
+}
+
+/** dri3_get_buffer
+ *
+ * Find a front or back buffer, (re)allocating on size change; NULL on failure
+ */
+static struct loader_dri3_buffer *
+dri3_get_buffer(__DRIdrawable *driDrawable,
+                unsigned int format,
+                enum loader_dri3_buffer_type buffer_type,
+                struct loader_dri3_drawable *draw)
+{
+   struct loader_dri3_buffer *buffer;
+   int buf_id;
+   __DRIcontext *dri_context;
+
+   dri_context = draw->vtable->get_dri_context(draw);
+
+   if (buffer_type == loader_dri3_buffer_back) {
+      buf_id = dri3_find_back(draw);
+
+      if (buf_id < 0)
+         return NULL;
+   } else {
+      buf_id = LOADER_DRI3_FRONT_ID;
+   }
+
+   buffer = draw->buffers[buf_id];
+
+   /* Allocate a new buffer if there isn't an old one, or if that
+    * old one is the wrong size
+    */
+   if (!buffer || buffer->width != draw->width ||
+       buffer->height != draw->height) {
+      struct loader_dri3_buffer *new_buffer;
+
+      /* Allocate the replacement at the drawable's current size;
+       * on failure the old buffer, if any, is left in place */
+      new_buffer = dri3_alloc_render_buffer(draw,
+                                                   format,
+                                                   draw->width,
+                                                   draw->height,
+                                                   draw->depth);
+      if (!new_buffer)
+         return NULL;
+
+      /* When resizing, copy the contents of the old buffer, waiting for that
+       * copy to complete using our fences before proceeding
+       */
+      switch (buffer_type) {
+      case loader_dri3_buffer_back:
+         if (buffer) {
+            if (!buffer->linear_buffer) {
+               dri3_fence_reset(draw->conn, new_buffer);
+               dri3_fence_await(draw->conn, buffer);
+               dri3_copy_area(draw->conn,
+                              buffer->pixmap,
+                              new_buffer->pixmap,
+                              dri3_drawable_gc(draw),
+                              0, 0, 0, 0,
+                              draw->width, draw->height);
+               dri3_fence_trigger(draw->conn, new_buffer);
+            } else if (draw->vtable->in_current_context(draw)) {
+               draw->ext->image->blitImage(dri_context,
+                                           new_buffer->image,
+                                           buffer->image,
+                                           0, 0, draw->width, draw->height,
+                                           0, 0, draw->width, draw->height, 0);
+            }
+            dri3_free_render_buffer(draw, buffer);
+         }
+         break;
+      case loader_dri3_buffer_front:
+         dri3_fence_reset(draw->conn, new_buffer);
+         dri3_copy_area(draw->conn,
+                        draw->drawable,
+                        new_buffer->pixmap,
+                        dri3_drawable_gc(draw),
+                        0, 0, 0, 0,
+                        draw->width, draw->height);
+         dri3_fence_trigger(draw->conn, new_buffer);
+
+         if (new_buffer->linear_buffer &&
+             draw->vtable->in_current_context(draw)) {
+            dri3_fence_await(draw->conn, new_buffer);
+            draw->ext->image->blitImage(dri_context,
+                                        new_buffer->image,
+                                        new_buffer->linear_buffer,
+                                        0, 0, draw->width, draw->height,
+                                        0, 0, draw->width, draw->height, 0);
+         }
+         break;
+      }
+      buffer = new_buffer;
+      buffer->buffer_type = buffer_type;
+      draw->buffers[buf_id] = buffer;
+   }
+   dri3_fence_await(draw->conn, buffer);
+
+   /* Return the requested buffer */
+   return buffer;
+}
+
+/** dri3_free_buffers
+ *
+ * Free the front buffer or all of the back buffers. Used
+ * when the application changes which buffers it needs
+ */
+static void
+dri3_free_buffers(__DRIdrawable *driDrawable,
+                  enum loader_dri3_buffer_type buffer_type,
+                  struct loader_dri3_drawable *draw)
+{
+   struct loader_dri3_buffer *buffer;
+   int first_id;
+   int n_id;
+   int buf_id;
+
+   switch (buffer_type) {
+   case loader_dri3_buffer_back:
+      first_id = LOADER_DRI3_BACK_ID(0);
+      n_id = LOADER_DRI3_MAX_BACK;
+      break;
+   case loader_dri3_buffer_front:
+      first_id = LOADER_DRI3_FRONT_ID;
+      n_id = 1;
+   }
+   /* NOTE(review): no default case; any other value leaves the range unset */
+   for (buf_id = first_id; buf_id < first_id + n_id; buf_id++) {
+      buffer = draw->buffers[buf_id];
+      if (buffer) {
+         dri3_free_render_buffer(draw, buffer);
+         draw->buffers[buf_id] = NULL;
+      }
+   }
+}
+
+/** loader_dri3_get_buffers
+ *
+ * The published buffer allocation API. Fills *buffers with the images
+ * selected by buffer_mask, allocating as needed and freeing those no
+ * longer requested. Returns true on success, false on failure.
+ */
+int
+loader_dri3_get_buffers(__DRIdrawable *driDrawable,
+                        unsigned int format,
+                        uint32_t *stamp,
+                        void *loaderPrivate,
+                        uint32_t buffer_mask,
+                        struct __DRIimageList *buffers)
+{
+   struct loader_dri3_drawable *draw = loaderPrivate;
+   struct loader_dri3_buffer   *front, *back;
+
+   buffers->image_mask = 0;
+   buffers->front = NULL;
+   buffers->back = NULL;
+
+   front = NULL;
+   back = NULL;
+
+   if (!dri3_update_drawable(driDrawable, draw))
+      return false;
+
+   /* pixmaps always have front buffers */
+   if (draw->is_pixmap)
+      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
+
+   if (buffer_mask & __DRI_IMAGE_BUFFER_FRONT) {
+      /* All pixmaps are owned by the server gpu.
+       * When we use a different gpu, we can't use the pixmap
+       * as buffer since it is potentially tiled in a way
+       * our device can't understand. In this case, use
+       * a fake front buffer. Hopefully the pixmap
+       * content will get synced with the fake front
+       * buffer.
+       */
+      if (draw->is_pixmap && !draw->is_different_gpu)
+         front = dri3_get_pixmap_buffer(driDrawable,
+                                               format,
+                                               loader_dri3_buffer_front,
+                                               draw);
+      else
+         front = dri3_get_buffer(driDrawable,
+                                        format,
+                                        loader_dri3_buffer_front,
+                                        draw);
+
+      if (!front)
+         return false;
+   } else {
+      dri3_free_buffers(driDrawable, loader_dri3_buffer_front, draw);
+      draw->have_fake_front = 0;
+   }
+
+   if (buffer_mask & __DRI_IMAGE_BUFFER_BACK) {
+      back = dri3_get_buffer(driDrawable,
+                                    format,
+                                    loader_dri3_buffer_back,
+                                    draw);
+      if (!back)
+         return false;
+      draw->have_back = 1;
+   } else {
+      dri3_free_buffers(driDrawable, loader_dri3_buffer_back, draw);
+      draw->have_back = 0;
+   }
+
+   if (front) {
+      buffers->image_mask |= __DRI_IMAGE_BUFFER_FRONT;
+      buffers->front = front->image;
+      draw->have_fake_front = draw->is_different_gpu || !draw->is_pixmap;
+   }
+
+   if (back) {
+      buffers->image_mask |= __DRI_IMAGE_BUFFER_BACK;
+      buffers->back = back->image;
+   }
+
+   draw->stamp = stamp;
+
+   return true;
+}
diff --git a/src/loader/loader_dri3_helper.h b/src/loader/loader_dri3_helper.h
new file mode 100644
index 00000000000..54c2a52f5e1
--- /dev/null
+++ b/src/loader/loader_dri3_helper.h
@@ -0,0 +1,232 @@
+/*
+ * Copyright © 2013 Keith Packard
+ * Copyright © 2015 Boyan Ding
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  The copyright holders make no representations
+ * about the suitability of this software for any purpose.  It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#ifndef LOADER_DRI3_HEADER_H
+#define LOADER_DRI3_HEADER_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <xcb/xcb.h>
+#include <xcb/present.h>
+
+#include <GL/gl.h>
+#include <GL/internal/dri_interface.h>
+
+enum loader_dri3_buffer_type {
+   loader_dri3_buffer_back = 0,
+   loader_dri3_buffer_front = 1
+};
+
+struct loader_dri3_buffer {
+   __DRIimage   *image;         /* image returned to the driver as front/back */
+   __DRIimage   *linear_buffer; /* NULL unless a separate linear image is used */
+   uint32_t     pixmap;         /* XID of the X pixmap (or drawable) */
+
+   /* Synchronization between the client and X server is done using an
+    * xshmfence that is mapped into an X server SyncFence. This lets the
+    * client check whether the X server is done using a buffer with a simple
+    * xshmfence call, rather than going to read X events from the wire.
+    *
+    * However, we can only wait for one xshmfence to be triggered at a time,
+    * so we need to know *which* buffer is going to be idle next. We do that
+    * by waiting for a PresentIdleNotify event. When that event arrives, the
+    * 'busy' flag gets cleared and the client knows that the fence has been
+    * triggered, and that the wait call will not block.
+    */
+
+   uint32_t     sync_fence;     /* XID of X SyncFence object */
+   struct xshmfence *shm_fence; /* pointer to xshmfence object */
+   bool         busy;           /* Set on swap, cleared on IdleNotify */
+   bool         own_pixmap;     /* We allocated the pixmap ID, free on destroy */
+
+   uint32_t     size;
+   uint32_t     pitch;          /* stride queried from the image */
+   uint32_t     cpp;            /* bytes per pixel (cpp * 8 is sent as bpp) */
+   uint32_t     flags;
+   uint32_t     width, height;  /* compared against the drawable size */
+   uint64_t     last_swap;
+
+   enum loader_dri3_buffer_type        buffer_type;
+};
+
+
+#define LOADER_DRI3_MAX_BACK   4
+#define LOADER_DRI3_BACK_ID(i) (i)
+#define LOADER_DRI3_FRONT_ID   (LOADER_DRI3_MAX_BACK)
+
+static inline int
+loader_dri3_pixmap_buf_id(enum loader_dri3_buffer_type buffer_type)
+{
+   if (buffer_type == loader_dri3_buffer_back)
+      return LOADER_DRI3_BACK_ID(0);
+   else
+      return LOADER_DRI3_FRONT_ID;
+}
+
+struct loader_dri3_extensions {
+   const __DRIcoreExtension *core;
+   const __DRIimageDriverExtension *image_driver;
+   const __DRI2flushExtension *flush;
+   const __DRI2configQueryExtension *config;
+   const __DRItexBufferExtension *tex_buffer;
+   const __DRIimageExtension *image;
+};
+
+struct loader_dri3_drawable;
+
+struct loader_dri3_vtable {
+   int (*get_swap_interval)(struct loader_dri3_drawable *);
+   int (*clamp_swap_interval)(struct loader_dri3_drawable *, int);
+   void (*set_swap_interval)(struct loader_dri3_drawable *, int);
+   void (*set_drawable_size)(struct loader_dri3_drawable *, int, int);
+   bool (*in_current_context)(struct loader_dri3_drawable *);
+   __DRIcontext *(*get_dri_context)(struct loader_dri3_drawable *);
+   void (*flush_drawable)(struct loader_dri3_drawable *, unsigned);
+   void (*show_fps)(struct loader_dri3_drawable *, uint64_t);
+};
+
+#define LOADER_DRI3_NUM_BUFFERS (1 + LOADER_DRI3_MAX_BACK)
+
+struct loader_dri3_drawable {
+   xcb_connection_t *conn;
+   __DRIdrawable *dri_drawable;
+   xcb_drawable_t drawable;
+   int width;
+   int height;
+   int depth;
+   uint8_t have_back;
+   uint8_t have_fake_front;
+   uint8_t is_pixmap;
+   uint8_t flipping;
+
+   /* Information about the GPU owning the buffer */
+   __DRIscreen *dri_screen;
+   bool is_different_gpu;
+
+   /* Present extension capabilities
+    */
+   uint32_t present_capabilities;
+
+   /* SBC numbers are tracked by using the serial numbers
+    * in the present request and complete events
+    */
+   uint64_t send_sbc;
+   uint64_t recv_sbc;
+
+   /* Last received UST/MSC values for pixmap present complete */
+   uint64_t ust, msc;
+
+   /* Last received UST/MSC values from present notify msc event */
+   uint64_t notify_ust, notify_msc;
+
+   /* Serial numbers for tracking wait_for_msc events */
+   uint32_t send_msc_serial;
+   uint32_t recv_msc_serial;
+
+   struct loader_dri3_buffer *buffers[LOADER_DRI3_NUM_BUFFERS];
+   int cur_back;
+   int num_back;
+
+   uint32_t *stamp;
+
+   xcb_present_event_t eid;
+   xcb_gcontext_t gc;
+   xcb_special_event_t *special_event;
+
+   bool first_init;
+
+   struct loader_dri3_extensions *ext;
+   struct loader_dri3_vtable *vtable;
+};
+
+void
+loader_dri3_set_swap_interval(struct loader_dri3_drawable *draw,
+                              int interval);
+
+void
+loader_dri3_drawable_fini(struct loader_dri3_drawable *draw);
+
+int
+loader_dri3_drawable_init(xcb_connection_t *conn,
+                          xcb_drawable_t drawable,
+                          __DRIscreen *dri_screen,
+                          bool is_different_gpu,
+                          const __DRIconfig *dri_config,
+                          struct loader_dri3_extensions *ext,
+                          struct loader_dri3_vtable *vtable,
+                          struct loader_dri3_drawable*);
+
+bool loader_dri3_wait_for_msc(struct loader_dri3_drawable *draw,
+                              int64_t target_msc,
+                              int64_t divisor, int64_t remainder,
+                              int64_t *ust, int64_t *msc, int64_t *sbc);
+
+int64_t
+loader_dri3_swap_buffers_msc(struct loader_dri3_drawable *draw,
+                             int64_t target_msc, int64_t divisor,
+                             int64_t remainder, unsigned flush_flags,
+                             bool force_copy);
+
+int
+loader_dri3_wait_for_sbc(struct loader_dri3_drawable *draw,
+                         int64_t target_sbc, int64_t *ust,
+                         int64_t *msc, int64_t *sbc);
+
+int loader_dri3_query_buffer_age(struct loader_dri3_drawable *draw);
+
+void
+loader_dri3_flush(struct loader_dri3_drawable *draw,
+                  unsigned flags,
+                  enum __DRI2throttleReason throttle_reason);
+
+void
+loader_dri3_copy_sub_buffer(struct loader_dri3_drawable *draw,
+                            int x, int y,
+                            int width, int height,
+                            bool flush);
+
+void
+loader_dri3_copy_drawable(struct loader_dri3_drawable *draw,
+                          xcb_drawable_t dest,
+                          xcb_drawable_t src);
+
+void
+loader_dri3_wait_x(struct loader_dri3_drawable *draw);
+
+void
+loader_dri3_wait_gl(struct loader_dri3_drawable *draw);
+
+int loader_dri3_open(xcb_connection_t *conn,
+                     xcb_window_t root,
+                     uint32_t provider);
+
+int
+loader_dri3_get_buffers(__DRIdrawable *driDrawable,
+                        unsigned int format,
+                        uint32_t *stamp,
+                        void *loaderPrivate,
+                        uint32_t buffer_mask,
+                        struct __DRIimageList *buffers);
+
+#endif

From fdacbc439edc1d26865a99524d1a9efc9edb4c0d Mon Sep 17 00:00:00 2001
From: Boyan Ding <boyan.j.ding@gmail.com>
Date: Tue, 21 Jul 2015 23:43:55 +0800
Subject: [PATCH 031/335] glx/dri3: Convert to use dri3 helper in loader
 library
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2: From Martin Peres
 - convert to the new drawable interface
 - delete dead code after the dropping of some vfuncs
 - delete the width and height attributes since they are found in the helper

Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
Signed-off-by: Martin Peres <martin.peres@linux.intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
---
 src/glx/Makefile.am |    2 +-
 src/glx/dri3_glx.c  | 1435 +++++--------------------------------------
 src/glx/dri3_priv.h |   94 +--
 3 files changed, 145 insertions(+), 1386 deletions(-)

diff --git a/src/glx/Makefile.am b/src/glx/Makefile.am
index d08ff7a8dd2..00925455b07 100644
--- a/src/glx/Makefile.am
+++ b/src/glx/Makefile.am
@@ -134,7 +134,7 @@ libglx_la_SOURCES += \
 	dri3_glx.c \
 	dri3_priv.h
 
-libglx_la_LIBADD += $(XCB_DRI3_LIBS)
+libglx_la_LIBADD += $(top_builddir)/src/loader/libloader_dri3_helper.la
 endif
 
 if HAVE_APPLEDRI
diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c
index 96f13e6a07b..ee243126731 100644
--- a/src/glx/dri3_glx.c
+++ b/src/glx/dri3_glx.c
@@ -78,39 +78,108 @@
 #include "loader.h"
 #include "dri2.h"
 
+static struct dri3_drawable *
+loader_drawable_to_dri3_drawable(struct loader_dri3_drawable *draw) {
+   size_t offset = offsetof(struct dri3_drawable, loader_drawable);
+   return (struct dri3_drawable *)((char *) draw - offset); /* container_of */
+}
+
+static int
+glx_dri3_get_swap_interval(struct loader_dri3_drawable *draw)
+{
+   struct dri3_drawable *priv = loader_drawable_to_dri3_drawable(draw);
+
+   return priv->swap_interval;
+}
+
+static int
+glx_dri3_clamp_swap_interval(struct loader_dri3_drawable *draw, int interval)
+{
+   return interval;
+}
+
+static void
+glx_dri3_set_swap_interval(struct loader_dri3_drawable *draw, int interval)
+{
+   struct dri3_drawable *priv = loader_drawable_to_dri3_drawable(draw);
+
+   priv->swap_interval = interval;
+}
+
+static void
+glx_dri3_set_drawable_size(struct loader_dri3_drawable *draw,
+                           int width, int height)
+{
+   /* Nothing to do */
+}
+
+static bool
+glx_dri3_in_current_context(struct loader_dri3_drawable *draw)
+{
+   struct dri3_drawable *priv = loader_drawable_to_dri3_drawable(draw);
+   struct dri3_context *pcp = (struct dri3_context *) __glXGetCurrentContext();
+   struct dri3_screen *psc = (struct dri3_screen *) priv->base.psc;
+   /* True only for a real (non-dummy) current context on draw's screen */
+   return (&pcp->base != &dummyContext) && pcp->base.psc == &psc->base;
+}
+
+static __DRIcontext *
+glx_dri3_get_dri_context(struct loader_dri3_drawable *draw)
+{
+   struct glx_context *gc = __glXGetCurrentContext();
+
+   /* Return the current context's __DRIcontext, or NULL if there is none.
+    * dummyContext is a bare glx_context: never read it as a dri3_context.
+    */
+   if (!gc || gc == &dummyContext)
+      return NULL;
+   return ((struct dri3_context *) gc)->driContext;
+}
+
+static void
+glx_dri3_flush_drawable(struct loader_dri3_drawable *draw, unsigned flags)
+{
+   loader_dri3_flush(draw, flags, __DRI2_THROTTLE_SWAPBUFFER);
+}
+
+static void
+glx_dri3_show_fps(struct loader_dri3_drawable *draw, uint64_t current_ust)
+{
+   struct dri3_drawable *priv = loader_drawable_to_dri3_drawable(draw);
+   const uint64_t interval =
+      ((struct dri3_screen *) priv->base.psc)->show_fps_interval;
+   /* A zero show_fps_interval disables FPS reporting */
+   if (!interval)
+      return;
+
+   priv->frames++;
+
+   /* DRI3+Present together uses microseconds for UST; interval is seconds. */
+   if (priv->previous_ust + interval * 1000000 <= current_ust) {
+      if (priv->previous_ust) {
+         fprintf(stderr, "libGL: FPS = %.1f\n",
+                 ((uint64_t) priv->frames * 1000000) /
+                 (double)(current_ust - priv->previous_ust));
+      }
+      priv->frames = 0;
+      priv->previous_ust = current_ust;
+   }
+}
+
+static struct loader_dri3_vtable glx_dri3_vtable = {
+   .get_swap_interval = glx_dri3_get_swap_interval,
+   .clamp_swap_interval = glx_dri3_clamp_swap_interval,
+   .set_swap_interval = glx_dri3_set_swap_interval,
+   .set_drawable_size = glx_dri3_set_drawable_size,
+   .in_current_context = glx_dri3_in_current_context,
+   .get_dri_context = glx_dri3_get_dri_context,
+   .flush_drawable = glx_dri3_flush_drawable,
+   .show_fps = glx_dri3_show_fps,
+};
+
+
 static const struct glx_context_vtable dri3_context_vtable;
 
-static inline void
-dri3_fence_reset(xcb_connection_t *c, struct dri3_buffer *buffer)
-{
-   xshmfence_reset(buffer->shm_fence);
-}
-
-static inline void
-dri3_fence_set(struct dri3_buffer *buffer)
-{
-   xshmfence_trigger(buffer->shm_fence);
-}
-
-static inline void
-dri3_fence_trigger(xcb_connection_t *c, struct dri3_buffer *buffer)
-{
-   xcb_sync_trigger_fence(c, buffer->sync_fence);
-}
-
-static inline void
-dri3_fence_await(xcb_connection_t *c, struct dri3_buffer *buffer)
-{
-   xcb_flush(c);
-   xshmfence_await(buffer->shm_fence);
-}
-
-static inline Bool
-dri3_fence_triggered(struct dri3_buffer *buffer)
-{
-   return xshmfence_query(buffer->shm_fence);
-}
-
 static void
 dri3_destroy_context(struct glx_context *context)
 {
@@ -143,7 +212,8 @@ dri3_bind_context(struct glx_context *context, struct glx_context *old,
       return GLXBadDrawable;
 
    if (!(*psc->core->bindContext) (pcp->driContext,
-                                   pdraw->driDrawable, pread->driDrawable))
+                                   pdraw->loader_drawable.dri_drawable,
+                                   pread->loader_drawable.dri_drawable))
       return GLXBadContext;
 
    return Success;
@@ -264,39 +334,13 @@ dri3_create_context(struct glx_screen *base,
                                       0, NULL, &error);
 }
 
-static void
-dri3_free_render_buffer(struct dri3_drawable *pdraw, struct dri3_buffer *buffer);
-
-static void
-dri3_update_num_back(struct dri3_drawable *priv)
-{
-   priv->num_back = 1;
-   if (priv->flipping) {
-      if (!priv->is_pixmap && !(priv->present_capabilities & XCB_PRESENT_CAPABILITY_ASYNC))
-         priv->num_back++;
-      priv->num_back++;
-   }
-   if (priv->swap_interval == 0)
-      priv->num_back++;
-}
-
 static void
 dri3_destroy_drawable(__GLXDRIdrawable *base)
 {
-   struct dri3_screen *psc = (struct dri3_screen *) base->psc;
    struct dri3_drawable *pdraw = (struct dri3_drawable *) base;
-   xcb_connection_t     *c = XGetXCBConnection(pdraw->base.psc->dpy);
-   int i;
 
-   (*psc->core->destroyDrawable) (pdraw->driDrawable);
+   loader_dri3_drawable_fini(&pdraw->loader_drawable);
 
-   for (i = 0; i < DRI3_NUM_BUFFERS; i++) {
-      if (pdraw->buffers[i])
-         dri3_free_render_buffer(pdraw, pdraw->buffers[i]);
-   }
-
-   if (pdraw->special_event)
-      xcb_unregister_for_special_event(c, pdraw->special_event);
    free(pdraw);
 }
 
@@ -307,7 +351,6 @@ dri3_create_drawable(struct glx_screen *base, XID xDrawable,
    struct dri3_drawable *pdraw;
    struct dri3_screen *psc = (struct dri3_screen *) base;
    __GLXDRIconfigPrivate *config = (__GLXDRIconfigPrivate *) config_base;
-   GLint vblank_mode = DRI_CONF_VBLANK_DEF_INTERVAL_1;
 
    pdraw = calloc(1, sizeof(*pdraw));
    if (!pdraw)
@@ -317,158 +360,21 @@ dri3_create_drawable(struct glx_screen *base, XID xDrawable,
    pdraw->base.xDrawable = xDrawable;
    pdraw->base.drawable = drawable;
    pdraw->base.psc = &psc->base;
-   pdraw->swap_interval = 1; /* default may be overridden below */
-   pdraw->have_back = 0;
-   pdraw->have_fake_front = 0;
-
-   if (psc->config)
-      psc->config->configQueryi(psc->driScreen,
-                                "vblank_mode", &vblank_mode);
-
-   switch (vblank_mode) {
-   case DRI_CONF_VBLANK_NEVER:
-   case DRI_CONF_VBLANK_DEF_INTERVAL_0:
-      pdraw->swap_interval = 0;
-      break;
-   case DRI_CONF_VBLANK_DEF_INTERVAL_1:
-   case DRI_CONF_VBLANK_ALWAYS_SYNC:
-   default:
-      pdraw->swap_interval = 1;
-      break;
-   }
-
-   dri3_update_num_back(pdraw);
 
    (void) __glXInitialize(psc->base.dpy);
 
-   /* Create a new drawable */
-   pdraw->driDrawable =
-      (*psc->image_driver->createNewDrawable) (psc->driScreen,
-                                               config->driConfig, pdraw);
-
-   if (!pdraw->driDrawable) {
+   if (loader_dri3_drawable_init(XGetXCBConnection(base->dpy),
+                                 xDrawable, psc->driScreen,
+                                 psc->is_different_gpu, config->driConfig,
+                                 &psc->loader_dri3_ext, &glx_dri3_vtable,
+                                 &pdraw->loader_drawable)) {
       free(pdraw);
       return NULL;
    }
 
-   /*
-    * Make sure server has the same swap interval we do for the new
-    * drawable.
-    */
-   if (psc->vtable.setSwapInterval)
-      psc->vtable.setSwapInterval(&pdraw->base, pdraw->swap_interval);
-
    return &pdraw->base;
 }
 
-static void
-show_fps(struct dri3_drawable *draw, uint64_t current_ust)
-{
-   const uint64_t interval =
-      ((struct dri3_screen *) draw->base.psc)->show_fps_interval;
-
-   draw->frames++;
-
-   /* DRI3+Present together uses microseconds for UST. */
-   if (draw->previous_ust + interval * 1000000 <= current_ust) {
-      if (draw->previous_ust) {
-         fprintf(stderr, "libGL: FPS = %.1f\n",
-                 ((uint64_t) draw->frames * 1000000) /
-                 (double)(current_ust - draw->previous_ust));
-      }
-      draw->frames = 0;
-      draw->previous_ust = current_ust;
-   }
-}
-
-/*
- * Process one Present event
- */
-static void
-dri3_handle_present_event(struct dri3_drawable *priv, xcb_present_generic_event_t *ge)
-{
-   struct dri3_screen *psc = (struct dri3_screen *) priv->base.psc;
-
-   switch (ge->evtype) {
-   case XCB_PRESENT_CONFIGURE_NOTIFY: {
-      xcb_present_configure_notify_event_t *ce = (void *) ge;
-
-      priv->width = ce->width;
-      priv->height = ce->height;
-      break;
-   }
-   case XCB_PRESENT_COMPLETE_NOTIFY: {
-      xcb_present_complete_notify_event_t *ce = (void *) ge;
-
-      /* Compute the processed SBC number from the received 32-bit serial number merged
-       * with the upper 32-bits of the sent 64-bit serial number while checking for
-       * wrap
-       */
-      if (ce->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP) {
-         priv->recv_sbc = (priv->send_sbc & 0xffffffff00000000LL) | ce->serial;
-         if (priv->recv_sbc > priv->send_sbc)
-            priv->recv_sbc -= 0x100000000;
-         switch (ce->mode) {
-         case XCB_PRESENT_COMPLETE_MODE_FLIP:
-            priv->flipping = true;
-            break;
-         case XCB_PRESENT_COMPLETE_MODE_COPY:
-            priv->flipping = false;
-            break;
-         }
-         dri3_update_num_back(priv);
-
-         if (psc->show_fps_interval)
-            show_fps(priv, ce->ust);
-
-         priv->ust = ce->ust;
-         priv->msc = ce->msc;
-      } else {
-         priv->recv_msc_serial = ce->serial;
-         priv->notify_ust = ce->ust;
-         priv->notify_msc = ce->msc;
-      }
-      break;
-   }
-   case XCB_PRESENT_EVENT_IDLE_NOTIFY: {
-      xcb_present_idle_notify_event_t *ie = (void *) ge;
-      int b;
-
-      for (b = 0; b < sizeof (priv->buffers) / sizeof (priv->buffers[0]); b++) {
-         struct dri3_buffer        *buf = priv->buffers[b];
-
-         if (buf && buf->pixmap == ie->pixmap) {
-            buf->busy = 0;
-            if (priv->num_back <= b && b < DRI3_MAX_BACK) {
-               dri3_free_render_buffer(priv, buf);
-               priv->buffers[b] = NULL;
-            }
-            break;
-         }
-      }
-      break;
-   }
-   }
-   free(ge);
-}
-
-static bool
-dri3_wait_for_event(__GLXDRIdrawable *pdraw)
-{
-   xcb_connection_t *c = XGetXCBConnection(pdraw->psc->dpy);
-   struct dri3_drawable *priv = (struct dri3_drawable *) pdraw;
-   xcb_generic_event_t *ev;
-   xcb_present_generic_event_t *ge;
-
-   xcb_flush(c);
-   ev = xcb_wait_for_special_event(c, priv->special_event);
-   if (!ev)
-      return false;
-   ge = (void *) ev;
-   dri3_handle_present_event(priv, ge);
-   return true;
-}
-
 /** dri3_wait_for_msc
  *
  * Get the X server to send an event when the target msc/divisor/remainder is
@@ -478,32 +384,10 @@ static int
 dri3_wait_for_msc(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor,
                   int64_t remainder, int64_t *ust, int64_t *msc, int64_t *sbc)
 {
-   xcb_connection_t *c = XGetXCBConnection(pdraw->psc->dpy);
    struct dri3_drawable *priv = (struct dri3_drawable *) pdraw;
-   uint32_t msc_serial;
 
-   /* Ask for the an event for the target MSC */
-   msc_serial = ++priv->send_msc_serial;
-   xcb_present_notify_msc(c,
-                          priv->base.xDrawable,
-                          msc_serial,
-                          target_msc,
-                          divisor,
-                          remainder);
-
-   xcb_flush(c);
-
-   /* Wait for the event */
-   if (priv->special_event) {
-      while ((int32_t) (msc_serial - priv->recv_msc_serial) > 0) {
-         if (!dri3_wait_for_event(pdraw))
-            return 0;
-      }
-   }
-
-   *ust = priv->notify_ust;
-   *msc = priv->notify_msc;
-   *sbc = priv->recv_sbc;
+   loader_dri3_wait_for_msc(&priv->loader_drawable, target_msc, divisor,
+                            remainder, ust, msc, sbc);
 
    return 1;
 }
@@ -532,101 +416,8 @@ dri3_wait_for_sbc(__GLXDRIdrawable *pdraw, int64_t target_sbc, int64_t *ust,
 {
    struct dri3_drawable *priv = (struct dri3_drawable *) pdraw;
 
-   /* From the GLX_OML_sync_control spec:
-    *
-    *     "If <target_sbc> = 0, the function will block until all previous
-    *      swaps requested with glXSwapBuffersMscOML for that window have
-    *      completed."
-    */
-   if (!target_sbc)
-      target_sbc = priv->send_sbc;
-
-   while (priv->recv_sbc < target_sbc) {
-      if (!dri3_wait_for_event(pdraw))
-         return 0;
-   }
-
-   *ust = priv->ust;
-   *msc = priv->msc;
-   *sbc = priv->recv_sbc;
-   return 1;
-}
-
-/**
- * Asks the driver to flush any queued work necessary for serializing with the
- * X command stream, and optionally the slightly more strict requirement of
- * glFlush() equivalence (which would require flushing even if nothing had
- * been drawn to a window system framebuffer, for example).
- */
-static void
-dri3_flush(struct dri3_screen *psc,
-           struct dri3_drawable *draw,
-           unsigned flags,
-           enum __DRI2throttleReason throttle_reason)
-{
-   struct glx_context *gc = __glXGetCurrentContext();
-
-   if (gc) {
-      struct dri3_context *dri3Ctx = (struct dri3_context *)gc;
-
-      (*psc->f->flush_with_flags)(dri3Ctx->driContext, draw->driDrawable, flags, throttle_reason);
-   }
-}
-
-static xcb_gcontext_t
-dri3_drawable_gc(struct dri3_drawable *priv)
-{
-   if (!priv->gc) {
-      uint32_t v;
-      xcb_connection_t *c = XGetXCBConnection(priv->base.psc->dpy);
-
-      v = 0;
-      xcb_create_gc(c,
-                    (priv->gc = xcb_generate_id(c)),
-                    priv->base.xDrawable,
-                    XCB_GC_GRAPHICS_EXPOSURES,
-                    &v);
-   }
-   return priv->gc;
-}
-
-static struct dri3_buffer *
-dri3_back_buffer(struct dri3_drawable *priv)
-{
-   return priv->buffers[DRI3_BACK_ID(priv->cur_back)];
-}
-
-static struct dri3_buffer *
-dri3_fake_front_buffer(struct dri3_drawable *priv)
-{
-   return priv->buffers[DRI3_FRONT_ID];
-}
-
-static void
-dri3_copy_area (xcb_connection_t *c  /**< */,
-                xcb_drawable_t    src_drawable  /**< */,
-                xcb_drawable_t    dst_drawable  /**< */,
-                xcb_gcontext_t    gc  /**< */,
-                int16_t           src_x  /**< */,
-                int16_t           src_y  /**< */,
-                int16_t           dst_x  /**< */,
-                int16_t           dst_y  /**< */,
-                uint16_t          width  /**< */,
-                uint16_t          height  /**< */)
-{
-   xcb_void_cookie_t cookie;
-
-   cookie = xcb_copy_area_checked(c,
-                                  src_drawable,
-                                  dst_drawable,
-                                  gc,
-                                  src_x,
-                                  src_y,
-                                  dst_x,
-                                  dst_y,
-                                  width,
-                                  height);
-   xcb_discard_reply(c, cookie.sequence);
+   return loader_dri3_wait_for_sbc(&priv->loader_drawable, target_sbc,
+                                   ust, msc, sbc);
 }
 
 static void
@@ -635,144 +426,27 @@ dri3_copy_sub_buffer(__GLXDRIdrawable *pdraw, int x, int y,
                      Bool flush)
 {
    struct dri3_drawable *priv = (struct dri3_drawable *) pdraw;
-   struct dri3_screen *psc = (struct dri3_screen *) pdraw->psc;
-   struct dri3_context *pcp = (struct dri3_context *) __glXGetCurrentContext();
-   xcb_connection_t     *c = XGetXCBConnection(priv->base.psc->dpy);
-   struct dri3_buffer *back;
 
-   unsigned flags = __DRI2_FLUSH_DRAWABLE;
-
-   /* Check we have the right attachments */
-   if (!priv->have_back || priv->is_pixmap)
-      return;
-
-   if (flush)
-      flags |= __DRI2_FLUSH_CONTEXT;
-   dri3_flush(psc, priv, flags, __DRI2_THROTTLE_SWAPBUFFER);
-
-   back = dri3_back_buffer(priv);
-   y = priv->height - y - height;
-
-   if (psc->is_different_gpu && (&pcp->base != &dummyContext) && pcp->base.psc == &psc->base) {
-      /* Update the linear buffer part of the back buffer
-       * for the dri3_copy_area operation
-       */
-      psc->image->blitImage(pcp->driContext,
-                            back->linear_buffer,
-                            back->image,
-                            0, 0, back->width,
-                            back->height,
-                            0, 0, back->width,
-                            back->height, __BLIT_FLAG_FLUSH);
-      /* We use blitImage to update our fake front,
-       */
-      if (priv->have_fake_front)
-         psc->image->blitImage(pcp->driContext,
-                               dri3_fake_front_buffer(priv)->image,
-                               back->image,
-                               x, y, width, height,
-                               x, y, width, height, __BLIT_FLAG_FLUSH);
-   }
-
-   dri3_fence_reset(c, back);
-   dri3_copy_area(c,
-                  dri3_back_buffer(priv)->pixmap,
-                  priv->base.xDrawable,
-                  dri3_drawable_gc(priv),
-                  x, y, x, y, width, height);
-   dri3_fence_trigger(c, back);
-   /* Refresh the fake front (if present) after we just damaged the real
-    * front.
-    */
-   if (priv->have_fake_front && !psc->is_different_gpu) {
-      dri3_fence_reset(c, dri3_fake_front_buffer(priv));
-      dri3_copy_area(c,
-                     dri3_back_buffer(priv)->pixmap,
-                     dri3_fake_front_buffer(priv)->pixmap,
-                     dri3_drawable_gc(priv),
-                     x, y, x, y, width, height);
-      dri3_fence_trigger(c, dri3_fake_front_buffer(priv));
-      dri3_fence_await(c, dri3_fake_front_buffer(priv));
-   }
-   dri3_fence_await(c, back);
-}
-
-static void
-dri3_copy_drawable(struct dri3_drawable *priv, Drawable dest, Drawable src)
-{
-   struct dri3_screen *psc = (struct dri3_screen *) priv->base.psc;
-   xcb_connection_t     *c = XGetXCBConnection(priv->base.psc->dpy);
-
-   dri3_flush(psc, priv, __DRI2_FLUSH_DRAWABLE, 0);
-
-   dri3_fence_reset(c, dri3_fake_front_buffer(priv));
-   dri3_copy_area(c,
-                  src, dest,
-                  dri3_drawable_gc(priv),
-                  0, 0, 0, 0, priv->width, priv->height);
-   dri3_fence_trigger(c, dri3_fake_front_buffer(priv));
-   dri3_fence_await(c, dri3_fake_front_buffer(priv));
+   loader_dri3_copy_sub_buffer(&priv->loader_drawable, x, y,
+                               width, height, flush);
 }
 
 static void
 dri3_wait_x(struct glx_context *gc)
 {
-   struct dri3_context *pcp = (struct dri3_context *) gc;
    struct dri3_drawable *priv = (struct dri3_drawable *)
       GetGLXDRIDrawable(gc->currentDpy, gc->currentDrawable);
-   struct dri3_screen *psc;
-   struct dri3_buffer *front;
 
-   if (priv == NULL || !priv->have_fake_front)
-      return;
-
-   psc = (struct dri3_screen *) priv->base.psc;
-   front = dri3_fake_front_buffer(priv);
-
-   dri3_copy_drawable(priv, front->pixmap, priv->base.xDrawable);
-
-   /* In the psc->is_different_gpu case, the linear buffer has been updated,
-    * but not yet the tiled buffer.
-    * Copy back to the tiled buffer we use for rendering.
-    * Note that we don't need flushing.
-    */
-   if (psc->is_different_gpu && (&pcp->base != &dummyContext) && pcp->base.psc == &psc->base)
-      psc->image->blitImage(pcp->driContext,
-                            front->image,
-                            front->linear_buffer,
-                            0, 0, front->width,
-                            front->height,
-                            0, 0, front->width,
-                            front->height, 0);
+   loader_dri3_wait_x(&priv->loader_drawable);
 }
 
 static void
 dri3_wait_gl(struct glx_context *gc)
 {
-   struct dri3_context *pcp = (struct dri3_context *) gc;
    struct dri3_drawable *priv = (struct dri3_drawable *)
       GetGLXDRIDrawable(gc->currentDpy, gc->currentDrawable);
-   struct dri3_screen *psc;
-   struct dri3_buffer *front;
 
-   if (priv == NULL || !priv->have_fake_front)
-      return;
-
-   psc = (struct dri3_screen *) priv->base.psc;
-   front = dri3_fake_front_buffer(priv);
-
-   /* In the psc->is_different_gpu case, we update the linear_buffer
-    * before updating the real front.
-    */
-   if (psc->is_different_gpu && (&pcp->base != &dummyContext) && pcp->base.psc == &psc->base)
-      psc->image->blitImage(pcp->driContext,
-                            front->linear_buffer,
-                            front->image,
-                            0, 0, front->width,
-                            front->height,
-                            0, 0, front->width,
-                            front->height, __BLIT_FLAG_FLUSH);
-   dri3_copy_drawable(priv, priv->base.xDrawable, front->pixmap);
+   loader_dri3_wait_gl(&priv->loader_drawable);
 }
 
 /**
@@ -782,8 +456,8 @@ dri3_wait_gl(struct glx_context *gc)
 static void
 dri3_flush_front_buffer(__DRIdrawable *driDrawable, void *loaderPrivate)
 {
-   struct glx_context *gc;
-   struct dri3_drawable *pdraw = loaderPrivate;
+   struct loader_dri3_drawable *draw = loaderPrivate;
+   struct dri3_drawable *pdraw = loader_drawable_to_dri3_drawable(draw);
    struct dri3_screen *psc;
 
    if (!pdraw)
@@ -796,699 +470,9 @@ dri3_flush_front_buffer(__DRIdrawable *driDrawable, void *loaderPrivate)
 
    (void) __glXInitialize(psc->base.dpy);
 
-   gc = __glXGetCurrentContext();
+   loader_dri3_flush(draw, __DRI2_FLUSH_DRAWABLE, __DRI2_THROTTLE_FLUSHFRONT);
 
-   dri3_flush(psc, pdraw, __DRI2_FLUSH_DRAWABLE, __DRI2_THROTTLE_FLUSHFRONT);
-
-   dri3_wait_gl(gc);
-}
-
-static uint32_t
-dri3_cpp_for_format(uint32_t format) {
-   switch (format) {
-   case  __DRI_IMAGE_FORMAT_R8:
-      return 1;
-   case  __DRI_IMAGE_FORMAT_RGB565:
-   case  __DRI_IMAGE_FORMAT_GR88:
-      return 2;
-   case  __DRI_IMAGE_FORMAT_XRGB8888:
-   case  __DRI_IMAGE_FORMAT_ARGB8888:
-   case  __DRI_IMAGE_FORMAT_ABGR8888:
-   case  __DRI_IMAGE_FORMAT_XBGR8888:
-   case  __DRI_IMAGE_FORMAT_XRGB2101010:
-   case  __DRI_IMAGE_FORMAT_ARGB2101010:
-   case  __DRI_IMAGE_FORMAT_SARGB8:
-      return 4;
-   case  __DRI_IMAGE_FORMAT_NONE:
-   default:
-      return 0;
-   }
-}
-
-
-/** dri3_alloc_render_buffer
- *
- * Use the driver createImage function to construct a __DRIimage, then
- * get a file descriptor for that and create an X pixmap from that
- *
- * Allocate an xshmfence for synchronization
- */
-static struct dri3_buffer *
-dri3_alloc_render_buffer(struct glx_screen *glx_screen, Drawable draw,
-                         unsigned int format, int width, int height, int depth)
-{
-   struct dri3_screen *psc = (struct dri3_screen *) glx_screen;
-   Display *dpy = glx_screen->dpy;
-   struct dri3_buffer *buffer;
-   __DRIimage *pixmap_buffer;
-   xcb_connection_t *c = XGetXCBConnection(dpy);
-   xcb_pixmap_t pixmap;
-   xcb_sync_fence_t sync_fence;
-   struct xshmfence *shm_fence;
-   int buffer_fd, fence_fd;
-   int stride;
-
-   /* Create an xshmfence object and
-    * prepare to send that to the X server
-    */
-
-   fence_fd = xshmfence_alloc_shm();
-   if (fence_fd < 0) {
-      ErrorMessageF("DRI3 Fence object allocation failure %s\n", strerror(errno));
-      return NULL;
-   }
-   shm_fence = xshmfence_map_shm(fence_fd);
-   if (shm_fence == NULL) {
-      ErrorMessageF("DRI3 Fence object map failure %s\n", strerror(errno));
-      goto no_shm_fence;
-   }
-
-   /* Allocate the image from the driver
-    */
-   buffer = calloc(1, sizeof (struct dri3_buffer));
-   if (!buffer)
-      goto no_buffer;
-
-   buffer->cpp = dri3_cpp_for_format(format);
-   if (!buffer->cpp) {
-      ErrorMessageF("DRI3 buffer format %d invalid\n", format);
-      goto no_image;
-   }
-
-   if (!psc->is_different_gpu) {
-      buffer->image = (*psc->image->createImage) (psc->driScreen,
-                                                  width, height,
-                                                  format,
-                                                  __DRI_IMAGE_USE_SHARE |
-                                                  __DRI_IMAGE_USE_SCANOUT,
-                                                  buffer);
-      pixmap_buffer = buffer->image;
-
-      if (!buffer->image) {
-         ErrorMessageF("DRI3 gpu image creation failure\n");
-         goto no_image;
-      }
-   } else {
-      buffer->image = (*psc->image->createImage) (psc->driScreen,
-                                                  width, height,
-                                                  format,
-                                                  0,
-                                                  buffer);
-
-      if (!buffer->image) {
-         ErrorMessageF("DRI3 other gpu image creation failure\n");
-         goto no_image;
-      }
-
-      buffer->linear_buffer = (*psc->image->createImage) (psc->driScreen,
-                                                          width, height,
-                                                          format,
-                                                          __DRI_IMAGE_USE_SHARE |
-                                                          __DRI_IMAGE_USE_LINEAR,
-                                                          buffer);
-      pixmap_buffer = buffer->linear_buffer;
-
-      if (!buffer->linear_buffer) {
-         ErrorMessageF("DRI3 gpu linear image creation failure\n");
-         goto no_linear_buffer;
-      }
-   }
-
-   /* X wants the stride, so ask the image for it
-    */
-   if (!(*psc->image->queryImage)(pixmap_buffer, __DRI_IMAGE_ATTRIB_STRIDE, &stride)) {
-      ErrorMessageF("DRI3 get image stride failed\n");
-      goto no_buffer_attrib;
-   }
-
-   buffer->pitch = stride;
-
-   if (!(*psc->image->queryImage)(pixmap_buffer, __DRI_IMAGE_ATTRIB_FD, &buffer_fd)) {
-      ErrorMessageF("DRI3 get image FD failed\n");
-      goto no_buffer_attrib;
-   }
-
-   xcb_dri3_pixmap_from_buffer(c,
-                               (pixmap = xcb_generate_id(c)),
-                               draw,
-                               buffer->size,
-                               width, height, buffer->pitch,
-                               depth, buffer->cpp * 8,
-                               buffer_fd);
-
-   xcb_dri3_fence_from_fd(c,
-                          pixmap,
-                          (sync_fence = xcb_generate_id(c)),
-                          false,
-                          fence_fd);
-
-   buffer->pixmap = pixmap;
-   buffer->own_pixmap = true;
-   buffer->sync_fence = sync_fence;
-   buffer->shm_fence = shm_fence;
-   buffer->width = width;
-   buffer->height = height;
-
-   /* Mark the buffer as idle
-    */
-   dri3_fence_set(buffer);
-
-   return buffer;
-
-no_buffer_attrib:
-   (*psc->image->destroyImage)(pixmap_buffer);
-no_linear_buffer:
-   if (psc->is_different_gpu)
-      (*psc->image->destroyImage)(buffer->image);
-no_image:
-   free(buffer);
-no_buffer:
-   xshmfence_unmap_shm(shm_fence);
-no_shm_fence:
-   close(fence_fd);
-   ErrorMessageF("DRI3 alloc_render_buffer failed\n");
-   return NULL;
-}
-
-/** dri3_free_render_buffer
- *
- * Free everything associated with one render buffer including pixmap, fence
- * stuff and the driver image
- */
-static void
-dri3_free_render_buffer(struct dri3_drawable *pdraw, struct dri3_buffer *buffer)
-{
-   struct dri3_screen   *psc = (struct dri3_screen *) pdraw->base.psc;
-   xcb_connection_t     *c = XGetXCBConnection(pdraw->base.psc->dpy);
-
-   if (buffer->own_pixmap)
-      xcb_free_pixmap(c, buffer->pixmap);
-   xcb_sync_destroy_fence(c, buffer->sync_fence);
-   xshmfence_unmap_shm(buffer->shm_fence);
-   (*psc->image->destroyImage)(buffer->image);
-   if (buffer->linear_buffer)
-      (*psc->image->destroyImage)(buffer->linear_buffer);
-   free(buffer);
-}
-
-
-/** dri3_flush_present_events
- *
- * Process any present events that have been received from the X server
- */
-static void
-dri3_flush_present_events(struct dri3_drawable *priv)
-{
-   xcb_connection_t     *c = XGetXCBConnection(priv->base.psc->dpy);
-
-   /* Check to see if any configuration changes have occurred
-    * since we were last invoked
-    */
-   if (priv->special_event) {
-      xcb_generic_event_t    *ev;
-
-      while ((ev = xcb_poll_for_special_event(c, priv->special_event)) != NULL) {
-         xcb_present_generic_event_t *ge = (void *) ev;
-         dri3_handle_present_event(priv, ge);
-      }
-   }
-}
-
-/** dri3_update_drawable
- *
- * Called the first time we use the drawable and then
- * after we receive present configure notify events to
- * track the geometry of the drawable
- */
-static int
-dri3_update_drawable(__DRIdrawable *driDrawable, void *loaderPrivate)
-{
-   struct dri3_drawable *priv = loaderPrivate;
-   xcb_connection_t     *c = XGetXCBConnection(priv->base.psc->dpy);
-
-   /* First time through, go get the current drawable geometry
-    */
-   if (priv->width == 0 || priv->height == 0 || priv->depth == 0) {
-      xcb_get_geometry_cookie_t                 geom_cookie;
-      xcb_get_geometry_reply_t                  *geom_reply;
-      xcb_void_cookie_t                         cookie;
-      xcb_generic_error_t                       *error;
-      xcb_present_query_capabilities_cookie_t   present_capabilities_cookie;
-      xcb_present_query_capabilities_reply_t    *present_capabilities_reply;
-
-
-      /* Try to select for input on the window.
-       *
-       * If the drawable is a window, this will get our events
-       * delivered.
-       *
-       * Otherwise, we'll get a BadWindow error back from this request which
-       * will let us know that the drawable is a pixmap instead.
-       */
-
-
-      cookie = xcb_present_select_input_checked(c,
-                                                (priv->eid = xcb_generate_id(c)),
-                                                priv->base.xDrawable,
-                                                XCB_PRESENT_EVENT_MASK_CONFIGURE_NOTIFY|
-                                                XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY|
-                                                XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY);
-
-      present_capabilities_cookie = xcb_present_query_capabilities(c, priv->base.xDrawable);
-
-      /* Create an XCB event queue to hold present events outside of the usual
-       * application event queue
-       */
-      priv->special_event = xcb_register_for_special_xge(c,
-                                                         &xcb_present_id,
-                                                         priv->eid,
-                                                         priv->stamp);
-
-      geom_cookie = xcb_get_geometry(c, priv->base.xDrawable);
-
-      geom_reply = xcb_get_geometry_reply(c, geom_cookie, NULL);
-
-      if (!geom_reply)
-         return false;
-
-      priv->width = geom_reply->width;
-      priv->height = geom_reply->height;
-      priv->depth = geom_reply->depth;
-      priv->is_pixmap = false;
-
-      free(geom_reply);
-
-      /* Check to see if our select input call failed. If it failed with a
-       * BadWindow error, then assume the drawable is a pixmap. Destroy the
-       * special event queue created above and mark the drawable as a pixmap
-       */
-
-      error = xcb_request_check(c, cookie);
-
-      present_capabilities_reply = xcb_present_query_capabilities_reply(c,
-                                                                        present_capabilities_cookie,
-                                                                        NULL);
-
-      if (present_capabilities_reply) {
-         priv->present_capabilities = present_capabilities_reply->capabilities;
-         free(present_capabilities_reply);
-      } else
-         priv->present_capabilities = 0;
-
-      if (error) {
-         if (error->error_code != BadWindow) {
-            free(error);
-            return false;
-         }
-         priv->is_pixmap = true;
-         xcb_unregister_for_special_event(c, priv->special_event);
-         priv->special_event = NULL;
-      }
-   }
-   dri3_flush_present_events(priv);
-   return true;
-}
-
-/* the DRIimage createImage function takes __DRI_IMAGE_FORMAT codes, while
- * the createImageFromFds call takes __DRI_IMAGE_FOURCC codes. To avoid
- * complete confusion, just deal in __DRI_IMAGE_FORMAT codes for now and
- * translate to __DRI_IMAGE_FOURCC codes in the call to createImageFromFds
- */
-static int
-image_format_to_fourcc(int format)
-{
-
-   /* Convert from __DRI_IMAGE_FORMAT to __DRI_IMAGE_FOURCC (sigh) */
-   switch (format) {
-   case __DRI_IMAGE_FORMAT_SARGB8: return __DRI_IMAGE_FOURCC_SARGB8888;
-   case __DRI_IMAGE_FORMAT_RGB565: return __DRI_IMAGE_FOURCC_RGB565;
-   case __DRI_IMAGE_FORMAT_XRGB8888: return __DRI_IMAGE_FOURCC_XRGB8888;
-   case __DRI_IMAGE_FORMAT_ARGB8888: return __DRI_IMAGE_FOURCC_ARGB8888;
-   case __DRI_IMAGE_FORMAT_ABGR8888: return __DRI_IMAGE_FOURCC_ABGR8888;
-   case __DRI_IMAGE_FORMAT_XBGR8888: return __DRI_IMAGE_FOURCC_XBGR8888;
-   }
-   return 0;
-}
-
-/** dri3_get_pixmap_buffer
- *
- * Get the DRM object for a pixmap from the X server and
- * wrap that with a __DRIimage structure using createImageFromFds
- */
-static struct dri3_buffer *
-dri3_get_pixmap_buffer(__DRIdrawable *driDrawable,
-                       unsigned int format,
-                       enum dri3_buffer_type buffer_type,
-                       void *loaderPrivate)
-{
-   struct dri3_drawable                 *pdraw = loaderPrivate;
-   int                                  buf_id = dri3_pixmap_buf_id(buffer_type);
-   struct dri3_buffer                   *buffer = pdraw->buffers[buf_id];
-   Pixmap                               pixmap;
-   xcb_dri3_buffer_from_pixmap_cookie_t bp_cookie;
-   xcb_dri3_buffer_from_pixmap_reply_t  *bp_reply;
-   int                                  *fds;
-   Display                              *dpy;
-   struct dri3_screen                   *psc;
-   xcb_connection_t                     *c;
-   xcb_sync_fence_t                     sync_fence;
-   struct xshmfence                     *shm_fence;
-   int                                  fence_fd;
-   __DRIimage                           *image_planar;
-   int                                  stride, offset;
-
-   if (buffer)
-      return buffer;
-
-   pixmap = pdraw->base.xDrawable;
-   psc = (struct dri3_screen *) pdraw->base.psc;
-   dpy = psc->base.dpy;
-   c = XGetXCBConnection(dpy);
-
-   buffer = calloc(1, sizeof (struct dri3_buffer));
-   if (!buffer)
-      goto no_buffer;
-
-   fence_fd = xshmfence_alloc_shm();
-   if (fence_fd < 0)
-      goto no_fence;
-   shm_fence = xshmfence_map_shm(fence_fd);
-   if (shm_fence == NULL) {
-      close (fence_fd);
-      goto no_fence;
-   }
-
-   xcb_dri3_fence_from_fd(c,
-                          pixmap,
-                          (sync_fence = xcb_generate_id(c)),
-                          false,
-                          fence_fd);
-
-   /* Get an FD for the pixmap object
-    */
-   bp_cookie = xcb_dri3_buffer_from_pixmap(c, pixmap);
-   bp_reply = xcb_dri3_buffer_from_pixmap_reply(c, bp_cookie, NULL);
-   if (!bp_reply)
-      goto no_image;
-   fds = xcb_dri3_buffer_from_pixmap_reply_fds(c, bp_reply);
-
-   stride = bp_reply->stride;
-   offset = 0;
-
-   /* createImageFromFds creates a wrapper __DRIimage structure which
-    * can deal with multiple planes for things like Yuv images. So, once
-    * we've gotten the planar wrapper, pull the single plane out of it and
-    * discard the wrapper.
-    */
-   image_planar = (*psc->image->createImageFromFds) (psc->driScreen,
-                                                     bp_reply->width,
-                                                     bp_reply->height,
-                                                     image_format_to_fourcc(format),
-                                                     fds, 1,
-                                                     &stride, &offset, buffer);
-   close(fds[0]);
-   if (!image_planar)
-      goto no_image;
-
-   buffer->image = (*psc->image->fromPlanar)(image_planar, 0, buffer);
-
-   (*psc->image->destroyImage)(image_planar);
-
-   if (!buffer->image)
-      goto no_image;
-
-   buffer->pixmap = pixmap;
-   buffer->own_pixmap = false;
-   buffer->width = bp_reply->width;
-   buffer->height = bp_reply->height;
-   buffer->buffer_type = buffer_type;
-   buffer->shm_fence = shm_fence;
-   buffer->sync_fence = sync_fence;
-
-   pdraw->buffers[buf_id] = buffer;
-   return buffer;
-
-no_image:
-   xcb_sync_destroy_fence(c, sync_fence);
-   xshmfence_unmap_shm(shm_fence);
-no_fence:
-   free(buffer);
-no_buffer:
-   return NULL;
-}
-
-/** dri3_find_back
- *
- * Find an idle back buffer. If there isn't one, then
- * wait for a present idle notify event from the X server
- */
-static int
-dri3_find_back(xcb_connection_t *c, struct dri3_drawable *priv)
-{
-   int  b;
-   xcb_generic_event_t *ev;
-   xcb_present_generic_event_t *ge;
-
-   for (;;) {
-      for (b = 0; b < priv->num_back; b++) {
-         int id = DRI3_BACK_ID((b + priv->cur_back) % priv->num_back);
-         struct dri3_buffer *buffer = priv->buffers[id];
-
-         if (!buffer || !buffer->busy) {
-            priv->cur_back = id;
-            return id;
-         }
-      }
-      xcb_flush(c);
-      ev = xcb_wait_for_special_event(c, priv->special_event);
-      if (!ev)
-         return -1;
-      ge = (void *) ev;
-      dri3_handle_present_event(priv, ge);
-   }
-}
-
-/** dri3_get_buffer
- *
- * Find a front or back buffer, allocating new ones as necessary
- */
-static struct dri3_buffer *
-dri3_get_buffer(__DRIdrawable *driDrawable,
-                unsigned int format,
-                enum dri3_buffer_type buffer_type,
-                void *loaderPrivate)
-{
-   struct dri3_context *pcp = (struct dri3_context *) __glXGetCurrentContext();
-   struct dri3_drawable *priv = loaderPrivate;
-   struct dri3_screen *psc = (struct dri3_screen *) priv->base.psc;
-   xcb_connection_t     *c = XGetXCBConnection(priv->base.psc->dpy);
-   struct dri3_buffer      *buffer;
-   int                  buf_id;
-
-   if (buffer_type == dri3_buffer_back) {
-      buf_id = dri3_find_back(c, priv);
-
-      if (buf_id < 0)
-         return NULL;
-   } else {
-      buf_id = DRI3_FRONT_ID;
-   }
-
-   buffer = priv->buffers[buf_id];
-
-   /* Allocate a new buffer if there isn't an old one, or if that
-    * old one is the wrong size
-    */
-   if (!buffer || buffer->width != priv->width || buffer->height != priv->height) {
-      struct dri3_buffer   *new_buffer;
-
-      /* Allocate the new buffers
-       */
-      new_buffer = dri3_alloc_render_buffer(priv->base.psc,
-                                            priv->base.xDrawable,
-                                            format, priv->width, priv->height, priv->depth);
-      if (!new_buffer)
-         return NULL;
-
-      /* When resizing, copy the contents of the old buffer, waiting for that
-       * copy to complete using our fences before proceeding
-       */
-      switch (buffer_type) {
-      case dri3_buffer_back:
-         if (buffer) {
-            if (!buffer->linear_buffer) {
-               dri3_fence_reset(c, new_buffer);
-               dri3_fence_await(c, buffer);
-               dri3_copy_area(c,
-                              buffer->pixmap,
-                              new_buffer->pixmap,
-                              dri3_drawable_gc(priv),
-                              0, 0, 0, 0, priv->width, priv->height);
-            dri3_fence_trigger(c, new_buffer);
-            } else if ((&pcp->base != &dummyContext) && pcp->base.psc == &psc->base) {
-               psc->image->blitImage(pcp->driContext,
-                                     new_buffer->image,
-                                     buffer->image,
-                                     0, 0, priv->width,
-                                     priv->height,
-                                     0, 0, priv->width,
-                                     priv->height, 0);
-            }
-            dri3_free_render_buffer(priv, buffer);
-         }
-         break;
-      case dri3_buffer_front:
-         dri3_fence_reset(c, new_buffer);
-         dri3_copy_area(c,
-                        priv->base.xDrawable,
-                        new_buffer->pixmap,
-                        dri3_drawable_gc(priv),
-                        0, 0, 0, 0, priv->width, priv->height);
-         dri3_fence_trigger(c, new_buffer);
-
-         if (new_buffer->linear_buffer && (&pcp->base != &dummyContext) && pcp->base.psc == &psc->base) {
-            dri3_fence_await(c, new_buffer);
-            psc->image->blitImage(pcp->driContext,
-                                  new_buffer->image,
-                                  new_buffer->linear_buffer,
-                                  0, 0, priv->width,
-                                  priv->height,
-                                  0, 0, priv->width,
-                                  priv->height, 0);
-         }
-         break;
-      }
-      buffer = new_buffer;
-      buffer->buffer_type = buffer_type;
-      priv->buffers[buf_id] = buffer;
-   }
-   dri3_fence_await(c, buffer);
-
-   /* Return the requested buffer */
-   return buffer;
-}
-
-/** dri3_free_buffers
- *
- * Free the front bufffer or all of the back buffers. Used
- * when the application changes which buffers it needs
- */
-static void
-dri3_free_buffers(__DRIdrawable *driDrawable,
-                 enum dri3_buffer_type buffer_type,
-                 void *loaderPrivate)
-{
-   struct dri3_drawable *priv = loaderPrivate;
-   struct dri3_buffer      *buffer;
-   int                  first_id;
-   int                  n_id;
-   int                  buf_id;
-
-   switch (buffer_type) {
-   case dri3_buffer_back:
-      first_id = DRI3_BACK_ID(0);
-      n_id = DRI3_MAX_BACK;
-      break;
-   case dri3_buffer_front:
-      first_id = DRI3_FRONT_ID;
-      n_id = 1;
-   }
-
-   for (buf_id = first_id; buf_id < first_id + n_id; buf_id++) {
-      buffer = priv->buffers[buf_id];
-      if (buffer) {
-         dri3_free_render_buffer(priv, buffer);
-         priv->buffers[buf_id] = NULL;
-      }
-   }
-}
-
-/** dri3_get_buffers
- *
- * The published buffer allocation API.
- * Returns all of the necessary buffers, allocating
- * as needed.
- */
-static int
-dri3_get_buffers(__DRIdrawable *driDrawable,
-                 unsigned int format,
-                 uint32_t *stamp,
-                 void *loaderPrivate,
-                 uint32_t buffer_mask,
-                 struct __DRIimageList *buffers)
-{
-   struct dri3_drawable *priv = loaderPrivate;
-   struct dri3_screen *psc = (struct dri3_screen *) priv->base.psc;
-   struct dri3_buffer   *front, *back;
-
-   buffers->image_mask = 0;
-   buffers->front = NULL;
-   buffers->back = NULL;
-
-   front = NULL;
-   back = NULL;
-
-   if (!dri3_update_drawable(driDrawable, loaderPrivate))
-      return false;
-
-   /* pixmaps always have front buffers */
-   if (priv->is_pixmap)
-      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
-
-   if (buffer_mask & __DRI_IMAGE_BUFFER_FRONT) {
-      /* All pixmaps are owned by the server gpu.
-       * When we use a different gpu, we can't use the pixmap
-       * as buffer since it is potentially tiled a way
-       * our device can't understand. In this case, use
-       * a fake front buffer. Hopefully the pixmap
-       * content will get synced with the fake front
-       * buffer.
-       */
-      if (priv->is_pixmap && !psc->is_different_gpu)
-         front = dri3_get_pixmap_buffer(driDrawable,
-                                        format,
-                                        dri3_buffer_front,
-                                        loaderPrivate);
-      else
-         front = dri3_get_buffer(driDrawable,
-                                 format,
-                                 dri3_buffer_front,
-                                 loaderPrivate);
-
-      if (!front)
-         return false;
-   } else {
-      dri3_free_buffers(driDrawable, dri3_buffer_front, loaderPrivate);
-      priv->have_fake_front = 0;
-   }
-
-   if (buffer_mask & __DRI_IMAGE_BUFFER_BACK) {
-      back = dri3_get_buffer(driDrawable,
-                             format,
-                             dri3_buffer_back,
-                             loaderPrivate);
-      if (!back)
-         return false;
-      priv->have_back = 1;
-   } else {
-      dri3_free_buffers(driDrawable, dri3_buffer_back, loaderPrivate);
-      priv->have_back = 0;
-   }
-
-   if (front) {
-      buffers->image_mask |= __DRI_IMAGE_BUFFER_FRONT;
-      buffers->front = front->image;
-      priv->have_fake_front = psc->is_different_gpu || !priv->is_pixmap;
-   }
-
-   if (back) {
-      buffers->image_mask |= __DRI_IMAGE_BUFFER_BACK;
-      buffers->back = back->image;
-   }
-
-   priv->stamp = stamp;
-
-   return true;
+   loader_dri3_wait_gl(draw);
 }
 
 /* The image loader extension record for DRI3
@@ -1496,7 +480,7 @@ dri3_get_buffers(__DRIdrawable *driDrawable,
 static const __DRIimageLoaderExtension imageLoaderExtension = {
    .base = { __DRI_IMAGE_LOADER, 1 },
 
-   .getBuffers          = dri3_get_buffers,
+   .getBuffers          = loader_dri3_get_buffers,
    .flushFrontBuffer    = dri3_flush_front_buffer,
 };
 
@@ -1519,172 +503,25 @@ static int64_t
 dri3_swap_buffers(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor,
                   int64_t remainder, Bool flush)
 {
-   struct dri3_context *pcp = (struct dri3_context *) __glXGetCurrentContext();
    struct dri3_drawable *priv = (struct dri3_drawable *) pdraw;
-   struct dri3_screen *psc = (struct dri3_screen *) priv->base.psc;
-   Display *dpy = priv->base.psc->dpy;
-   xcb_connection_t *c = XGetXCBConnection(dpy);
-   struct dri3_buffer *back;
-   int64_t ret = 0;
-   uint32_t options = XCB_PRESENT_OPTION_NONE;
-
    unsigned flags = __DRI2_FLUSH_DRAWABLE;
+
    if (flush)
       flags |= __DRI2_FLUSH_CONTEXT;
-   dri3_flush(psc, priv, flags, __DRI2_THROTTLE_SWAPBUFFER);
 
-   back = priv->buffers[DRI3_BACK_ID(priv->cur_back)];
-   if (psc->is_different_gpu && back) {
-      /* Update the linear buffer before presenting the pixmap */
-      psc->image->blitImage(pcp->driContext,
-                            back->linear_buffer,
-                            back->image,
-                            0, 0, back->width,
-                            back->height,
-                            0, 0, back->width,
-                            back->height, __BLIT_FLAG_FLUSH);
-      /* Update the fake front */
-      if (priv->have_fake_front)
-         psc->image->blitImage(pcp->driContext,
-                               priv->buffers[DRI3_FRONT_ID]->image,
-                               back->image,
-                               0, 0, priv->width,
-                               priv->height,
-                               0, 0, priv->width,
-                               priv->height, __BLIT_FLAG_FLUSH);
-   }
-
-   dri3_flush_present_events(priv);
-
-   if (back && !priv->is_pixmap) {
-      dri3_fence_reset(c, back);
-
-      /* Compute when we want the frame shown by taking the last known successful
-       * MSC and adding in a swap interval for each outstanding swap request.
-       * target_msc=divisor=remainder=0 means "Use glXSwapBuffers() semantic"
-       */
-      ++priv->send_sbc;
-      if (target_msc == 0 && divisor == 0 && remainder == 0)
-         target_msc = priv->msc + priv->swap_interval * (priv->send_sbc - priv->recv_sbc);
-      else if (divisor == 0 && remainder > 0) {
-         /* From the GLX_OML_sync_control spec:
-          *
-          *     "If <divisor> = 0, the swap will occur when MSC becomes
-          *      greater than or equal to <target_msc>."
-          *
-          * Note that there's no mention of the remainder.  The Present extension
-          * throws BadValue for remainder != 0 with divisor == 0, so just drop
-          * the passed in value.
-          */
-         remainder = 0;
-      }
-
-      /* From the GLX_EXT_swap_control spec:
-       *
-       *     "If <interval> is set to a value of 0, buffer swaps are not
-       *      synchronized to a video frame."
-       *
-       * Implementation note: It is possible to enable triple buffering behaviour
-       * by not using XCB_PRESENT_OPTION_ASYNC, but this should not be the default.
-       */
-      if (priv->swap_interval == 0)
-          options |= XCB_PRESENT_OPTION_ASYNC;
-
-      back->busy = 1;
-      back->last_swap = priv->send_sbc;
-      xcb_present_pixmap(c,
-                         priv->base.xDrawable,
-                         back->pixmap,
-                         (uint32_t) priv->send_sbc,
-                         0,                                    /* valid */
-                         0,                                    /* update */
-                         0,                                    /* x_off */
-                         0,                                    /* y_off */
-                         None,                                 /* target_crtc */
-                         None,
-                         back->sync_fence,
-                         options,
-                         target_msc,
-                         divisor,
-                         remainder, 0, NULL);
-      ret = (int64_t) priv->send_sbc;
-
-      /* If there's a fake front, then copy the source back buffer
-       * to the fake front to keep it up to date. This needs
-       * to reset the fence and make future users block until
-       * the X server is done copying the bits
-       */
-      if (priv->have_fake_front && !psc->is_different_gpu) {
-         dri3_fence_reset(c, priv->buffers[DRI3_FRONT_ID]);
-         dri3_copy_area(c,
-                        back->pixmap,
-                        priv->buffers[DRI3_FRONT_ID]->pixmap,
-                        dri3_drawable_gc(priv),
-                        0, 0, 0, 0, priv->width, priv->height);
-         dri3_fence_trigger(c, priv->buffers[DRI3_FRONT_ID]);
-      }
-      xcb_flush(c);
-      if (priv->stamp)
-         ++(*priv->stamp);
-   }
-
-   (*psc->f->invalidate)(priv->driDrawable);
-
-   return ret;
+   return loader_dri3_swap_buffers_msc(&priv->loader_drawable,
+                                       target_msc, divisor, remainder,
+                                       flags, false);
 }
 
 static int
 dri3_get_buffer_age(__GLXDRIdrawable *pdraw)
 {
-   xcb_connection_t *c = XGetXCBConnection(pdraw->psc->dpy);
-   struct dri3_drawable *priv = (struct dri3_drawable *) pdraw;
-   int back_id = DRI3_BACK_ID(dri3_find_back(c, priv));
+   struct dri3_drawable *priv = (struct dri3_drawable *)pdraw;
 
-   if (back_id < 0 || !priv->buffers[back_id])
-      return 0;
-
-   if (priv->buffers[back_id]->last_swap != 0)
-      return priv->send_sbc - priv->buffers[back_id]->last_swap + 1;
-   else
-      return 0;
+   return loader_dri3_query_buffer_age(&priv->loader_drawable);
 }
 
-/** dri3_open
- *
- * Wrapper around xcb_dri3_open
- */
-static int
-dri3_open(Display *dpy,
-          Window root,
-          CARD32 provider)
-{
-   xcb_dri3_open_cookie_t       cookie;
-   xcb_dri3_open_reply_t        *reply;
-   xcb_connection_t             *c = XGetXCBConnection(dpy);
-   int                          fd;
-
-   cookie = xcb_dri3_open(c,
-                          root,
-                          provider);
-
-   reply = xcb_dri3_open_reply(c, cookie, NULL);
-   if (!reply)
-      return -1;
-
-   if (reply->nfd != 1) {
-      free(reply);
-      return -1;
-   }
-
-   fd = xcb_dri3_open_reply_fds(c, reply)[0];
-   fcntl(fd, F_SETFD, FD_CLOEXEC);
-
-   free(reply);
-
-   return fd;
-}
-
-
 /** dri3_destroy_screen
  */
 static void
@@ -1727,8 +564,7 @@ dri3_set_swap_interval(__GLXDRIdrawable *pdraw, int interval)
       break;
    }
 
-   priv->swap_interval = interval;
-   dri3_update_num_back(priv);
+   loader_dri3_set_swap_interval(&priv->loader_drawable, interval);
 
    return 0;
 }
@@ -1759,14 +595,14 @@ dri3_bind_tex_image(Display * dpy,
    if (pdraw != NULL) {
       psc = (struct dri3_screen *) base->psc;
 
-      (*psc->f->invalidate)(pdraw->driDrawable);
+      (*psc->f->invalidate)(pdraw->loader_drawable.dri_drawable);
 
       XSync(dpy, false);
 
       (*psc->texBuffer->setTexBuffer2) (pcp->driContext,
                                         pdraw->base.textureTarget,
                                         pdraw->base.textureFormat,
-                                        pdraw->driDrawable);
+                                        pdraw->loader_drawable.dri_drawable);
    }
 }
 
@@ -1786,7 +622,7 @@ dri3_release_tex_image(Display * dpy, GLXDrawable drawable, int buffer)
           psc->texBuffer->releaseTexBuffer != NULL)
          (*psc->texBuffer->releaseTexBuffer) (pcp->driContext,
                                               pdraw->base.textureTarget,
-                                              pdraw->driDrawable);
+                                              pdraw->loader_drawable.dri_drawable);
    }
 }
 
@@ -1908,7 +744,7 @@ dri3_create_screen(int screen, struct glx_display * priv)
       return NULL;
    }
 
-   psc->fd = dri3_open(priv->dpy, RootWindow(priv->dpy, screen), None);
+   psc->fd = loader_dri3_open(c, RootWindow(priv->dpy, screen), None);
    if (psc->fd < 0) {
       int conn_error = xcb_connection_has_error(c);
 
@@ -2000,6 +836,13 @@ dri3_create_screen(int screen, struct glx_display * priv)
       goto handle_error;
    }
 
+   psc->loader_dri3_ext.core = psc->core;
+   psc->loader_dri3_ext.image_driver = psc->image_driver;
+   psc->loader_dri3_ext.flush = psc->f;
+   psc->loader_dri3_ext.tex_buffer = psc->texBuffer;
+   psc->loader_dri3_ext.image = psc->image;
+   psc->loader_dri3_ext.config = psc->config;
+
    configs = driConvertConfigs(psc->core, psc->base.configs, driver_configs);
    visuals = driConvertConfigs(psc->core, psc->base.visuals, driver_configs);
 
diff --git a/src/glx/dri3_priv.h b/src/glx/dri3_priv.h
index 160444907e6..56a63309f36 100644
--- a/src/glx/dri3_priv.h
+++ b/src/glx/dri3_priv.h
@@ -59,50 +59,14 @@
 #include <xcb/present.h>
 #include <xcb/sync.h>
 
+#include "loader_dri3_helper.h"
+
 /* From xmlpool/options.h, user exposed so should be stable */
 #define DRI_CONF_VBLANK_NEVER 0
 #define DRI_CONF_VBLANK_DEF_INTERVAL_0 1
 #define DRI_CONF_VBLANK_DEF_INTERVAL_1 2
 #define DRI_CONF_VBLANK_ALWAYS_SYNC 3
 
-enum dri3_buffer_type {
-   dri3_buffer_back = 0,
-   dri3_buffer_front = 1
-};
-
-struct dri3_buffer {
-   __DRIimage   *image;
-   __DRIimage   *linear_buffer;
-   uint32_t     pixmap;
-
-   /* Synchronization between the client and X server is done using an
-    * xshmfence that is mapped into an X server SyncFence. This lets the
-    * client check whether the X server is done using a buffer with a simple
-    * xshmfence call, rather than going to read X events from the wire.
-    *
-    * However, we can only wait for one xshmfence to be triggered at a time,
-    * so we need to know *which* buffer is going to be idle next. We do that
-    * by waiting for a PresentIdleNotify event. When that event arrives, the
-    * 'busy' flag gets cleared and the client knows that the fence has been
-    * triggered, and that the wait call will not block.
-    */
-
-   uint32_t     sync_fence;     /* XID of X SyncFence object */
-   struct xshmfence *shm_fence; /* pointer to xshmfence object */
-   GLboolean    busy;           /* Set on swap, cleared on IdleNotify */
-   GLboolean    own_pixmap;     /* We allocated the pixmap ID, free on destroy */
-   void         *driverPrivate;
-
-   uint32_t     size;
-   uint32_t     pitch;
-   uint32_t     cpp;
-   uint32_t     flags;
-   uint32_t     width, height;
-   uint64_t     last_swap;
-
-   enum dri3_buffer_type        buffer_type;
-};
-
 struct dri3_display
 {
    __GLXDRIdisplay base;
@@ -139,6 +103,8 @@ struct dri3_screen {
    int is_different_gpu;
 
    int show_fps_interval;
+
+   struct loader_dri3_extensions loader_dri3_ext;
 };
 
 struct dri3_context
@@ -147,60 +113,10 @@ struct dri3_context
    __DRIcontext *driContext;
 };
 
-#define DRI3_MAX_BACK   4
-#define DRI3_BACK_ID(i) (i)
-#define DRI3_FRONT_ID   (DRI3_MAX_BACK)
-
-static inline int
-dri3_pixmap_buf_id(enum dri3_buffer_type buffer_type)
-{
-   if (buffer_type == dri3_buffer_back)
-      return DRI3_BACK_ID(0);
-   else
-      return DRI3_FRONT_ID;
-}
-
-#define DRI3_NUM_BUFFERS        (1 + DRI3_MAX_BACK)
-
 struct dri3_drawable {
    __GLXDRIdrawable base;
-   __DRIdrawable *driDrawable;
-   int width, height, depth;
+   struct loader_dri3_drawable loader_drawable;
    int swap_interval;
-   uint8_t have_back;
-   uint8_t have_fake_front;
-   uint8_t is_pixmap;
-   uint8_t flipping;
-
-   /* Present extension capabilities
-    */
-   uint32_t present_capabilities;
-
-   /* SBC numbers are tracked by using the serial numbers
-    * in the present request and complete events
-    */
-   uint64_t send_sbc;
-   uint64_t recv_sbc;
-
-   /* Last received UST/MSC values for pixmap present complete */
-   uint64_t ust, msc;
-
-   /* Last received UST/MSC values from present notify msc event */
-   uint64_t notify_ust, notify_msc;
-
-   /* Serial numbers for tracking wait_for_msc events */
-   uint32_t send_msc_serial;
-   uint32_t recv_msc_serial;
-
-   struct dri3_buffer *buffers[DRI3_NUM_BUFFERS];
-   int cur_back;
-   int num_back;
-
-   uint32_t *stamp;
-
-   xcb_present_event_t eid;
-   xcb_gcontext_t gc;
-   xcb_special_event_t *special_event;
 
    /* LIBGL_SHOW_FPS support */
    uint64_t previous_ust;

From a25df5457121d40fef86929d4c10d8058a4d5c72 Mon Sep 17 00:00:00 2001
From: Boyan Ding <boyan.j.ding@gmail.com>
Date: Tue, 21 Jul 2015 23:43:59 +0800
Subject: [PATCH 032/335] egl_dri2: Add a function to let platform code return
 dri drawable from _EGLSurface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

dri3 for EGL will use a struct other than dri2_egl_surface for an EGL
surface. The common code only uses the __DRIdrawable from that struct,
so instead of converting _EGLSurface to dri2_egl_surface, let the
platform code return the __DRIdrawable on its own (although the
current platforms all use the same function).

v2: From Martin Peres
 - convert to the new drawable interface (Kristian)

Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
Signed-off-by: Martin Peres <martin.peres@linux.intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
---
 src/egl/drivers/dri2/egl_dri2.c         | 46 +++++++++++++++----------
 src/egl/drivers/dri2/egl_dri2.h         |  5 +++
 src/egl/drivers/dri2/platform_android.c |  1 +
 src/egl/drivers/dri2/platform_drm.c     |  1 +
 src/egl/drivers/dri2/platform_wayland.c |  2 ++
 src/egl/drivers/dri2/platform_x11.c     |  2 ++
 6 files changed, 38 insertions(+), 19 deletions(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 4cc5f231333..ceff01ff523 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -1119,11 +1119,10 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
 {
    struct dri2_egl_driver *dri2_drv = dri2_egl_driver(drv);
    struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
-   struct dri2_egl_surface *dri2_dsurf = dri2_egl_surface(dsurf);
-   struct dri2_egl_surface *dri2_rsurf = dri2_egl_surface(rsurf);
    struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
    _EGLContext *old_ctx;
    _EGLSurface *old_dsurf, *old_rsurf;
+   _EGLSurface *tmp_dsurf, *tmp_rsurf;
    __DRIdrawable *ddraw, *rdraw;
    __DRIcontext *cctx;
 
@@ -1135,8 +1134,8 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
    if (old_ctx && dri2_drv->glFlush)
       dri2_drv->glFlush();
 
-   ddraw = (dri2_dsurf) ? dri2_dsurf->dri_drawable : NULL;
-   rdraw = (dri2_rsurf) ? dri2_rsurf->dri_drawable : NULL;
+   ddraw = (dsurf) ? dri2_dpy->vtbl->get_dri_drawable(dsurf) : NULL;
+   rdraw = (rsurf) ? dri2_dpy->vtbl->get_dri_drawable(rsurf) : NULL;
    cctx = (dri2_ctx) ? dri2_ctx->dri_context : NULL;
 
    if (old_ctx) {
@@ -1156,10 +1155,10 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
       return EGL_TRUE;
    } else {
       /* undo the previous _eglBindContext */
-      _eglBindContext(old_ctx, old_dsurf, old_rsurf, &ctx, &dsurf, &rsurf);
+      _eglBindContext(old_ctx, old_dsurf, old_rsurf, &ctx, &tmp_dsurf, &tmp_rsurf);
       assert(&dri2_ctx->base == ctx &&
-             &dri2_dsurf->base == dsurf &&
-             &dri2_rsurf->base == rsurf);
+             tmp_dsurf == dsurf &&
+             tmp_rsurf == rsurf);
 
       _eglPutSurface(dsurf);
       _eglPutSurface(rsurf);
@@ -1173,6 +1172,14 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
    }
 }
 
+__DRIdrawable *
+dri2_surface_get_dri_drawable(_EGLSurface *surf)
+{
+   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
+
+   return dri2_surf->dri_drawable;
+}
+
 /*
  * Called from eglGetProcAddress() via drv->API.GetProcAddress().
  */
@@ -1235,7 +1242,7 @@ void
 dri2_flush_drawable_for_swapbuffers(_EGLDisplay *disp, _EGLSurface *draw)
 {
    struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
-   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(draw);
+   __DRIdrawable *dri_drawable = dri2_dpy->vtbl->get_dri_drawable(draw);
 
    if (dri2_dpy->flush) {
       if (dri2_dpy->flush->base.version >= 4) {
@@ -1253,12 +1260,12 @@ dri2_flush_drawable_for_swapbuffers(_EGLDisplay *disp, _EGLSurface *draw)
           *      after calling eglSwapBuffers."
           */
          dri2_dpy->flush->flush_with_flags(dri2_ctx->dri_context,
-                                           dri2_surf->dri_drawable,
+                                           dri_drawable,
                                            __DRI2_FLUSH_DRAWABLE |
                                            __DRI2_FLUSH_INVALIDATE_ANCILLARY,
                                            __DRI2_THROTTLE_SWAPBUFFER);
       } else {
-         dri2_dpy->flush->flush(dri2_surf->dri_drawable);
+         dri2_dpy->flush->flush(dri_drawable);
       }
    }
 }
@@ -1315,7 +1322,8 @@ static EGLBoolean
 dri2_wait_client(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx)
 {
    struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
-   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(ctx->DrawSurface);
+   _EGLSurface *surf = ctx->DrawSurface;
+   __DRIdrawable *dri_drawable = dri2_dpy->vtbl->get_dri_drawable(surf);
 
    (void) drv;
 
@@ -1323,7 +1331,7 @@ dri2_wait_client(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx)
     * we need to copy fake to real here.*/
 
    if (dri2_dpy->flush != NULL)
-      dri2_dpy->flush->flush(dri2_surf->dri_drawable);
+      dri2_dpy->flush->flush(dri_drawable);
 
    return EGL_TRUE;
 }
@@ -1346,10 +1354,10 @@ dri2_bind_tex_image(_EGLDriver *drv,
 		    _EGLDisplay *disp, _EGLSurface *surf, EGLint buffer)
 {
    struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
-   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
    struct dri2_egl_context *dri2_ctx;
    _EGLContext *ctx;
    GLint format, target;
+   __DRIdrawable *dri_drawable = dri2_dpy->vtbl->get_dri_drawable(surf);
 
    ctx = _eglGetCurrentContext();
    dri2_ctx = dri2_egl_context(ctx);
@@ -1357,7 +1365,7 @@ dri2_bind_tex_image(_EGLDriver *drv,
    if (!_eglBindTexImage(drv, disp, surf, buffer))
       return EGL_FALSE;
 
-   switch (dri2_surf->base.TextureFormat) {
+   switch (surf->TextureFormat) {
    case EGL_TEXTURE_RGB:
       format = __DRI_TEXTURE_FORMAT_RGB;
       break;
@@ -1369,7 +1377,7 @@ dri2_bind_tex_image(_EGLDriver *drv,
       format = __DRI_TEXTURE_FORMAT_RGBA;
    }
 
-   switch (dri2_surf->base.TextureTarget) {
+   switch (surf->TextureTarget) {
    case EGL_TEXTURE_2D:
       target = GL_TEXTURE_2D;
       break;
@@ -1380,7 +1388,7 @@ dri2_bind_tex_image(_EGLDriver *drv,
 
    (*dri2_dpy->tex_buffer->setTexBuffer2)(dri2_ctx->dri_context,
 					  target, format,
-					  dri2_surf->dri_drawable);
+					  dri_drawable);
 
    return EGL_TRUE;
 }
@@ -1390,10 +1398,10 @@ dri2_release_tex_image(_EGLDriver *drv,
 		       _EGLDisplay *disp, _EGLSurface *surf, EGLint buffer)
 {
    struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
-   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
    struct dri2_egl_context *dri2_ctx;
    _EGLContext *ctx;
    GLint  target;
+   __DRIdrawable *dri_drawable = dri2_dpy->vtbl->get_dri_drawable(surf);
 
    ctx = _eglGetCurrentContext();
    dri2_ctx = dri2_egl_context(ctx);
@@ -1401,7 +1409,7 @@ dri2_release_tex_image(_EGLDriver *drv,
    if (!_eglReleaseTexImage(drv, disp, surf, buffer))
       return EGL_FALSE;
 
-   switch (dri2_surf->base.TextureTarget) {
+   switch (surf->TextureTarget) {
    case EGL_TEXTURE_2D:
       target = GL_TEXTURE_2D;
       break;
@@ -1413,7 +1421,7 @@ dri2_release_tex_image(_EGLDriver *drv,
        dri2_dpy->tex_buffer->releaseTexBuffer != NULL) {
       (*dri2_dpy->tex_buffer->releaseTexBuffer)(dri2_ctx->dri_context,
                                                 target,
-                                                dri2_surf->dri_drawable);
+                                                dri_drawable);
    }
 
    return EGL_TRUE;
diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
index 0e837b3eb8b..c3c9fc0166c 100644
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -145,6 +145,8 @@ struct dri2_egl_display_vtbl {
    EGLBoolean (*get_sync_values)(_EGLDisplay *display, _EGLSurface *surface,
                                  EGLuint64KHR *ust, EGLuint64KHR *msc,
                                  EGLuint64KHR *sbc);
+
+   __DRIdrawable *(*get_dri_drawable)(_EGLSurface *surf);
 };
 
 struct dri2_egl_display
@@ -327,6 +329,9 @@ dri2_load_driver_swrast(_EGLDisplay *disp);
 EGLBoolean
 dri2_create_screen(_EGLDisplay *disp);
 
+__DRIdrawable *
+dri2_surface_get_dri_drawable(_EGLSurface *surf);
+
 __DRIimage *
 dri2_lookup_egl_image(__DRIscreen *screen, void *image, void *data);
 
diff --git a/src/egl/drivers/dri2/platform_android.c b/src/egl/drivers/dri2/platform_android.c
index 4abe82f63a0..8f3abcb9867 100644
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -650,6 +650,7 @@ static struct dri2_egl_display_vtbl droid_display_vtbl = {
    .query_buffer_age = dri2_fallback_query_buffer_age,
    .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
    .get_sync_values = dri2_fallback_get_sync_values,
+   .get_dri_drawable = dri2_surface_get_dri_drawable,
 };
 
 EGLBoolean
diff --git a/src/egl/drivers/dri2/platform_drm.c b/src/egl/drivers/dri2/platform_drm.c
index 815d2674cb2..3f4f7e78190 100644
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -594,6 +594,7 @@ static struct dri2_egl_display_vtbl dri2_drm_display_vtbl = {
    .query_buffer_age = dri2_drm_query_buffer_age,
    .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
    .get_sync_values = dri2_fallback_get_sync_values,
+   .get_dri_drawable = dri2_surface_get_dri_drawable,
 };
 
 EGLBoolean
diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c
index a635c758da1..c2438f7509b 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -1025,6 +1025,7 @@ static struct dri2_egl_display_vtbl dri2_wl_display_vtbl = {
    .query_buffer_age = dri2_wl_query_buffer_age,
    .create_wayland_buffer_from_image = dri2_wl_create_wayland_buffer_from_image,
    .get_sync_values = dri2_fallback_get_sync_values,
+   .get_dri_drawable = dri2_surface_get_dri_drawable,
 };
 
 static EGLBoolean
@@ -1752,6 +1753,7 @@ static struct dri2_egl_display_vtbl dri2_wl_swrast_display_vtbl = {
    .query_buffer_age = dri2_fallback_query_buffer_age,
    .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
    .get_sync_values = dri2_fallback_get_sync_values,
+   .get_dri_drawable = dri2_surface_get_dri_drawable,
 };
 
 static EGLBoolean
diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c
index 88a06a8c6a8..e75dcb90699 100644
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -1112,6 +1112,7 @@ static struct dri2_egl_display_vtbl dri2_x11_swrast_display_vtbl = {
    .query_buffer_age = dri2_fallback_query_buffer_age,
    .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
    .get_sync_values = dri2_fallback_get_sync_values,
+   .get_dri_drawable = dri2_surface_get_dri_drawable,
 };
 
 static struct dri2_egl_display_vtbl dri2_x11_display_vtbl = {
@@ -1130,6 +1131,7 @@ static struct dri2_egl_display_vtbl dri2_x11_display_vtbl = {
    .query_buffer_age = dri2_fallback_query_buffer_age,
    .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
    .get_sync_values = dri2_x11_get_sync_values,
+   .get_dri_drawable = dri2_surface_get_dri_drawable,
 };
 
 static EGLBoolean

From f35198badeb956a8f435727d805a47c7e42610d0 Mon Sep 17 00:00:00 2001
From: Boyan Ding <boyan.j.ding@gmail.com>
Date: Tue, 21 Jul 2015 23:44:00 +0800
Subject: [PATCH 033/335] egl/x11: Implement dri3 support with loader's dri3
 helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2: From Martin Peres
 - Report that we are compiling the dri3 backend in configure.ac
 - Update the Makefile.am
 - get rid of the LIBDRM_HAS_RENDERNODE_SUPPORT macro
 - fix some warnings related to EGLuint64KHR to int64_t conversions
 - use dri2_get_dri_config to get the __DRIconfig instead of open-coding it
 - replace the occasional tabs with spaces

v3: From Martin Peres
 - fix an indent problem (Matt Turner)
 - drop the authenticate function, use NULL in the vtable instead (Emil)
 - drop some useless includes (Emil Velikov)
 - mandate libdrm (Emil Velikov)
 - link to xcb-dri3 (Kristian Høgsberg)
 - convert to the new loader interface for drawable (Kristian)
 - remove some dead code after dropping some vfuncs (Kristian)
 - add a comment on the topic of rendering to the frontbuffer

v4: From Martin Peres
 - do not expose the preserved swap behavior (Acked by Eric Anholt)

Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
Signed-off-by: Martin Peres <martin.peres@linux.intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
---
 configure.ac                             |   9 +
 src/egl/Makefile.am                      |  10 +-
 src/egl/drivers/dri2/egl_dri2.c          |  66 +++-
 src/egl/drivers/dri2/egl_dri2.h          |  14 +-
 src/egl/drivers/dri2/platform_x11.c      | 117 +++++-
 src/egl/drivers/dri2/platform_x11_dri3.c | 472 +++++++++++++++++++++++
 src/egl/drivers/dri2/platform_x11_dri3.h |  41 ++
 7 files changed, 714 insertions(+), 15 deletions(-)
 create mode 100644 src/egl/drivers/dri2/platform_x11_dri3.c
 create mode 100644 src/egl/drivers/dri2/platform_x11_dri3.h

diff --git a/configure.ac b/configure.ac
index 9ea9ab22346..32fb989a898 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1568,6 +1568,12 @@ if test "x$enable_egl" = xyes; then
             if test "x$enable_shared_glapi" = xno; then
                 AC_MSG_ERROR([egl_dri2 requires --enable-shared-glapi])
             fi
+            if test "x$enable_dri3" = xyes; then
+                HAVE_EGL_DRIVER_DRI3=1
+                if test "x$enable_shared_glapi" = xno; then
+                    AC_MSG_ERROR([egl_dri3 requires --enable-shared-glapi])
+                fi
+            fi
         else
             # Avoid building an "empty" libEGL. Drop/update this
             # when other backends (haiku?) come along.
@@ -2520,6 +2526,9 @@ if test "$enable_egl" = yes; then
     if test "x$HAVE_EGL_DRIVER_DRI2" != "x"; then
         egl_drivers="$egl_drivers builtin:egl_dri2"
     fi
+    if test "x$HAVE_EGL_DRIVER_DRI3" != "x"; then
+        egl_drivers="$egl_drivers builtin:egl_dri3"
+    fi
 
     echo "        EGL drivers:    $egl_drivers"
 fi
diff --git a/src/egl/Makefile.am b/src/egl/Makefile.am
index 5c2ba301ffb..88fe13acbd4 100644
--- a/src/egl/Makefile.am
+++ b/src/egl/Makefile.am
@@ -47,12 +47,19 @@ libEGL_la_LDFLAGS = \
 	$(LD_NO_UNDEFINED)
 
 dri2_backend_FILES =
+dri3_backend_FILES =
 
 if HAVE_EGL_PLATFORM_X11
 AM_CFLAGS += -DHAVE_X11_PLATFORM
 AM_CFLAGS += $(XCB_DRI2_CFLAGS)
 libEGL_la_LIBADD += $(XCB_DRI2_LIBS)
 dri2_backend_FILES += drivers/dri2/platform_x11.c
+
+if HAVE_DRI3
+dri3_backend_FILES += \
+	drivers/dri2/platform_x11_dri3.c
+libEGL_la_LIBADD += $(top_builddir)/src/loader/libloader_dri3_helper.la
+endif
 endif
 
 if HAVE_EGL_PLATFORM_WAYLAND
@@ -88,7 +95,8 @@ AM_CFLAGS += \
 
 libEGL_la_SOURCES += \
 	$(dri2_backend_core_FILES) \
-	$(dri2_backend_FILES)
+	$(dri2_backend_FILES) \
+	$(dri3_backend_FILES)
 
 libEGL_la_LIBADD += $(top_builddir)/src/loader/libloader.la
 libEGL_la_LIBADD += $(DLOPEN_LIBS) $(LIBDRM_LIBS)
diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index ceff01ff523..ba16b94e651 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -352,6 +352,12 @@ struct dri2_extension_match {
    int offset;
 };
 
+static struct dri2_extension_match dri3_driver_extensions[] = {
+   { __DRI_CORE, 1, offsetof(struct dri2_egl_display, core) },
+   { __DRI_IMAGE_DRIVER, 1, offsetof(struct dri2_egl_display, image_driver) },
+   { NULL, 0, 0 }
+};
+
 static struct dri2_extension_match dri2_driver_extensions[] = {
    { __DRI_CORE, 1, offsetof(struct dri2_egl_display, core) },
    { __DRI_DRI2, 2, offsetof(struct dri2_egl_display, dri2) },
@@ -493,6 +499,25 @@ dri2_open_driver(_EGLDisplay *disp)
    return extensions;
 }
 
+EGLBoolean
+dri2_load_driver_dri3(_EGLDisplay *disp)
+{
+   struct dri2_egl_display *dri2_dpy = disp->DriverData;
+   const __DRIextension **extensions;
+
+   extensions = dri2_open_driver(disp);
+   if (!extensions)
+      return EGL_FALSE;
+
+   if (!dri2_bind_extensions(dri2_dpy, dri3_driver_extensions, extensions)) {
+      dlclose(dri2_dpy->driver);
+      return EGL_FALSE;
+   }
+   dri2_dpy->driver_extensions = extensions;
+
+   return EGL_TRUE;
+}
+
 EGLBoolean
 dri2_load_driver(_EGLDisplay *disp)
 {
@@ -550,7 +575,9 @@ dri2_setup_screen(_EGLDisplay *disp)
    struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
    unsigned int api_mask;
 
-   if (dri2_dpy->dri2) {
+   if (dri2_dpy->image_driver) {
+      api_mask = dri2_dpy->image_driver->getAPIMask(dri2_dpy->dri_screen);
+   } else if (dri2_dpy->dri2) {
       api_mask = dri2_dpy->dri2->getAPIMask(dri2_dpy->dri_screen);
    } else {
       assert(dri2_dpy->swrast);
@@ -570,7 +597,7 @@ dri2_setup_screen(_EGLDisplay *disp)
    if (api_mask & (1 << __DRI_API_GLES3))
       disp->ClientAPIs |= EGL_OPENGL_ES3_BIT_KHR;
 
-   assert(dri2_dpy->dri2 || dri2_dpy->swrast);
+   assert(dri2_dpy->image_driver || dri2_dpy->dri2 || dri2_dpy->swrast);
    disp->Extensions.KHR_surfaceless_context = EGL_TRUE;
    disp->Extensions.MESA_configless_context = EGL_TRUE;
 
@@ -578,7 +605,8 @@ dri2_setup_screen(_EGLDisplay *disp)
                                    __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB))
       disp->Extensions.KHR_gl_colorspace = EGL_TRUE;
 
-   if ((dri2_dpy->dri2 && dri2_dpy->dri2->base.version >= 3) ||
+   if (dri2_dpy->image_driver ||
+       (dri2_dpy->dri2 && dri2_dpy->dri2->base.version >= 3) ||
        (dri2_dpy->swrast && dri2_dpy->swrast->base.version >= 3)) {
       disp->Extensions.KHR_create_context = EGL_TRUE;
 
@@ -641,7 +669,14 @@ dri2_create_screen(_EGLDisplay *disp)
 
    dri2_dpy = disp->DriverData;
 
-   if (dri2_dpy->dri2) {
+   if (dri2_dpy->image_driver) {
+      dri2_dpy->dri_screen =
+         dri2_dpy->image_driver->createNewScreen2(0, dri2_dpy->fd,
+                                                  dri2_dpy->extensions,
+                                                  dri2_dpy->driver_extensions,
+                                                  &dri2_dpy->driver_configs,
+                                                  disp);
+   } else if (dri2_dpy->dri2) {
       if (dri2_dpy->dri2->base.version >= 4) {
          dri2_dpy->dri_screen =
             dri2_dpy->dri2->createNewScreen2(0, dri2_dpy->fd,
@@ -677,7 +712,7 @@ dri2_create_screen(_EGLDisplay *disp)
 
    extensions = dri2_dpy->core->getExtensions(dri2_dpy->dri_screen);
 
-   if (dri2_dpy->dri2) {
+   if (dri2_dpy->image_driver || dri2_dpy->dri2) {
       if (!dri2_bind_extensions(dri2_dpy, dri2_core_extensions, extensions))
          goto cleanup_dri_screen;
    } else {
@@ -1024,7 +1059,26 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
    else
       dri_config = NULL;
 
-   if (dri2_dpy->dri2) {
+   if (dri2_dpy->image_driver) {
+      unsigned error;
+      unsigned num_attribs = 8;
+      uint32_t ctx_attribs[8];
+
+      if (!dri2_fill_context_attribs(dri2_ctx, dri2_dpy, ctx_attribs,
+                                        &num_attribs))
+         goto cleanup;
+
+      dri2_ctx->dri_context =
+         dri2_dpy->image_driver->createContextAttribs(dri2_dpy->dri_screen,
+                                                      api,
+                                                      dri_config,
+                                                      shared,
+                                                      num_attribs / 2,
+                                                      ctx_attribs,
+                                                      & error,
+                                                      dri2_ctx);
+      dri2_create_context_attribs_error(error);
+   } else if (dri2_dpy->dri2) {
       if (dri2_dpy->dri2->base.version >= 3) {
          unsigned error;
          unsigned num_attribs = 8;
diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
index c3c9fc0166c..52ad92b182d 100644
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -35,6 +35,10 @@
 #include <xcb/dri2.h>
 #include <xcb/xfixes.h>
 #include <X11/Xlib-xcb.h>
+
+#ifdef HAVE_DRI3
+#include "loader_dri3_helper.h"
+#endif
 #endif
 
 #ifdef HAVE_WAYLAND_PLATFORM
@@ -160,6 +164,7 @@ struct dri2_egl_display
    const __DRIconfig       **driver_configs;
    void                     *driver;
    const __DRIcoreExtension       *core;
+   const __DRIimageDriverExtension *image_driver;
    const __DRIdri2Extension       *dri2;
    const __DRIswrastExtension     *swrast;
    const __DRI2flushExtension     *flush;
@@ -192,6 +197,9 @@ struct dri2_egl_display
 #ifdef HAVE_X11_PLATFORM
    xcb_connection_t         *conn;
    int                      screen;
+#ifdef HAVE_DRI3
+   struct loader_dri3_extensions loader_dri3_ext;
+#endif
 #endif
 
 #ifdef HAVE_WAYLAND_PLATFORM
@@ -205,8 +213,9 @@ struct dri2_egl_display
    int			     formats;
    uint32_t                  capabilities;
    int			     is_render_node;
-   int			     is_different_gpu;
 #endif
+
+   int			     is_different_gpu;
 };
 
 struct dri2_egl_context
@@ -326,6 +335,9 @@ dri2_setup_screen(_EGLDisplay *disp);
 EGLBoolean
 dri2_load_driver_swrast(_EGLDisplay *disp);
 
+EGLBoolean
+dri2_load_driver_dri3(_EGLDisplay *disp);
+
 EGLBoolean
 dri2_create_screen(_EGLDisplay *disp);
 
diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c
index e75dcb90699..d291b478a25 100644
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -45,6 +45,10 @@
 #include "egl_dri2_fallbacks.h"
 #include "loader.h"
 
+#ifdef HAVE_DRI3
+#include "platform_x11_dri3.h"
+#endif
+
 static EGLBoolean
 dri2_x11_swap_interval(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf,
                        EGLint interval);
@@ -703,7 +707,7 @@ dri2_x11_local_authenticate(_EGLDisplay *disp)
 
 static EGLBoolean
 dri2_x11_add_configs_for_visuals(struct dri2_egl_display *dri2_dpy,
-                                 _EGLDisplay *disp)
+                                 _EGLDisplay *disp, bool supports_preserved)
 {
    xcb_screen_iterator_t s;
    xcb_depth_iterator_t d;
@@ -724,8 +728,10 @@ dri2_x11_add_configs_for_visuals(struct dri2_egl_display *dri2_dpy,
    surface_type =
       EGL_WINDOW_BIT |
       EGL_PIXMAP_BIT |
-      EGL_PBUFFER_BIT |
-      EGL_SWAP_BEHAVIOR_PRESERVED_BIT;
+      EGL_PBUFFER_BIT;
+
+   if (supports_preserved)
+      surface_type |= EGL_SWAP_BEHAVIOR_PRESERVED_BIT;
 
    while (d.rem > 0) {
       EGLBoolean class_added[6] = { 0, };
@@ -1181,7 +1187,7 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp)
    if (!dri2_create_screen(disp))
       goto cleanup_driver;
 
-   if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp))
+   if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, true))
       goto cleanup_configs;
 
    /* Fill vtbl last to prevent accidentally calling virtual function during
@@ -1252,6 +1258,96 @@ dri2_x11_setup_swap_interval(struct dri2_egl_display *dri2_dpy)
    }
 }
 
+#ifdef HAVE_DRI3
+static EGLBoolean
+dri2_initialize_x11_dri3(_EGLDriver *drv, _EGLDisplay *disp)
+{
+   struct dri2_egl_display *dri2_dpy;
+
+   dri2_dpy = calloc(1, sizeof *dri2_dpy);
+   if (!dri2_dpy)
+      return _eglError(EGL_BAD_ALLOC, "eglInitialize");
+
+   disp->DriverData = (void *) dri2_dpy;
+   if (disp->PlatformDisplay == NULL) {
+      dri2_dpy->conn = xcb_connect(0, &dri2_dpy->screen);
+      dri2_dpy->own_device = true;
+   } else {
+      Display *dpy = disp->PlatformDisplay;
+
+      dri2_dpy->conn = XGetXCBConnection(dpy);
+      dri2_dpy->screen = DefaultScreen(dpy);
+   }
+
+   if (xcb_connection_has_error(dri2_dpy->conn)) {
+      _eglLog(_EGL_WARNING, "DRI2: xcb_connect failed");
+      goto cleanup_dpy;
+   }
+
+   if (dri2_dpy->conn) {
+      if (!dri3_x11_connect(dri2_dpy))
+         goto cleanup_conn;
+   }
+
+   if (!dri2_load_driver_dri3(disp))
+      goto cleanup_conn;
+
+   dri2_dpy->extensions[0] = &dri3_image_loader_extension.base;
+   dri2_dpy->extensions[1] = &use_invalidate.base;
+   dri2_dpy->extensions[2] = &image_lookup_extension.base;
+   dri2_dpy->extensions[3] = NULL;
+
+   dri2_dpy->swap_available = true;
+   dri2_dpy->invalidate_available = true;
+
+   if (!dri2_create_screen(disp))
+      goto cleanup_fd;
+
+   dri2_x11_setup_swap_interval(dri2_dpy);
+
+   disp->Extensions.NOK_texture_from_pixmap = EGL_TRUE;
+   disp->Extensions.CHROMIUM_sync_control = EGL_TRUE;
+   disp->Extensions.EXT_buffer_age = EGL_TRUE;
+
+#ifdef HAVE_WAYLAND_PLATFORM
+   disp->Extensions.WL_bind_wayland_display = EGL_TRUE;
+#endif
+
+   if (dri2_dpy->conn) {
+      if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, false))
+         goto cleanup_configs;
+   }
+
+   dri2_dpy->loader_dri3_ext.core = dri2_dpy->core;
+   dri2_dpy->loader_dri3_ext.image_driver = dri2_dpy->image_driver;
+   dri2_dpy->loader_dri3_ext.flush = dri2_dpy->flush;
+   dri2_dpy->loader_dri3_ext.tex_buffer = dri2_dpy->tex_buffer;
+   dri2_dpy->loader_dri3_ext.image = dri2_dpy->image;
+   dri2_dpy->loader_dri3_ext.config = dri2_dpy->config;
+
+   /* Fill vtbl last to prevent accidentally calling virtual function during
+    * initialization.
+    */
+   dri2_dpy->vtbl = &dri3_x11_display_vtbl;
+
+   return EGL_TRUE;
+
+ cleanup_configs:
+   _eglCleanupDisplay(disp);
+   dri2_dpy->core->destroyScreen(dri2_dpy->dri_screen);
+   dlclose(dri2_dpy->driver);
+ cleanup_fd:
+   close(dri2_dpy->fd);
+ cleanup_conn:
+   if (disp->PlatformDisplay == NULL)
+      xcb_disconnect(dri2_dpy->conn);
+ cleanup_dpy:
+   free(dri2_dpy);
+
+   return EGL_FALSE;
+}
+#endif
+
 static EGLBoolean
 dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp)
 {
@@ -1323,7 +1419,7 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp)
    disp->Extensions.WL_bind_wayland_display = EGL_TRUE;
 #endif
 
-   if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp))
+   if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, true))
       goto cleanup_configs;
 
    /* Fill vtbl last to prevent accidentally calling virtual function during
@@ -1357,9 +1453,16 @@ dri2_initialize_x11(_EGLDriver *drv, _EGLDisplay *disp)
    int x11_dri2_accel = (getenv("LIBGL_ALWAYS_SOFTWARE") == NULL);
 
    if (x11_dri2_accel) {
-      if (!dri2_initialize_x11_dri2(drv, disp)) {
-         initialized = dri2_initialize_x11_swrast(drv, disp);
+#ifdef HAVE_DRI3
+      if (getenv("LIBGL_DRI3_DISABLE") != NULL ||
+          !dri2_initialize_x11_dri3(drv, disp)) {
+#endif
+         if (!dri2_initialize_x11_dri2(drv, disp)) {
+            initialized = dri2_initialize_x11_swrast(drv, disp);
+         }
+#ifdef HAVE_DRI3
       }
+#endif
    } else {
       initialized = dri2_initialize_x11_swrast(drv, disp);
    }
diff --git a/src/egl/drivers/dri2/platform_x11_dri3.c b/src/egl/drivers/dri2/platform_x11_dri3.c
new file mode 100644
index 00000000000..0b95e4d42de
--- /dev/null
+++ b/src/egl/drivers/dri2/platform_x11_dri3.c
@@ -0,0 +1,472 @@
+/*
+ * Copyright © 2015 Boyan Ding
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  The copyright holders make no representations
+ * about the suitability of this software for any purpose.  It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <xcb/xcb.h>
+#include <xcb/dri3.h>
+#include <xcb/present.h>
+
+#include <xf86drm.h>
+
+#include "egl_dri2.h"
+#include "egl_dri2_fallbacks.h"
+#include "platform_x11_dri3.h"
+
+#include "loader.h"
+#include "loader_dri3_helper.h"
+
+static struct dri3_egl_surface *
+loader_drawable_to_egl_surface(struct loader_dri3_drawable *draw) {
+   size_t offset = offsetof(struct dri3_egl_surface, loader_drawable);
+   return (struct dri3_egl_surface *)(((void*) draw) - offset);
+}
+
+static int
+egl_dri3_get_swap_interval(struct loader_dri3_drawable *draw)
+{
+   struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw);
+
+   return dri3_surf->base.SwapInterval;
+}
+
+static int
+egl_dri3_clamp_swap_interval(struct loader_dri3_drawable *draw, int interval)
+{
+   struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw);
+
+   if (interval > dri3_surf->base.Config->MaxSwapInterval)
+      interval = dri3_surf->base.Config->MaxSwapInterval;
+   else if (interval < dri3_surf->base.Config->MinSwapInterval)
+      interval = dri3_surf->base.Config->MinSwapInterval;
+
+   return interval;
+}
+
+static void
+egl_dri3_set_swap_interval(struct loader_dri3_drawable *draw, int interval)
+{
+   struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw);
+
+   dri3_surf->base.SwapInterval = interval;
+}
+
+static void
+egl_dri3_set_drawable_size(struct loader_dri3_drawable *draw,
+                           int width, int height)
+{
+   struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw);
+
+   dri3_surf->base.Width = width;
+   dri3_surf->base.Height = height;
+}
+
+static bool
+egl_dri3_in_current_context(struct loader_dri3_drawable *draw)
+{
+   struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw);
+   _EGLContext *ctx = _eglGetCurrentContext();
+
+   return ctx->Resource.Display == dri3_surf->base.Resource.Display;
+}
+
+static __DRIcontext *
+egl_dri3_get_dri_context(struct loader_dri3_drawable *draw)
+{
+   _EGLContext *ctx = _eglGetCurrentContext();
+   struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
+
+   return dri2_ctx->dri_context;
+}
+
+static void
+egl_dri3_flush_drawable(struct loader_dri3_drawable *draw, unsigned flags)
+{
+   struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw);
+   _EGLDisplay *disp = dri3_surf->base.Resource.Display;
+
+   dri2_flush_drawable_for_swapbuffers(disp, &dri3_surf->base);
+}
+
+static struct loader_dri3_vtable egl_dri3_vtable = {
+   .get_swap_interval = egl_dri3_get_swap_interval,
+   .clamp_swap_interval = egl_dri3_clamp_swap_interval,
+   .set_swap_interval = egl_dri3_set_swap_interval,
+   .set_drawable_size = egl_dri3_set_drawable_size,
+   .in_current_context = egl_dri3_in_current_context,
+   .get_dri_context = egl_dri3_get_dri_context,
+   .flush_drawable = egl_dri3_flush_drawable,
+   .show_fps = NULL,
+};
+
+static EGLBoolean
+dri3_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
+{
+   struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf);
+
+   (void) drv;
+
+   if (!_eglPutSurface(surf))
+      return EGL_TRUE;
+
+   loader_dri3_drawable_fini(&dri3_surf->loader_drawable);
+
+   free(surf);
+
+   return EGL_TRUE;
+}
+
+static EGLBoolean
+dri3_set_swap_interval(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf,
+                       EGLint interval)
+{
+   struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf);
+
+   loader_dri3_set_swap_interval(&dri3_surf->loader_drawable, interval);
+
+   return EGL_TRUE;
+}
+
+static xcb_screen_t *
+get_xcb_screen(xcb_screen_iterator_t iter, int screen)
+{
+    for (; iter.rem; --screen, xcb_screen_next(&iter))
+        if (screen == 0)
+            return iter.data;
+
+    return NULL;
+}
+
+static _EGLSurface *
+dri3_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
+                    _EGLConfig *conf, void *native_surface,
+                    const EGLint *attrib_list)
+{
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+   struct dri2_egl_config *dri2_conf = dri2_egl_config(conf);
+   struct dri3_egl_surface *dri3_surf;
+   const __DRIconfig *dri_config;
+   xcb_drawable_t drawable;
+   xcb_screen_iterator_t s;
+   xcb_screen_t *screen;
+
+   STATIC_ASSERT(sizeof(uintptr_t) == sizeof(native_surface));
+   drawable = (uintptr_t) native_surface;
+
+   (void) drv;
+
+   dri3_surf = calloc(1, sizeof *dri3_surf);
+   if (!dri3_surf) {
+      _eglError(EGL_BAD_ALLOC, "dri3_create_surface");
+      return NULL;
+   }
+
+   if (!_eglInitSurface(&dri3_surf->base, disp, type, conf, attrib_list))
+      goto cleanup_surf;
+
+   if (type == EGL_PBUFFER_BIT) {
+      s = xcb_setup_roots_iterator(xcb_get_setup(dri2_dpy->conn));
+      screen = get_xcb_screen(s, dri2_dpy->screen);
+      if (!screen) {
+         _eglError(EGL_BAD_NATIVE_WINDOW, "dri3_create_surface");
+         goto cleanup_surf;
+      }
+
+      drawable = xcb_generate_id(dri2_dpy->conn);
+      xcb_create_pixmap(dri2_dpy->conn, conf->BufferSize,
+                        drawable, screen->root,
+                        dri3_surf->base.Width, dri3_surf->base.Height);
+   }
+
+   dri_config = dri2_get_dri_config(dri2_conf, type,
+                                    dri3_surf->base.GLColorspace);
+
+   if (loader_dri3_drawable_init(dri2_dpy->conn, drawable,
+                                 dri2_dpy->dri_screen,
+                                 dri2_dpy->is_different_gpu, dri_config,
+                                 &dri2_dpy->loader_dri3_ext,
+                                 &egl_dri3_vtable,
+                                 &dri3_surf->loader_drawable)) {
+      _eglError(EGL_BAD_ALLOC, "dri3_surface_create");
+      goto cleanup_pixmap;
+   }
+
+   return &dri3_surf->base;
+
+ cleanup_pixmap:
+   if (type == EGL_PBUFFER_BIT)
+      xcb_free_pixmap(dri2_dpy->conn, drawable);
+ cleanup_surf:
+   free(dri3_surf);
+
+   return NULL;
+}
+
+/**
+ * Called via eglCreateWindowSurface(), drv->API.CreateWindowSurface().
+ */
+static _EGLSurface *
+dri3_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
+                           _EGLConfig *conf, void *native_window,
+                           const EGLint *attrib_list)
+{
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+   _EGLSurface *surf;
+
+   surf = dri3_create_surface(drv, disp, EGL_WINDOW_BIT, conf,
+                              native_window, attrib_list);
+   if (surf != NULL)
+      dri3_set_swap_interval(drv, disp, surf, dri2_dpy->default_swap_interval);
+
+   return surf;
+}
+
+static _EGLSurface *
+dri3_create_pixmap_surface(_EGLDriver *drv, _EGLDisplay *disp,
+                           _EGLConfig *conf, void *native_pixmap,
+                           const EGLint *attrib_list)
+{
+   return dri3_create_surface(drv, disp, EGL_PIXMAP_BIT, conf,
+                              native_pixmap, attrib_list);
+}
+
+static _EGLSurface *
+dri3_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *disp,
+                                _EGLConfig *conf, const EGLint *attrib_list)
+{
+   return dri3_create_surface(drv, disp, EGL_PBUFFER_BIT, conf,
+                              XCB_WINDOW_NONE, attrib_list);
+}
+
+static EGLBoolean
+dri3_get_sync_values(_EGLDisplay *display, _EGLSurface *surface,
+                     EGLuint64KHR *ust, EGLuint64KHR *msc,
+                     EGLuint64KHR *sbc)
+{
+   struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surface);
+
+   return loader_dri3_wait_for_msc(&dri3_surf->loader_drawable, 0, 0, 0,
+                                   (int64_t *) ust, (int64_t *) msc,
+                                   (int64_t *) sbc) ? EGL_TRUE : EGL_FALSE;
+}
+
+/**
+ * Called by the driver when it needs to update the real front buffer with the
+ * contents of its fake front buffer.
+ */
+static void
+dri3_flush_front_buffer(__DRIdrawable *driDrawable, void *loaderPrivate)
+{
+   /* There does not seem to be any kind of consensus on whether we should
+    * support front-buffer rendering or not:
+    * http://lists.freedesktop.org/archives/mesa-dev/2013-June/040129.html
+    */
+   _eglLog(_EGL_WARNING, "FIXME: egl/x11 doesn't support front buffer rendering.");
+   (void) driDrawable;
+   (void) loaderPrivate;
+}
+
+const __DRIimageLoaderExtension dri3_image_loader_extension = {
+   .base = { __DRI_IMAGE_LOADER, 1 },
+
+   .getBuffers          = loader_dri3_get_buffers,
+   .flushFrontBuffer    = dri3_flush_front_buffer,
+};
+
+static EGLBoolean
+dri3_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
+{
+   struct dri3_egl_surface *dri3_surf = dri3_egl_surface(draw);
+
+   /* No-op for a pixmap or pbuffer surface */
+   if (draw->Type == EGL_PIXMAP_BIT || draw->Type == EGL_PBUFFER_BIT)
+      return 0;
+
+   return loader_dri3_swap_buffers_msc(&dri3_surf->loader_drawable,
+                                       0, 0, 0, 0,
+                                       draw->SwapBehavior == EGL_BUFFER_PRESERVED) != -1;
+}
+
+static EGLBoolean
+dri3_copy_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf,
+                  void *native_pixmap_target)
+{
+   struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf);
+   xcb_pixmap_t target;
+
+   STATIC_ASSERT(sizeof(uintptr_t) == sizeof(native_pixmap_target));
+   target = (uintptr_t) native_pixmap_target;
+
+   loader_dri3_copy_drawable(&dri3_surf->loader_drawable, target,
+                             dri3_surf->loader_drawable.drawable);
+
+   return EGL_TRUE;
+}
+
+static int
+dri3_query_buffer_age(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf)
+{
+   struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf);
+
+   return loader_dri3_query_buffer_age(&dri3_surf->loader_drawable);
+}
+
+static __DRIdrawable *
+dri3_get_dri_drawable(_EGLSurface *surf)
+{
+   struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf);
+
+   return dri3_surf->loader_drawable.dri_drawable;
+}
+
+struct dri2_egl_display_vtbl dri3_x11_display_vtbl = {
+   .authenticate = NULL,
+   .create_window_surface = dri3_create_window_surface,
+   .create_pixmap_surface = dri3_create_pixmap_surface,
+   .create_pbuffer_surface = dri3_create_pbuffer_surface,
+   .destroy_surface = dri3_destroy_surface,
+   .create_image = dri2_create_image_khr,
+   .swap_interval = dri3_set_swap_interval,
+   .swap_buffers = dri3_swap_buffers,
+   .swap_buffers_with_damage = dri2_fallback_swap_buffers_with_damage,
+   .swap_buffers_region = dri2_fallback_swap_buffers_region,
+   .post_sub_buffer = dri2_fallback_post_sub_buffer,
+   .copy_buffers = dri3_copy_buffers,
+   .query_buffer_age = dri3_query_buffer_age,
+   .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
+   .get_sync_values = dri3_get_sync_values,
+   .get_dri_drawable = dri3_get_dri_drawable,
+};
+
+static char *
+dri3_get_device_name(int fd)
+{
+   char *ret = NULL;
+
+   ret = drmGetRenderDeviceNameFromFd(fd);
+   if (ret)
+      return ret;
+
+   /* For dri3, render node support is required for WL_bind_wayland_display.
+    * In order not to regress on older systems without kernel or libdrm
+    * support, fall back to dri2. User can override it with environment
+    * variable if they don't need to use that extension.
+    */
+   if (getenv("EGL_FORCE_DRI3") == NULL) {
+      _eglLog(_EGL_WARNING, "Render node support not available, falling back to dri2");
+      _eglLog(_EGL_WARNING, "If you want to force dri3, set EGL_FORCE_DRI3 environment variable");
+   } else
+      ret = loader_get_device_name_for_fd(fd);
+
+   return ret;
+}
+
+EGLBoolean
+dri3_x11_connect(struct dri2_egl_display *dri2_dpy)
+{
+   xcb_dri3_query_version_reply_t *dri3_query;
+   xcb_dri3_query_version_cookie_t dri3_query_cookie;
+   xcb_present_query_version_reply_t *present_query;
+   xcb_present_query_version_cookie_t present_query_cookie;
+   xcb_generic_error_t *error;
+   xcb_screen_iterator_t s;
+   xcb_screen_t *screen;
+   const xcb_query_extension_reply_t *extension;
+
+   xcb_prefetch_extension_data (dri2_dpy->conn, &xcb_dri3_id);
+   xcb_prefetch_extension_data (dri2_dpy->conn, &xcb_present_id);
+
+   extension = xcb_get_extension_data(dri2_dpy->conn, &xcb_dri3_id);
+   if (!(extension && extension->present))
+      return EGL_FALSE;
+
+   extension = xcb_get_extension_data(dri2_dpy->conn, &xcb_present_id);
+   if (!(extension && extension->present))
+      return EGL_FALSE;
+
+   dri3_query_cookie = xcb_dri3_query_version(dri2_dpy->conn,
+                                              XCB_DRI3_MAJOR_VERSION,
+                                              XCB_DRI3_MINOR_VERSION);
+
+   present_query_cookie = xcb_present_query_version(dri2_dpy->conn,
+                                                    XCB_PRESENT_MAJOR_VERSION,
+                                                    XCB_PRESENT_MINOR_VERSION);
+
+   dri3_query =
+      xcb_dri3_query_version_reply(dri2_dpy->conn, dri3_query_cookie, &error);
+   if (dri3_query == NULL || error != NULL) {
+      _eglLog(_EGL_WARNING, "DRI2: failed to query dri3 version");
+      free(dri3_query);
+      free(error);
+      return EGL_FALSE;
+   }
+   free(dri3_query);
+
+   present_query =
+      xcb_present_query_version_reply(dri2_dpy->conn,
+                                      present_query_cookie, &error);
+   if (present_query == NULL || error != NULL) {
+      _eglLog(_EGL_WARNING, "DRI2: failed to query Present version");
+      free(present_query);
+      free(error);
+      return EGL_FALSE;
+   }
+   free(present_query);
+
+   s = xcb_setup_roots_iterator(xcb_get_setup(dri2_dpy->conn));
+   screen = get_xcb_screen(s, dri2_dpy->screen);
+   if (!screen) {
+      _eglError(EGL_BAD_NATIVE_WINDOW, "dri3_x11_connect");
+      return EGL_FALSE;
+   }
+
+   dri2_dpy->fd = loader_dri3_open(dri2_dpy->conn, screen->root, 0);
+   if (dri2_dpy->fd < 0) {
+      int conn_error = xcb_connection_has_error(dri2_dpy->conn);
+      _eglLog(_EGL_WARNING, "DRI2: Screen seem not DRI3 capable");
+
+      if (conn_error)
+         _eglLog(_EGL_WARNING, "DRI2: Failed to initialize DRI3");
+
+      return EGL_FALSE;
+   }
+
+   dri2_dpy->fd = loader_get_user_preferred_fd(dri2_dpy->fd, &dri2_dpy->is_different_gpu);
+
+   dri2_dpy->driver_name = loader_get_driver_for_fd(dri2_dpy->fd, 0);
+   if (!dri2_dpy->driver_name) {
+      _eglLog(_EGL_WARNING, "DRI2: No driver found");
+      close(dri2_dpy->fd);
+      return EGL_FALSE;
+   }
+
+   dri2_dpy->device_name = dri3_get_device_name(dri2_dpy->fd);
+   if (!dri2_dpy->device_name) {
+      close(dri2_dpy->fd);
+      return EGL_FALSE;
+   }
+
+   return EGL_TRUE;
+}
diff --git a/src/egl/drivers/dri2/platform_x11_dri3.h b/src/egl/drivers/dri2/platform_x11_dri3.h
new file mode 100644
index 00000000000..13d85724288
--- /dev/null
+++ b/src/egl/drivers/dri2/platform_x11_dri3.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright © 2015 Boyan Ding
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  The copyright holders make no representations
+ * about the suitability of this software for any purpose.  It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#ifndef EGL_X11_DRI3_INCLUDED
+#define EGL_X11_DRI3_INCLUDED
+
+#include "egl_dri2.h"
+
+_EGL_DRIVER_TYPECAST(dri3_egl_surface, _EGLSurface, obj)
+
+struct dri3_egl_surface {
+   _EGLSurface base;
+   struct loader_dri3_drawable loader_drawable;
+};
+
+extern const __DRIimageLoaderExtension dri3_image_loader_extension;
+extern struct dri2_egl_display_vtbl dri3_x11_display_vtbl;
+
+EGLBoolean
+dri3_x11_connect(struct dri2_egl_display *dri2_dpy);
+
+#endif

From bd6131a8d1e1cf0e6eb5494b50607a4ccb21e1f9 Mon Sep 17 00:00:00 2001
From: Boyan Ding <boyan.j.ding@gmail.com>
Date: Tue, 21 Jul 2015 23:44:01 +0800
Subject: [PATCH 034/335] loader/dri3: Expose function to create __DRIimage
 from pixmap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Used to support EGL_KHR_image_pixmap.

Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
Reviewed-by: Martin Peres <martin.peres@linux.intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
---
 src/loader/loader_dri3_helper.c | 78 +++++++++++++++++++++------------
 src/loader/loader_dri3_helper.h |  9 ++++
 2 files changed, 58 insertions(+), 29 deletions(-)

diff --git a/src/loader/loader_dri3_helper.c b/src/loader/loader_dri3_helper.c
index 9093b179317..62bfe845c08 100644
--- a/src/loader/loader_dri3_helper.c
+++ b/src/loader/loader_dri3_helper.c
@@ -1054,6 +1054,47 @@ image_format_to_fourcc(int format)
    return 0;
 }
 
+__DRIimage *
+loader_dri3_create_image(xcb_connection_t *c,
+                         xcb_dri3_buffer_from_pixmap_reply_t *bp_reply,
+                         unsigned int format,
+                         __DRIscreen *dri_screen,
+                         const __DRIimageExtension *image,
+                         void *loaderPrivate)
+{
+   int                                  *fds;
+   __DRIimage                           *image_planar, *ret;
+   int                                  stride, offset;
+
+   /* Get an FD for the pixmap object
+    */
+   fds = xcb_dri3_buffer_from_pixmap_reply_fds(c, bp_reply);
+
+   stride = bp_reply->stride;
+   offset = 0;
+
+   /* createImageFromFds creates a wrapper __DRIimage structure which
+    * can deal with multiple planes for things like Yuv images. So, once
+    * we've gotten the planar wrapper, pull the single plane out of it and
+    * discard the wrapper.
+    */
+   image_planar = (image->createImageFromFds)(dri_screen,
+                                              bp_reply->width,
+                                              bp_reply->height,
+                                              image_format_to_fourcc(format),
+                                              fds, 1,
+                                              &stride, &offset, loaderPrivate);
+   close(fds[0]);
+   if (!image_planar)
+      return NULL;
+
+   ret = (image->fromPlanar)(image_planar, 0, loaderPrivate);
+
+   (image->destroyImage)(image_planar);
+
+   return ret;
+}
+
 /** dri3_get_pixmap_buffer
  *
  * Get the DRM object for a pixmap from the X server and
@@ -1069,12 +1110,9 @@ dri3_get_pixmap_buffer(__DRIdrawable *driDrawable, unsigned int format,
    xcb_drawable_t                       pixmap;
    xcb_dri3_buffer_from_pixmap_cookie_t bp_cookie;
    xcb_dri3_buffer_from_pixmap_reply_t  *bp_reply;
-   int                                  *fds;
    xcb_sync_fence_t                     sync_fence;
    struct xshmfence                     *shm_fence;
    int                                  fence_fd;
-   __DRIimage                           *image_planar;
-   int                                  stride, offset;
 
    if (buffer)
       return buffer;
@@ -1100,36 +1138,14 @@ dri3_get_pixmap_buffer(__DRIdrawable *driDrawable, unsigned int format,
                           false,
                           fence_fd);
 
-   /* Get an FD for the pixmap object
-    */
    bp_cookie = xcb_dri3_buffer_from_pixmap(draw->conn, pixmap);
-   bp_reply = xcb_dri3_buffer_from_pixmap_reply(draw->conn,
-                                                bp_cookie, NULL);
+   bp_reply = xcb_dri3_buffer_from_pixmap_reply(draw->conn, bp_cookie, NULL);
    if (!bp_reply)
       goto no_image;
-   fds = xcb_dri3_buffer_from_pixmap_reply_fds(draw->conn, bp_reply);
-
-   stride = bp_reply->stride;
-   offset = 0;
-
-   /* createImageFromFds creates a wrapper __DRIimage structure which
-    * can deal with multiple planes for things like Yuv images. So, once
-    * we've gotten the planar wrapper, pull the single plane out of it and
-    * discard the wrapper.
-    */
-   image_planar =
-      (draw->ext->image->createImageFromFds)(draw->dri_screen, bp_reply->width,
-                                             bp_reply->height,
-                                             image_format_to_fourcc(format),
-                                             fds, 1, &stride, &offset, buffer);
-   close(fds[0]);
-   if (!image_planar)
-      goto no_image;
-
-   buffer->image = (draw->ext->image->fromPlanar)(image_planar, 0, buffer);
-
-   (draw->ext->image->destroyImage)(image_planar);
 
+   buffer->image = loader_dri3_create_image(draw->conn, bp_reply, format,
+                                            draw->dri_screen, draw->ext->image,
+                                            buffer);
    if (!buffer->image)
       goto no_image;
 
@@ -1142,9 +1158,13 @@ dri3_get_pixmap_buffer(__DRIdrawable *driDrawable, unsigned int format,
    buffer->sync_fence = sync_fence;
 
    draw->buffers[buf_id] = buffer;
+
+   free(bp_reply);
+
    return buffer;
 
 no_image:
+   free(bp_reply);
    xcb_sync_destroy_fence(draw->conn, sync_fence);
    xshmfence_unmap_shm(shm_fence);
 no_fence:
diff --git a/src/loader/loader_dri3_helper.h b/src/loader/loader_dri3_helper.h
index 54c2a52f5e1..5b8fd1d24ca 100644
--- a/src/loader/loader_dri3_helper.h
+++ b/src/loader/loader_dri3_helper.h
@@ -28,6 +28,7 @@
 #include <stdint.h>
 
 #include <xcb/xcb.h>
+#include <xcb/dri3.h>
 #include <xcb/present.h>
 
 #include <GL/gl.h>
@@ -221,6 +222,14 @@ int loader_dri3_open(xcb_connection_t *conn,
                      xcb_window_t root,
                      uint32_t provider);
 
+__DRIimage *
+loader_dri3_create_image(xcb_connection_t *c,
+                         xcb_dri3_buffer_from_pixmap_reply_t *bp_reply,
+                         unsigned int format,
+                         __DRIscreen *dri_screen,
+                         const __DRIimageExtension *image,
+                         void *loaderPrivate);
+
 int
 loader_dri3_get_buffers(__DRIdrawable *driDrawable,
                         unsigned int format,

From fcdc798515a74d12e4f1f848ac8b8bacce928855 Mon Sep 17 00:00:00 2001
From: Boyan Ding <boyan.j.ding@gmail.com>
Date: Tue, 21 Jul 2015 23:44:02 +0800
Subject: [PATCH 035/335] egl/x11_dri3: Implement EGL_KHR_image_pixmap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2: from Martin Peres
 - Replace a tab with spaces

v3: from Martin Peres
 - disable EGL_KHR_image_pixmap when is_different_gpu is set (Axel Davy)

Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
Reviewed-by: Martin Peres <martin.peres@linux.intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
---
 src/egl/drivers/dri2/platform_x11.c      |  2 +
 src/egl/drivers/dri2/platform_x11_dri3.c | 77 +++++++++++++++++++++++-
 2 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c
index d291b478a25..9dd5defd6a6 100644
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -1305,6 +1305,8 @@ dri2_initialize_x11_dri3(_EGLDriver *drv, _EGLDisplay *disp)
 
    dri2_x11_setup_swap_interval(dri2_dpy);
 
+   if (!dri2_dpy->is_different_gpu)
+      disp->Extensions.KHR_image_pixmap = EGL_TRUE;
    disp->Extensions.NOK_texture_from_pixmap = EGL_TRUE;
    disp->Extensions.CHROMIUM_sync_control = EGL_TRUE;
    disp->Extensions.EXT_buffer_age = EGL_TRUE;
diff --git a/src/egl/drivers/dri2/platform_x11_dri3.c b/src/egl/drivers/dri2/platform_x11_dri3.c
index 0b95e4d42de..aaafd1d6708 100644
--- a/src/egl/drivers/dri2/platform_x11_dri3.c
+++ b/src/egl/drivers/dri2/platform_x11_dri3.c
@@ -272,6 +272,81 @@ dri3_get_sync_values(_EGLDisplay *display, _EGLSurface *surface,
                                    (int64_t *) sbc) ? EGL_TRUE : EGL_FALSE;
 }
 
+static _EGLImage *
+dri3_create_image_khr_pixmap(_EGLDisplay *disp, _EGLContext *ctx,
+                             EGLClientBuffer buffer, const EGLint *attr_list)
+{
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+   struct dri2_egl_image *dri2_img;
+   xcb_drawable_t drawable;
+   xcb_dri3_buffer_from_pixmap_cookie_t bp_cookie;
+   xcb_dri3_buffer_from_pixmap_reply_t  *bp_reply;
+   unsigned int format;
+
+   drawable = (xcb_drawable_t) (uintptr_t) buffer;
+   bp_cookie = xcb_dri3_buffer_from_pixmap(dri2_dpy->conn, drawable);
+   bp_reply = xcb_dri3_buffer_from_pixmap_reply(dri2_dpy->conn,
+                                                bp_cookie, NULL);
+   if (!bp_reply) {
+      _eglError(EGL_BAD_ALLOC, "xcb_dri3_buffer_from_pixmap");
+      return NULL;
+   }
+
+   switch (bp_reply->depth) {
+   case 16:
+      format = __DRI_IMAGE_FORMAT_RGB565;
+      break;
+   case 24:
+      format = __DRI_IMAGE_FORMAT_XRGB8888;
+      break;
+   case 32:
+      format = __DRI_IMAGE_FORMAT_ARGB8888;
+      break;
+   default:
+      _eglError(EGL_BAD_PARAMETER,
+                "dri3_create_image_khr: unsupported pixmap depth");
+      free(bp_reply);
+      return EGL_NO_IMAGE_KHR;
+   }
+
+   dri2_img = malloc(sizeof *dri2_img);
+   if (!dri2_img) {
+      _eglError(EGL_BAD_ALLOC, "dri3_create_image_khr");
+      return EGL_NO_IMAGE_KHR;
+   }
+
+   if (!_eglInitImage(&dri2_img->base, disp)) {
+      free(dri2_img);
+      return EGL_NO_IMAGE_KHR;
+   }
+
+   dri2_img->dri_image = loader_dri3_create_image(dri2_dpy->conn,
+                                                  bp_reply,
+                                                  format,
+                                                  dri2_dpy->dri_screen,
+                                                  dri2_dpy->image,
+                                                  dri2_img);
+
+   free(bp_reply);
+
+   return &dri2_img->base;
+}
+
+static _EGLImage *
+dri3_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp,
+                      _EGLContext *ctx, EGLenum target,
+                      EGLClientBuffer buffer, const EGLint *attr_list)
+{
+   (void) drv;
+
+   switch (target) {
+   case EGL_NATIVE_PIXMAP_KHR:
+      return dri3_create_image_khr_pixmap(disp, ctx, buffer, attr_list);
+   default:
+      return dri2_create_image_khr(drv, disp, ctx, target, buffer, attr_list);
+   }
+}
+
 /**
  * Called by the driver when it needs to update the real front buffer with the
  * contents of its fake front buffer.
@@ -347,7 +422,7 @@ struct dri2_egl_display_vtbl dri3_x11_display_vtbl = {
    .create_pixmap_surface = dri3_create_pixmap_surface,
    .create_pbuffer_surface = dri3_create_pbuffer_surface,
    .destroy_surface = dri3_destroy_surface,
-   .create_image = dri2_create_image_khr,
+   .create_image = dri3_create_image_khr,
    .swap_interval = dri3_set_swap_interval,
    .swap_buffers = dri3_swap_buffers,
    .swap_buffers_with_damage = dri2_fallback_swap_buffers_with_damage,

From 4518eea065df5c6877b996738ed425c292e3b144 Mon Sep 17 00:00:00 2001
From: Martin Peres <martin.peres@linux.intel.com>
Date: Fri, 30 Oct 2015 17:16:35 +0200
Subject: [PATCH 036/335] egl: make it clear which platform x11 backend is
 being used (dri2 or 3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Peres <martin.peres@linux.intel.com>
Reviewed-by: Boyan Ding <boyan.j.ding@gmail.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
---
 src/egl/drivers/dri2/egl_dri2.c          |  6 +++---
 src/egl/drivers/dri2/platform_x11.c      |  6 +++++-
 src/egl/drivers/dri2/platform_x11_dri3.c | 10 +++++-----
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index ba16b94e651..d34b16119e2 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -391,13 +391,13 @@ dri2_bind_extensions(struct dri2_egl_display *dri2_dpy,
    void *field;
 
    for (i = 0; extensions[i]; i++) {
-      _eglLog(_EGL_DEBUG, "DRI2: found extension `%s'", extensions[i]->name);
+      _eglLog(_EGL_DEBUG, "found extension `%s'", extensions[i]->name);
       for (j = 0; matches[j].name; j++) {
 	 if (strcmp(extensions[i]->name, matches[j].name) == 0 &&
 	     extensions[i]->version >= matches[j].version) {
 	    field = ((char *) dri2_dpy + matches[j].offset);
 	    *(const __DRIextension **) field = extensions[i];
-	    _eglLog(_EGL_INFO, "DRI2: found extension %s version %d",
+	    _eglLog(_EGL_INFO, "found extension %s version %d",
 		    extensions[i]->name, extensions[i]->version);
 	 }
       }
@@ -406,7 +406,7 @@ dri2_bind_extensions(struct dri2_egl_display *dri2_dpy,
    for (j = 0; matches[j].name; j++) {
       field = ((char *) dri2_dpy + matches[j].offset);
       if (*(const __DRIextension **) field == NULL) {
-	 _eglLog(_EGL_WARNING, "DRI2: did not find extension %s version %d",
+         _eglLog(_EGL_WARNING, "did not find extension %s version %d",
 		 matches[j].name, matches[j].version);
 	 ret = EGL_FALSE;
       }
diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c
index 9dd5defd6a6..08cbf2d8393 100644
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -1280,7 +1280,7 @@ dri2_initialize_x11_dri3(_EGLDriver *drv, _EGLDisplay *disp)
    }
 
    if (xcb_connection_has_error(dri2_dpy->conn)) {
-      _eglLog(_EGL_WARNING, "DRI2: xcb_connect failed");
+      _eglLog(_EGL_WARNING, "DRI3: xcb_connect failed");
       goto cleanup_dpy;
    }
 
@@ -1332,6 +1332,8 @@ dri2_initialize_x11_dri3(_EGLDriver *drv, _EGLDisplay *disp)
     */
    dri2_dpy->vtbl = &dri3_x11_display_vtbl;
 
+   _eglLog(_EGL_INFO, "Using DRI3");
+
    return EGL_TRUE;
 
  cleanup_configs:
@@ -1429,6 +1431,8 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp)
     */
    dri2_dpy->vtbl = &dri2_x11_display_vtbl;
 
+   _eglLog(_EGL_INFO, "Using DRI2");
+
    return EGL_TRUE;
 
  cleanup_configs:
diff --git a/src/egl/drivers/dri2/platform_x11_dri3.c b/src/egl/drivers/dri2/platform_x11_dri3.c
index aaafd1d6708..8e4a131b11a 100644
--- a/src/egl/drivers/dri2/platform_x11_dri3.c
+++ b/src/egl/drivers/dri2/platform_x11_dri3.c
@@ -492,7 +492,7 @@ dri3_x11_connect(struct dri2_egl_display *dri2_dpy)
    dri3_query =
       xcb_dri3_query_version_reply(dri2_dpy->conn, dri3_query_cookie, &error);
    if (dri3_query == NULL || error != NULL) {
-      _eglLog(_EGL_WARNING, "DRI2: failed to query dri3 version");
+      _eglLog(_EGL_WARNING, "DRI3: failed to query the version");
       free(dri3_query);
       free(error);
       return EGL_FALSE;
@@ -503,7 +503,7 @@ dri3_x11_connect(struct dri2_egl_display *dri2_dpy)
       xcb_present_query_version_reply(dri2_dpy->conn,
                                       present_query_cookie, &error);
    if (present_query == NULL || error != NULL) {
-      _eglLog(_EGL_WARNING, "DRI2: failed to query Present version");
+      _eglLog(_EGL_WARNING, "DRI3: failed to query Present version");
       free(present_query);
       free(error);
       return EGL_FALSE;
@@ -520,10 +520,10 @@ dri3_x11_connect(struct dri2_egl_display *dri2_dpy)
    dri2_dpy->fd = loader_dri3_open(dri2_dpy->conn, screen->root, 0);
    if (dri2_dpy->fd < 0) {
       int conn_error = xcb_connection_has_error(dri2_dpy->conn);
-      _eglLog(_EGL_WARNING, "DRI2: Screen seem not DRI3 capable");
+      _eglLog(_EGL_WARNING, "DRI3: Screen seems not DRI3 capable");
 
       if (conn_error)
-         _eglLog(_EGL_WARNING, "DRI2: Failed to initialize DRI3");
+         _eglLog(_EGL_WARNING, "DRI3: Failed to initialize");
 
       return EGL_FALSE;
    }
@@ -532,7 +532,7 @@ dri3_x11_connect(struct dri2_egl_display *dri2_dpy)
 
    dri2_dpy->driver_name = loader_get_driver_for_fd(dri2_dpy->fd, 0);
    if (!dri2_dpy->driver_name) {
-      _eglLog(_EGL_WARNING, "DRI2: No driver found");
+      _eglLog(_EGL_WARNING, "DRI3: No driver found");
       close(dri2_dpy->fd);
       return EGL_FALSE;
    }

From 874a1ed813c62af245524df6f6d55c1d64e0781d Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 17 Nov 2015 01:37:27 -0800
Subject: [PATCH 037/335] i965: Add missing stdio.h include to brw_compiler.h.

This is needed for the FILE * type in brw_print_vue_map().

Apparently, all files that include brw_compiler.h already pick this up
via some include chain, so this isn't actually a build fix.  However,
I have patches which introduce new consumers of brw_compiler.h that
fail to build because of the missing #include.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 src/mesa/drivers/dri/i965/brw_compiler.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index 3f546161409..8f147d3f75a 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -23,6 +23,7 @@
 
 #pragma once
 
+#include <stdio.h>
 #include "brw_device_info.h"
 #include "main/mtypes.h"
 

From e9b0fa496ca3d8645ff528b0b44c76ebfa76b534 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 10 Nov 2015 18:06:07 -0800
Subject: [PATCH 038/335] i965: Add more MAX_*_URB_ENTRY_SIZE_BYTES #defines.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
---
 src/mesa/drivers/dri/i965/brw_defines.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 0b8de63df42..ade3ede082f 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1938,8 +1938,14 @@ enum brw_message_target {
 
 /* Gen7 "GS URB Entry Allocation Size" is a U9-1 field, so the maximum gs_size
  * is 2^9, or 512.  It's counted in multiples of 64 bytes.
+ *
+ * Identical for VS, DS, and HS.
  */
 #define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES                (512*64)
+#define GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES                (512*64)
+#define GEN7_MAX_HS_URB_ENTRY_SIZE_BYTES                (512*64)
+#define GEN7_MAX_VS_URB_ENTRY_SIZE_BYTES                (512*64)
+
 /* Gen6 "GS URB Entry Allocation Size" is defined as a number of 1024-bit
  * (128 bytes) URB rows and the maximum allowed value is 5 rows.
  */

From df87cb837f995827072056d361207fd9fce514f2 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Sat, 25 Jul 2015 19:28:59 -0700
Subject: [PATCH 039/335] i965: Add INTEL_DEBUG=tcs,tes and hs,ds flags for
 tessellation shaders.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Even though both tessellation shader stages must be used together, I
still think it makes sense to add separate debug flags for each stage.
It makes it possible to read the TCS/HS, rule out problems, then read
the TES/DS separately, without sifting through as much printed text.

I decided to add both the GL names (tcs/tes) and hardware names (hs/ds)
so they can be used interchangeably.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
---
 src/mesa/drivers/dri/i965/intel_debug.c | 8 ++++++--
 src/mesa/drivers/dri/i965/intel_debug.h | 2 ++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c
index c00d2e786f3..f53c4ab518a 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.c
+++ b/src/mesa/drivers/dri/i965/intel_debug.c
@@ -75,6 +75,10 @@ static const struct debug_control debug_control[] = {
    { "cs",          DEBUG_CS },
    { "hex",         DEBUG_HEX },
    { "nocompact",   DEBUG_NO_COMPACTION },
+   { "hs",          DEBUG_TCS },
+   { "tcs",         DEBUG_TCS },
+   { "ds",          DEBUG_TES },
+   { "tes",         DEBUG_TES },
    { NULL,    0 }
 };
 
@@ -83,8 +87,8 @@ intel_debug_flag_for_shader_stage(gl_shader_stage stage)
 {
    uint64_t flags[] = {
       [MESA_SHADER_VERTEX] = DEBUG_VS,
-      [MESA_SHADER_TESS_CTRL] = 0,
-      [MESA_SHADER_TESS_EVAL] = 0,
+      [MESA_SHADER_TESS_CTRL] = DEBUG_TCS,
+      [MESA_SHADER_TESS_EVAL] = DEBUG_TES,
       [MESA_SHADER_GEOMETRY] = DEBUG_GS,
       [MESA_SHADER_FRAGMENT] = DEBUG_WM,
       [MESA_SHADER_COMPUTE] = DEBUG_CS,
diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h
index 98bd7e93956..9c6030a6d7d 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.h
+++ b/src/mesa/drivers/dri/i965/intel_debug.h
@@ -69,6 +69,8 @@ extern uint64_t INTEL_DEBUG;
 #define DEBUG_CS                  (1ull << 33)
 #define DEBUG_HEX                 (1ull << 34)
 #define DEBUG_NO_COMPACTION       (1ull << 35)
+#define DEBUG_TCS                 (1ull << 36)
+#define DEBUG_TES                 (1ull << 37)
 
 #ifdef HAVE_ANDROID_PLATFORM
 #define LOG_TAG "INTEL-MESA"

From 5b596f38785a11ad429e30b2237de2c8c59a451f Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 10 Nov 2015 01:53:33 -0800
Subject: [PATCH 040/335] i965: Add INTEL_DEBUG=shader_time support for
 tessellation shaders.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
---
 src/mesa/drivers/dri/i965/brw_context.h |  2 ++
 src/mesa/drivers/dri/i965/brw_program.c | 12 ++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 4b2db61c758..8d6bc196401 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -523,6 +523,8 @@ struct brw_tracked_state {
 enum shader_time_shader_type {
    ST_NONE,
    ST_VS,
+   ST_TCS,
+   ST_TES,
    ST_GS,
    ST_FS8,
    ST_FS16,
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 2297fa69488..f137c8735fb 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -344,6 +344,8 @@ brw_report_shader_time(struct brw_context *brw)
 
       switch (type) {
       case ST_VS:
+      case ST_TCS:
+      case ST_TES:
       case ST_GS:
       case ST_FS8:
       case ST_FS16:
@@ -370,6 +372,8 @@ brw_report_shader_time(struct brw_context *brw)
 
       switch (type) {
       case ST_VS:
+      case ST_TCS:
+      case ST_TES:
       case ST_GS:
       case ST_FS8:
       case ST_FS16:
@@ -407,6 +411,12 @@ brw_report_shader_time(struct brw_context *brw)
       case ST_VS:
          stage = "vs";
          break;
+      case ST_TCS:
+         stage = "tcs";
+         break;
+      case ST_TES:
+         stage = "tes";
+         break;
       case ST_GS:
          stage = "gs";
          break;
@@ -430,6 +440,8 @@ brw_report_shader_time(struct brw_context *brw)
 
    fprintf(stderr, "\n");
    print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total);
+   print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total);
+   print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total);
    print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
    print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
    print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);

From 649803742dd96ea6702c6ef16c35c36e6d4fd676 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Thu, 12 Nov 2015 17:43:52 +1100
Subject: [PATCH 041/335] glsl: move ast layout qualifier handling code into
 its own function

In addition, these rules are now applied only to variables, rather
than to function parameters as well.

V2: move code for handling stream layout qualifier

Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
---
 src/glsl/ast_to_hir.cpp | 414 ++++++++++++++++++++--------------------
 1 file changed, 212 insertions(+), 202 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 97554cbd688..d96caf40304 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2937,6 +2937,216 @@ validate_array_dimensions(const glsl_type *t,
    }
 }
 
+static void
+apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
+                                   ir_variable *var,
+                                   struct _mesa_glsl_parse_state *state,
+                                   YYLTYPE *loc)
+{
+   if (var->name != NULL && strcmp(var->name, "gl_FragCoord") == 0) {
+
+      /* Section 4.3.8.1, page 39 of GLSL 1.50 spec says:
+       *
+       *    "Within any shader, the first redeclarations of gl_FragCoord
+       *     must appear before any use of gl_FragCoord."
+       *
+       * Generate a compiler error if above condition is not met by the
+       * fragment shader.
+       */
+      ir_variable *earlier = state->symbols->get_variable("gl_FragCoord");
+      if (earlier != NULL &&
+          earlier->data.used &&
+          !state->fs_redeclares_gl_fragcoord) {
+         _mesa_glsl_error(loc, state,
+                          "gl_FragCoord used before its first redeclaration "
+                          "in fragment shader");
+      }
+
+      /* Make sure all gl_FragCoord redeclarations specify the same layout
+       * qualifiers.
+       */
+      if (is_conflicting_fragcoord_redeclaration(state, qual)) {
+         const char *const qual_string =
+            get_layout_qualifier_string(qual->flags.q.origin_upper_left,
+                                        qual->flags.q.pixel_center_integer);
+
+         const char *const state_string =
+            get_layout_qualifier_string(state->fs_origin_upper_left,
+                                        state->fs_pixel_center_integer);
+
+         _mesa_glsl_error(loc, state,
+                          "gl_FragCoord redeclared with different layout "
+                          "qualifiers (%s) and (%s) ",
+                          state_string,
+                          qual_string);
+      }
+      state->fs_origin_upper_left = qual->flags.q.origin_upper_left;
+      state->fs_pixel_center_integer = qual->flags.q.pixel_center_integer;
+      state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers =
+         !qual->flags.q.origin_upper_left && !qual->flags.q.pixel_center_integer;
+      state->fs_redeclares_gl_fragcoord =
+         state->fs_origin_upper_left ||
+         state->fs_pixel_center_integer ||
+         state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers;
+   }
+
+   var->data.pixel_center_integer = qual->flags.q.pixel_center_integer;
+   var->data.origin_upper_left = qual->flags.q.origin_upper_left;
+   if ((qual->flags.q.origin_upper_left || qual->flags.q.pixel_center_integer)
+       && (strcmp(var->name, "gl_FragCoord") != 0)) {
+      const char *const qual_string = (qual->flags.q.origin_upper_left)
+         ? "origin_upper_left" : "pixel_center_integer";
+
+      _mesa_glsl_error(loc, state,
+		       "layout qualifier `%s' can only be applied to "
+		       "fragment shader input `gl_FragCoord'",
+		       qual_string);
+   }
+
+   if (qual->flags.q.explicit_location) {
+      validate_explicit_location(qual, var, state, loc);
+   } else if (qual->flags.q.explicit_index) {
+      _mesa_glsl_error(loc, state, "explicit index requires explicit location");
+   }
+
+   if (qual->flags.q.explicit_binding &&
+       validate_binding_qualifier(state, loc, var->type, qual)) {
+      var->data.explicit_binding = true;
+      var->data.binding = qual->binding;
+   }
+
+   if (state->stage == MESA_SHADER_GEOMETRY &&
+       qual->flags.q.out && qual->flags.q.stream) {
+      var->data.stream = qual->stream;
+   }
+
+   if (var->type->contains_atomic()) {
+      if (var->data.mode == ir_var_uniform) {
+         if (var->data.explicit_binding) {
+            unsigned *offset =
+               &state->atomic_counter_offsets[var->data.binding];
+
+            if (*offset % ATOMIC_COUNTER_SIZE)
+               _mesa_glsl_error(loc, state,
+                                "misaligned atomic counter offset");
+
+            var->data.atomic.offset = *offset;
+            *offset += var->type->atomic_size();
+
+         } else {
+            _mesa_glsl_error(loc, state,
+                             "atomic counters require explicit binding point");
+         }
+      } else if (var->data.mode != ir_var_function_in) {
+         _mesa_glsl_error(loc, state, "atomic counters may only be declared as "
+                          "function parameters or uniform-qualified "
+                          "global variables");
+      }
+   }
+
+   /* Is the 'layout' keyword used with parameters that allow relaxed checking.
+    * Many implementations of GL_ARB_fragment_coord_conventions_enable and some
+    * implementations (only Mesa?) GL_ARB_explicit_attrib_location_enable
+    * allowed the layout qualifier to be used with 'varying' and 'attribute'.
+    * These extensions and all following extensions that add the 'layout'
+    * keyword have been modified to require the use of 'in' or 'out'.
+    *
+    * The following extension do not allow the deprecated keywords:
+    *
+    *    GL_AMD_conservative_depth
+    *    GL_ARB_conservative_depth
+    *    GL_ARB_gpu_shader5
+    *    GL_ARB_separate_shader_objects
+    *    GL_ARB_tessellation_shader
+    *    GL_ARB_transform_feedback3
+    *    GL_ARB_uniform_buffer_object
+    *
+    * It is unknown whether GL_EXT_shader_image_load_store or GL_NV_gpu_shader5
+    * allow layout with the deprecated keywords.
+    */
+   const bool relaxed_layout_qualifier_checking =
+      state->ARB_fragment_coord_conventions_enable;
+
+   const bool uses_deprecated_qualifier = qual->flags.q.attribute
+      || qual->flags.q.varying;
+   if (qual->has_layout() && uses_deprecated_qualifier) {
+      if (relaxed_layout_qualifier_checking) {
+         _mesa_glsl_warning(loc, state,
+                            "`layout' qualifier may not be used with "
+                            "`attribute' or `varying'");
+      } else {
+         _mesa_glsl_error(loc, state,
+                          "`layout' qualifier may not be used with "
+                          "`attribute' or `varying'");
+      }
+   }
+
+   /* Layout qualifiers for gl_FragDepth, which are enabled by extension
+    * AMD_conservative_depth.
+    */
+   int depth_layout_count = qual->flags.q.depth_any
+      + qual->flags.q.depth_greater
+      + qual->flags.q.depth_less
+      + qual->flags.q.depth_unchanged;
+   if (depth_layout_count > 0
+       && !state->AMD_conservative_depth_enable
+       && !state->ARB_conservative_depth_enable) {
+       _mesa_glsl_error(loc, state,
+                        "extension GL_AMD_conservative_depth or "
+                        "GL_ARB_conservative_depth must be enabled "
+                        "to use depth layout qualifiers");
+   } else if (depth_layout_count > 0
+              && strcmp(var->name, "gl_FragDepth") != 0) {
+       _mesa_glsl_error(loc, state,
+                        "depth layout qualifiers can be applied only to "
+                        "gl_FragDepth");
+   } else if (depth_layout_count > 1
+              && strcmp(var->name, "gl_FragDepth") == 0) {
+      _mesa_glsl_error(loc, state,
+                       "at most one depth layout qualifier can be applied to "
+                       "gl_FragDepth");
+   }
+   if (qual->flags.q.depth_any)
+      var->data.depth_layout = ir_depth_layout_any;
+   else if (qual->flags.q.depth_greater)
+      var->data.depth_layout = ir_depth_layout_greater;
+   else if (qual->flags.q.depth_less)
+      var->data.depth_layout = ir_depth_layout_less;
+   else if (qual->flags.q.depth_unchanged)
+       var->data.depth_layout = ir_depth_layout_unchanged;
+   else
+       var->data.depth_layout = ir_depth_layout_none;
+
+   if (qual->flags.q.std140 ||
+       qual->flags.q.std430 ||
+       qual->flags.q.packed ||
+       qual->flags.q.shared) {
+      _mesa_glsl_error(loc, state,
+                       "uniform and shader storage block layout qualifiers "
+                       "std140, std430, packed, and shared can only be "
+                       "applied to uniform or shader storage blocks, not "
+                       "members");
+   }
+
+   if (qual->flags.q.row_major || qual->flags.q.column_major) {
+      validate_matrix_layout_for_type(state, loc, var->type, var);
+   }
+
+   /* From section 4.4.1.3 of the GLSL 4.50 specification (Fragment Shader
+    * Inputs):
+    *
+    *  "Fragment shaders also allow the following layout qualifier on in only
+    *   (not with variable declarations)
+    *     layout-qualifier-id
+    *        early_fragment_tests
+    *   [...]"
+    */
+   if (qual->flags.q.early_fragment_tests) {
+      _mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier only "
+                       "valid in fragment shader input layout declaration.");
+   }
+}
+
 static void
 apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
                                  ir_variable *var,
@@ -2991,11 +3201,6 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
          select_gles_precision(qual->precision, var->type, state, loc);
    }
 
-   if (state->stage == MESA_SHADER_GEOMETRY &&
-       qual->flags.q.out && qual->flags.q.stream) {
-      var->data.stream = qual->stream;
-   }
-
    if (qual->flags.q.patch)
       var->data.patch = 1;
 
@@ -3135,102 +3340,6 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
       interpret_interpolation_qualifier(qual, (ir_variable_mode) var->data.mode,
                                         state, loc);
 
-   var->data.pixel_center_integer = qual->flags.q.pixel_center_integer;
-   var->data.origin_upper_left = qual->flags.q.origin_upper_left;
-   if ((qual->flags.q.origin_upper_left || qual->flags.q.pixel_center_integer)
-       && (strcmp(var->name, "gl_FragCoord") != 0)) {
-      const char *const qual_string = (qual->flags.q.origin_upper_left)
-         ? "origin_upper_left" : "pixel_center_integer";
-
-      _mesa_glsl_error(loc, state,
-		       "layout qualifier `%s' can only be applied to "
-		       "fragment shader input `gl_FragCoord'",
-		       qual_string);
-   }
-
-   if (var->name != NULL && strcmp(var->name, "gl_FragCoord") == 0) {
-
-      /* Section 4.3.8.1, page 39 of GLSL 1.50 spec says:
-       *
-       *    "Within any shader, the first redeclarations of gl_FragCoord
-       *     must appear before any use of gl_FragCoord."
-       *
-       * Generate a compiler error if above condition is not met by the
-       * fragment shader.
-       */
-      ir_variable *earlier = state->symbols->get_variable("gl_FragCoord");
-      if (earlier != NULL &&
-          earlier->data.used &&
-          !state->fs_redeclares_gl_fragcoord) {
-         _mesa_glsl_error(loc, state,
-                          "gl_FragCoord used before its first redeclaration "
-                          "in fragment shader");
-      }
-
-      /* Make sure all gl_FragCoord redeclarations specify the same layout
-       * qualifiers.
-       */
-      if (is_conflicting_fragcoord_redeclaration(state, qual)) {
-         const char *const qual_string =
-            get_layout_qualifier_string(qual->flags.q.origin_upper_left,
-                                        qual->flags.q.pixel_center_integer);
-
-         const char *const state_string =
-            get_layout_qualifier_string(state->fs_origin_upper_left,
-                                        state->fs_pixel_center_integer);
-
-         _mesa_glsl_error(loc, state,
-                          "gl_FragCoord redeclared with different layout "
-                          "qualifiers (%s) and (%s) ",
-                          state_string,
-                          qual_string);
-      }
-      state->fs_origin_upper_left = qual->flags.q.origin_upper_left;
-      state->fs_pixel_center_integer = qual->flags.q.pixel_center_integer;
-      state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers =
-         !qual->flags.q.origin_upper_left && !qual->flags.q.pixel_center_integer;
-      state->fs_redeclares_gl_fragcoord =
-         state->fs_origin_upper_left ||
-         state->fs_pixel_center_integer ||
-         state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers;
-   }
-
-   if (qual->flags.q.explicit_location) {
-      validate_explicit_location(qual, var, state, loc);
-   } else if (qual->flags.q.explicit_index) {
-      _mesa_glsl_error(loc, state, "explicit index requires explicit location");
-   }
-
-   if (qual->flags.q.explicit_binding &&
-       validate_binding_qualifier(state, loc, var->type, qual)) {
-      var->data.explicit_binding = true;
-      var->data.binding = qual->binding;
-   }
-
-   if (var->type->contains_atomic()) {
-      if (var->data.mode == ir_var_uniform) {
-         if (var->data.explicit_binding) {
-            unsigned *offset =
-               &state->atomic_counter_offsets[var->data.binding];
-
-            if (*offset % ATOMIC_COUNTER_SIZE)
-               _mesa_glsl_error(loc, state,
-                                "misaligned atomic counter offset");
-
-            var->data.atomic.offset = *offset;
-            *offset += var->type->atomic_size();
-
-         } else {
-            _mesa_glsl_error(loc, state,
-                             "atomic counters require explicit binding point");
-         }
-      } else if (var->data.mode != ir_var_function_in) {
-         _mesa_glsl_error(loc, state, "atomic counters may only be declared as "
-                          "function parameters or uniform-qualified "
-                          "global variables");
-      }
-   }
-
    /* Does the declaration use the deprecated 'attribute' or 'varying'
     * keywords?
     */
@@ -3266,114 +3375,13 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
                        "`out' or `varying' variables between shader stages");
    }
 
-
-   /* Is the 'layout' keyword used with parameters that allow relaxed checking.
-    * Many implementations of GL_ARB_fragment_coord_conventions_enable and some
-    * implementations (only Mesa?) GL_ARB_explicit_attrib_location_enable
-    * allowed the layout qualifier to be used with 'varying' and 'attribute'.
-    * These extensions and all following extensions that add the 'layout'
-    * keyword have been modified to require the use of 'in' or 'out'.
-    *
-    * The following extension do not allow the deprecated keywords:
-    *
-    *    GL_AMD_conservative_depth
-    *    GL_ARB_conservative_depth
-    *    GL_ARB_gpu_shader5
-    *    GL_ARB_separate_shader_objects
-    *    GL_ARB_tessellation_shader
-    *    GL_ARB_transform_feedback3
-    *    GL_ARB_uniform_buffer_object
-    *
-    * It is unknown whether GL_EXT_shader_image_load_store or GL_NV_gpu_shader5
-    * allow layout with the deprecated keywords.
-    */
-   const bool relaxed_layout_qualifier_checking =
-      state->ARB_fragment_coord_conventions_enable;
-
-   if (qual->has_layout() && uses_deprecated_qualifier) {
-      if (relaxed_layout_qualifier_checking) {
-         _mesa_glsl_warning(loc, state,
-                            "`layout' qualifier may not be used with "
-                            "`attribute' or `varying'");
-      } else {
-         _mesa_glsl_error(loc, state,
-                          "`layout' qualifier may not be used with "
-                          "`attribute' or `varying'");
-      }
-   }
-
-   /* Layout qualifiers for gl_FragDepth, which are enabled by extension
-    * AMD_conservative_depth.
-    */
-   int depth_layout_count = qual->flags.q.depth_any
-      + qual->flags.q.depth_greater
-      + qual->flags.q.depth_less
-      + qual->flags.q.depth_unchanged;
-   if (depth_layout_count > 0
-       && !state->AMD_conservative_depth_enable
-       && !state->ARB_conservative_depth_enable) {
-       _mesa_glsl_error(loc, state,
-                        "extension GL_AMD_conservative_depth or "
-                        "GL_ARB_conservative_depth must be enabled "
-                        "to use depth layout qualifiers");
-   } else if (depth_layout_count > 0
-              && strcmp(var->name, "gl_FragDepth") != 0) {
-       _mesa_glsl_error(loc, state,
-                        "depth layout qualifiers can be applied only to "
-                        "gl_FragDepth");
-   } else if (depth_layout_count > 1
-              && strcmp(var->name, "gl_FragDepth") == 0) {
-      _mesa_glsl_error(loc, state,
-                       "at most one depth layout qualifier can be applied to "
-                       "gl_FragDepth");
-   }
-   if (qual->flags.q.depth_any)
-      var->data.depth_layout = ir_depth_layout_any;
-   else if (qual->flags.q.depth_greater)
-      var->data.depth_layout = ir_depth_layout_greater;
-   else if (qual->flags.q.depth_less)
-      var->data.depth_layout = ir_depth_layout_less;
-   else if (qual->flags.q.depth_unchanged)
-       var->data.depth_layout = ir_depth_layout_unchanged;
-   else
-       var->data.depth_layout = ir_depth_layout_none;
-
-   if (qual->flags.q.std140 ||
-       qual->flags.q.std430 ||
-       qual->flags.q.packed ||
-       qual->flags.q.shared) {
-      _mesa_glsl_error(loc, state,
-                       "uniform and shader storage block layout qualifiers "
-                       "std140, std430, packed, and shared can only be "
-                       "applied to uniform or shader storage blocks, not "
-                       "members");
-   }
-
    if (qual->flags.q.shared_storage && state->stage != MESA_SHADER_COMPUTE) {
       _mesa_glsl_error(loc, state,
                        "the shared storage qualifiers can only be used with "
                        "compute shaders");
    }
 
-   if (qual->flags.q.row_major || qual->flags.q.column_major) {
-      validate_matrix_layout_for_type(state, loc, var->type, var);
-   }
-
    apply_image_qualifier_to_variable(qual, var, state, loc);
-
-   /* From section 4.4.1.3 of the GLSL 4.50 specification (Fragment Shader
-    * Inputs):
-    *
-    *  "Fragment shaders also allow the following layout qualifier on in only
-    *   (not with variable declarations)
-    *     layout-qualifier-id
-    *        early_fragment_tests
-    *   [...]"
-    */
-   if (qual->flags.q.early_fragment_tests) {
-      _mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier only "
-                       "valid in fragment shader input layout declaration.");
-   }
 }
 
 /**
@@ -4187,6 +4195,8 @@ ast_declarator_list::hir(exec_list *instructions,
 
       apply_type_qualifier_to_variable(& this->type->qualifier, var, state,
 				       & loc, false);
+      apply_layout_qualifier_to_variable(&this->type->qualifier, var, state,
+                                         &loc);
 
       if (this->type->qualifier.flags.q.invariant) {
          if (!is_varying_var(var, state->stage)) {

From 8cf795dc7c874a9062a27fc393066e7bdae94501 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Fri, 13 Nov 2015 09:45:36 +1100
Subject: [PATCH 042/335] glsl: move block validation outside function that
 validates members

Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
---
 src/glsl/ast_to_hir.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index d96caf40304..33b0f1b76bb 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -6068,14 +6068,6 @@ ast_process_structure_or_interface_block(exec_list *instructions,
 {
    unsigned decl_count = 0;
 
-   /* For blocks that accept memory qualifiers (i.e. shader storage), verify
-    * that we don't have incompatible qualifiers
-    */
-   if (layout && layout->flags.q.read_only && layout->flags.q.write_only) {
-      _mesa_glsl_error(&loc, state,
-                       "Interface block sets both readonly and writeonly");
-   }
-
    /* Make an initial pass over the list of fields to determine how
     * many there are.  Each element in this list is an ast_declarator_list.
     * This means that we actually need to count the number of elements in the
@@ -6492,6 +6484,14 @@ ast_interface_block::hir(exec_list *instructions,
     */
    state->struct_specifier_depth++;
 
+   /* For blocks that accept memory qualifiers (i.e. shader storage), verify
+    * that we don't have incompatible qualifiers
+    */
+   if (this->layout.flags.q.read_only && this->layout.flags.q.write_only) {
+      _mesa_glsl_error(&loc, state,
+                       "Interface block sets both readonly and writeonly");
+   }
+
    unsigned int num_variables =
       ast_process_structure_or_interface_block(&declared_variables,
                                                state,

From 14d343b0247e8ce826b43c4cb0f9e8c94102167e Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Fri, 13 Nov 2015 09:49:31 +1100
Subject: [PATCH 043/335] glsl: rename function that processes struct and iface
 members

As of the previous commit this function handles only struct/iface
members.

Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
---
 src/glsl/ast_to_hir.cpp | 60 ++++++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 33b0f1b76bb..ea7b2c4016d 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -6055,16 +6055,16 @@ ast_type_specifier::hir(exec_list *instructions,
  * stored in \c *fields_ret.
  */
 unsigned
-ast_process_structure_or_interface_block(exec_list *instructions,
-                                         struct _mesa_glsl_parse_state *state,
-                                         exec_list *declarations,
-                                         YYLTYPE &loc,
-                                         glsl_struct_field **fields_ret,
-                                         bool is_interface,
-                                         enum glsl_matrix_layout matrix_layout,
-                                         bool allow_reserved_names,
-                                         ir_variable_mode var_mode,
-                                         ast_type_qualifier *layout)
+ast_process_struct_or_iface_block_members(exec_list *instructions,
+                                          struct _mesa_glsl_parse_state *state,
+                                          exec_list *declarations,
+                                          YYLTYPE &loc,
+                                          glsl_struct_field **fields_ret,
+                                          bool is_interface,
+                                          enum glsl_matrix_layout matrix_layout,
+                                          bool allow_reserved_names,
+                                          ir_variable_mode var_mode,
+                                          ast_type_qualifier *layout)
 {
    unsigned decl_count = 0;
 
@@ -6329,16 +6329,16 @@ ast_struct_specifier::hir(exec_list *instructions,
 
    glsl_struct_field *fields;
    unsigned decl_count =
-      ast_process_structure_or_interface_block(instructions,
-                                               state,
-                                               &this->declarations,
-                                               loc,
-                                               &fields,
-                                               false,
-                                               GLSL_MATRIX_LAYOUT_INHERITED,
-                                               false /* allow_reserved_names */,
-                                               ir_var_auto,
-                                               NULL);
+      ast_process_struct_or_iface_block_members(instructions,
+                                                state,
+                                                &this->declarations,
+                                                loc,
+                                                &fields,
+                                                false,
+                                                GLSL_MATRIX_LAYOUT_INHERITED,
+                                                false /* allow_reserved_names */,
+                                                ir_var_auto,
+                                                NULL);
 
    validate_identifier(this->name, loc, state);
 
@@ -6493,16 +6493,16 @@ ast_interface_block::hir(exec_list *instructions,
    }
 
    unsigned int num_variables =
-      ast_process_structure_or_interface_block(&declared_variables,
-                                               state,
-                                               &this->declarations,
-                                               loc,
-                                               &fields,
-                                               true,
-                                               matrix_layout,
-                                               redeclaring_per_vertex,
-                                               var_mode,
-                                               &this->layout);
+      ast_process_struct_or_iface_block_members(&declared_variables,
+                                                state,
+                                                &this->declarations,
+                                                loc,
+                                                &fields,
+                                                true,
+                                                matrix_layout,
+                                                redeclaring_per_vertex,
+                                                var_mode,
+                                                &this->layout);
 
    state->struct_specifier_depth--;
 

From c54865db784ec26406aa98ebe67d86568ab9fc96 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Fri, 13 Nov 2015 10:27:00 +1100
Subject: [PATCH 044/335] glsl: only do type and qualifier validation once per
 declaration

Previously, for struct and block members, type and qualifier validation
was repeated for every variable in a declaration.

For example,

struct S {
  atomic_uint x, y, z;
};

would previously generate three error messages when one is sufficient.

Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
---
 src/glsl/ast_to_hir.cpp | 210 ++++++++++++++++++++--------------------
 1 file changed, 104 insertions(+), 106 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index ea7b2c4016d..b553a0dd326 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -6102,75 +6102,115 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
       const glsl_type *decl_type =
          decl_list->type->glsl_type(& type_name, state);
 
+      const struct ast_type_qualifier *const qual =
+         &decl_list->type->qualifier;
+
+      /* From section 4.3.9 of the GLSL 4.40 spec:
+       *
+       *    "[In interface blocks] opaque types are not allowed."
+       *
+       * It should be impossible for decl_type to be NULL here.  Cases that
+       * might naturally lead to decl_type being NULL, especially for the
+       * is_interface case, will have resulted in compilation having
+       * already halted due to a syntax error.
+       */
+      assert(decl_type);
+
+      if (is_interface && decl_type->contains_opaque()) {
+         YYLTYPE loc = decl_list->get_location();
+         _mesa_glsl_error(&loc, state,
+                          "uniform/buffer in non-default interface block contains "
+                          "opaque variable");
+      }
+
+      if (decl_type->contains_atomic()) {
+         /* From section 4.1.7.3 of the GLSL 4.40 spec:
+          *
+          *    "Members of structures cannot be declared as atomic counter
+          *     types."
+          */
+         YYLTYPE loc = decl_list->get_location();
+         _mesa_glsl_error(&loc, state, "atomic counter in structure, "
+                          "shader storage block or uniform block");
+      }
+
+      if (decl_type->contains_image()) {
+         /* FINISHME: Same problem as with atomic counters.
+          * FINISHME: Request clarification from Khronos and add
+          * FINISHME: spec quotation here.
+          */
+         YYLTYPE loc = decl_list->get_location();
+         _mesa_glsl_error(&loc, state,
+                          "image in structure, shader storage block or "
+                          "uniform block");
+      }
+
+      if (qual->flags.q.explicit_binding)
+         validate_binding_qualifier(state, &loc, decl_type, qual);
+
+      if (qual->flags.q.std140 ||
+          qual->flags.q.std430 ||
+          qual->flags.q.packed ||
+          qual->flags.q.shared) {
+         _mesa_glsl_error(&loc, state,
+                          "uniform/shader storage block layout qualifiers "
+                          "std140, std430, packed, and shared can only be "
+                          "applied to uniform/shader storage blocks, not "
+                          "members");
+      }
+
+      if (qual->flags.q.constant) {
+         YYLTYPE loc = decl_list->get_location();
+         _mesa_glsl_error(&loc, state,
+                          "const storage qualifier cannot be applied "
+                          "to struct or interface block members");
+      }
+
+      /* From Section 4.4.2.3 (Geometry Outputs) of the GLSL 4.50 spec:
+       *
+       *   "A block member may be declared with a stream identifier, but
+       *   the specified stream must match the stream associated with the
+       *   containing block."
+       */
+      if (qual->flags.q.explicit_stream &&
+          qual->stream != layout->stream) {
+         _mesa_glsl_error(&loc, state, "stream layout qualifier on interface "
+                          "block member does not match the interface block "
+                          "(%d vs %d)", qual->stream, layout->stream);
+      }
+
+      if (qual->flags.q.uniform && qual->has_interpolation()) {
+         _mesa_glsl_error(&loc, state,
+                          "interpolation qualifiers cannot be used "
+                          "with uniform interface blocks");
+      }
+
+      if ((qual->flags.q.uniform || !is_interface) &&
+          qual->has_auxiliary_storage()) {
+         _mesa_glsl_error(&loc, state,
+                          "auxiliary storage qualifiers cannot be used "
+                          "in uniform blocks or structures.");
+      }
+
+      if (qual->flags.q.row_major || qual->flags.q.column_major) {
+         if (!qual->flags.q.uniform && !qual->flags.q.buffer) {
+            _mesa_glsl_error(&loc, state,
+                             "row_major and column_major can only be "
+                             "applied to interface blocks");
+         } else
+            validate_matrix_layout_for_type(state, &loc, decl_type, NULL);
+      }
+
+      if (qual->flags.q.read_only && qual->flags.q.write_only) {
+         _mesa_glsl_error(&loc, state, "buffer variable can't be both "
+                          "readonly and writeonly.");
+      }
+
       foreach_list_typed (ast_declaration, decl, link,
                           &decl_list->declarations) {
          if (!allow_reserved_names)
             validate_identifier(decl->identifier, loc, state);
 
-         /* From section 4.3.9 of the GLSL 4.40 spec:
-          *
-          *    "[In interface blocks] opaque types are not allowed."
-          *
-          * It should be impossible for decl_type to be NULL here.  Cases that
-          * might naturally lead to decl_type being NULL, especially for the
-          * is_interface case, will have resulted in compilation having
-          * already halted due to a syntax error.
-          */
-         assert(decl_type);
-
-         if (is_interface && decl_type->contains_opaque()) {
-            YYLTYPE loc = decl_list->get_location();
-            _mesa_glsl_error(&loc, state,
-                             "uniform/buffer in non-default interface block contains "
-                             "opaque variable");
-         }
-
-         if (decl_type->contains_atomic()) {
-            /* From section 4.1.7.3 of the GLSL 4.40 spec:
-             *
-             *    "Members of structures cannot be declared as atomic counter
-             *     types."
-             */
-            YYLTYPE loc = decl_list->get_location();
-            _mesa_glsl_error(&loc, state, "atomic counter in structure, "
-                             "shader storage block or uniform block");
-         }
-
-         if (decl_type->contains_image()) {
-            /* FINISHME: Same problem as with atomic counters.
-             * FINISHME: Request clarification from Khronos and add
-             * FINISHME: spec quotation here.
-             */
-            YYLTYPE loc = decl_list->get_location();
-            _mesa_glsl_error(&loc, state,
-                             "image in structure, shader storage block or "
-                             "uniform block");
-         }
-
-         const struct ast_type_qualifier *const qual =
-            & decl_list->type->qualifier;
-
-         if (qual->flags.q.explicit_binding)
-            validate_binding_qualifier(state, &loc, decl_type, qual);
-
-         if (qual->flags.q.std140 ||
-             qual->flags.q.std430 ||
-             qual->flags.q.packed ||
-             qual->flags.q.shared) {
-            _mesa_glsl_error(&loc, state,
-                             "uniform/shader storage block layout qualifiers "
-                             "std140, std430, packed, and shared can only be "
-                             "applied to uniform/shader storage blocks, not "
-                             "members");
-         }
-
-         if (qual->flags.q.constant) {
-            YYLTYPE loc = decl_list->get_location();
-            _mesa_glsl_error(&loc, state,
-                             "const storage qualifier cannot be applied "
-                             "to struct or interface block members");
-         }
-
          const struct glsl_type *field_type =
             process_array_type(&loc, decl_type, decl->array_specifier, state);
          validate_array_dimensions(field_type, state, &loc);
@@ -6184,42 +6224,6 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
          fields[i].patch = qual->flags.q.patch ? 1 : 0;
          fields[i].precision = qual->precision;
 
-         /* From Section 4.4.2.3 (Geometry Outputs) of the GLSL 4.50 spec:
-          *
-          *   "A block member may be declared with a stream identifier, but
-          *   the specified stream must match the stream associated with the
-          *   containing block."
-          */
-         if (qual->flags.q.explicit_stream &&
-             qual->stream != layout->stream) {
-            _mesa_glsl_error(&loc, state, "stream layout qualifier on "
-                             "interface block member `%s' does not match "
-                             "the interface block (%d vs %d)",
-                             fields[i].name, qual->stream, layout->stream);
-         }
-
-         if (qual->flags.q.row_major || qual->flags.q.column_major) {
-            if (!qual->flags.q.uniform && !qual->flags.q.buffer) {
-               _mesa_glsl_error(&loc, state,
-                                "row_major and column_major can only be "
-                                "applied to interface blocks");
-            } else
-               validate_matrix_layout_for_type(state, &loc, field_type, NULL);
-         }
-
-         if (qual->flags.q.uniform && qual->has_interpolation()) {
-            _mesa_glsl_error(&loc, state,
-                             "interpolation qualifiers cannot be used "
-                             "with uniform interface blocks");
-         }
-
-         if ((qual->flags.q.uniform || !is_interface) &&
-             qual->has_auxiliary_storage()) {
-            _mesa_glsl_error(&loc, state,
-                             "auxiliary storage qualifiers cannot be used "
-                             "in uniform blocks or structures.");
-         }
-
          /* Propogate row- / column-major information down the fields of the
           * structure or interface block.  Structures need this data because
           * the structure may contain a structure that contains ... a matrix
@@ -6249,12 +6253,6 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
           * be defined inside shader storage buffer objects
           */
          if (layout && var_mode == ir_var_shader_storage) {
-            if (qual->flags.q.read_only && qual->flags.q.write_only) {
-               _mesa_glsl_error(&loc, state,
-                                "buffer variable `%s' can't be "
-                                "readonly and writeonly.", fields[i].name);
-            }
-
             /* For readonly and writeonly qualifiers the field definition,
              * if set, overwrites the layout qualifier.
              */

From f8b5cc827e2fcbd64424495eac31e5d3d3f0567c Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Fri, 13 Nov 2015 10:49:48 +1100
Subject: [PATCH 045/335] glsl: use better location in struct and block error
 messages

Previously only some error messages used the location of the member
declaration, and none used the location of the individual variable.
The remaining messages just gave the location of the struct/block.

Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
---
 src/glsl/ast_to_hir.cpp | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index b553a0dd326..1088ca26779 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -6058,7 +6058,6 @@ unsigned
 ast_process_struct_or_iface_block_members(exec_list *instructions,
                                           struct _mesa_glsl_parse_state *state,
                                           exec_list *declarations,
-                                          YYLTYPE &loc,
                                           glsl_struct_field **fields_ret,
                                           bool is_interface,
                                           enum glsl_matrix_layout matrix_layout,
@@ -6088,6 +6087,7 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
    unsigned i = 0;
    foreach_list_typed (ast_declarator_list, decl_list, link, declarations) {
       const char *type_name;
+      YYLTYPE loc = decl_list->get_location();
 
       decl_list->type->specifier->hir(instructions, state);
 
@@ -6117,7 +6117,6 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
       assert(decl_type);
 
       if (is_interface && decl_type->contains_opaque()) {
-         YYLTYPE loc = decl_list->get_location();
          _mesa_glsl_error(&loc, state,
                           "uniform/buffer in non-default interface block contains "
                           "opaque variable");
@@ -6129,7 +6128,6 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
           *    "Members of structures cannot be declared as atomic counter
           *     types."
           */
-         YYLTYPE loc = decl_list->get_location();
          _mesa_glsl_error(&loc, state, "atomic counter in structure, "
                           "shader storage block or uniform block");
       }
@@ -6139,7 +6137,6 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
           * FINISHME: Request clarification from Khronos and add
           * FINISHME: spec quotation here.
           */
-         YYLTYPE loc = decl_list->get_location();
          _mesa_glsl_error(&loc, state,
                           "image in structure, shader storage block or "
                           "uniform block");
@@ -6160,7 +6157,6 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
       }
 
       if (qual->flags.q.constant) {
-         YYLTYPE loc = decl_list->get_location();
          _mesa_glsl_error(&loc, state,
                           "const storage qualifier cannot be applied "
                           "to struct or interface block members");
@@ -6208,6 +6204,8 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
 
       foreach_list_typed (ast_declaration, decl, link,
                           &decl_list->declarations) {
+         YYLTYPE loc = decl->get_location();
+
          if (!allow_reserved_names)
             validate_identifier(decl->identifier, loc, state);
 
@@ -6330,7 +6328,6 @@ ast_struct_specifier::hir(exec_list *instructions,
       ast_process_struct_or_iface_block_members(instructions,
                                                 state,
                                                 &this->declarations,
-                                                loc,
                                                 &fields,
                                                 false,
                                                 GLSL_MATRIX_LAYOUT_INHERITED,
@@ -6494,7 +6491,6 @@ ast_interface_block::hir(exec_list *instructions,
       ast_process_struct_or_iface_block_members(&declared_variables,
                                                 state,
                                                 &this->declarations,
-                                                loc,
                                                 &fields,
                                                 true,
                                                 matrix_layout,

From a01b8c7e774aec651302fc8177b937c915daf1e7 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Fri, 13 Nov 2015 11:21:42 +1100
Subject: [PATCH 046/335] glsl: cleanup and fix validate matrix function for
 arrays

Previously if the member was an array of matrices then a
warning message would be incorrectly given.

Also the struct case could never be met so it has been removed.

Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
---
 src/glsl/ast_to_hir.cpp | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 1088ca26779..d2b97d2848f 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2490,7 +2490,7 @@ validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state,
                        "uniform block layout qualifiers row_major and "
                        "column_major may not be applied to variables "
                        "outside of uniform blocks");
-   } else if (!type->is_matrix()) {
+   } else if (!type->without_array()->is_matrix()) {
       /* The OpenGL ES 3.0 conformance tests did not originally allow
        * matrix layout qualifiers on non-matrices.  However, the OpenGL
        * 4.4 and OpenGL ES 3.0 (revision TBD) specifications were
@@ -2501,15 +2501,6 @@ validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state,
                          "uniform block layout qualifiers row_major and "
                          "column_major applied to non-matrix types may "
                          "be rejected by older compilers");
-   } else if (type->is_record()) {
-      /* We allow 'layout(row_major)' on structure types because it's the only
-       * way to get row-major layouts on matrices contained in structures.
-       */
-      _mesa_glsl_warning(loc, state,
-                         "uniform block layout qualifiers row_major and "
-                         "column_major applied to structure types is not "
-                         "strictly conformant and may be rejected by other "
-                         "compilers");
    }
 }
 

From 4f4ca6b90ac33ad84bc469f58df1682e93ab3733 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Fri, 13 Nov 2015 11:28:20 +1100
Subject: [PATCH 047/335] glsl: remove temp variable to make code easier to
 read

Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
---
 src/glsl/ast_to_hir.cpp | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index d2b97d2848f..9856f16323b 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -6245,20 +6245,17 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
             /* For readonly and writeonly qualifiers the field definition,
              * if set, overwrites the layout qualifier.
              */
-            bool read_only = layout->flags.q.read_only;
-            bool write_only = layout->flags.q.write_only;
-
             if (qual->flags.q.read_only) {
-               read_only = true;
-               write_only = false;
+               fields[i].image_read_only = true;
+               fields[i].image_write_only = false;
             } else if (qual->flags.q.write_only) {
-               read_only = false;
-               write_only = true;
+               fields[i].image_read_only = false;
+               fields[i].image_write_only = true;
+            } else {
+               fields[i].image_read_only = layout->flags.q.read_only;
+               fields[i].image_write_only = layout->flags.q.write_only;
             }
 
-            fields[i].image_read_only = read_only;
-            fields[i].image_write_only = write_only;
-
             /* For other qualifiers, we set the flag if either the layout
              * qualifier or the field qualifier are set
              */

From 03bbddd139fc3e543cb3aedf64236808ca20eeca Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Fri, 13 Nov 2015 11:41:52 +1100
Subject: [PATCH 048/335] glsl: don't validate binding when it's not needed

Checking that the flag has been set is all the validation that's
needed here.

Also not calling the binding validation function will make things
much simpler when adding compile time constant support as we
won't need to resolve the binding value.

Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
---
 src/glsl/ast_to_hir.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 9856f16323b..97e2351790c 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -6133,8 +6133,11 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
                           "uniform block");
       }
 
-      if (qual->flags.q.explicit_binding)
-         validate_binding_qualifier(state, &loc, decl_type, qual);
+      if (qual->flags.q.explicit_binding) {
+         _mesa_glsl_error(&loc, state,
+                          "binding layout qualifier cannot be applied "
+                          "to struct or interface block members");
+      }
 
       if (qual->flags.q.std140 ||
           qual->flags.q.std430 ||

From d4fbf11b583d76cbb362fb04334f791cc486b583 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Fri, 13 Nov 2015 15:43:13 +1100
Subject: [PATCH 049/335] glsl: rename location layout helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change name from validate -> apply to more accurately describe what
the function does.

Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
---
 src/glsl/ast_to_hir.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 97e2351790c..41d05c07eee 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2650,10 +2650,10 @@ interpret_interpolation_qualifier(const struct ast_type_qualifier *qual,
 
 
 static void
-validate_explicit_location(const struct ast_type_qualifier *qual,
-                           ir_variable *var,
-                           struct _mesa_glsl_parse_state *state,
-                           YYLTYPE *loc)
+apply_explicit_location(const struct ast_type_qualifier *qual,
+                        ir_variable *var,
+                        struct _mesa_glsl_parse_state *state,
+                        YYLTYPE *loc)
 {
    bool fail = false;
 
@@ -2995,7 +2995,7 @@ apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
    }
 
    if (qual->flags.q.explicit_location) {
-      validate_explicit_location(qual, var, state, loc);
+      apply_explicit_location(qual, var, state, loc);
    } else if (qual->flags.q.explicit_index) {
       _mesa_glsl_error(loc, state, "explicit index requires explicit location");
    }

From 2bec154b479c0135c9be54bf15fc817b2be12dce Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Wed, 28 Oct 2015 16:26:15 -0700
Subject: [PATCH 050/335] i965: Implement ARB_pipeline_statistics_query
 tessellation counters.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We basically just need to uncomment Ben's code.

v2: Fix obvious bugs caught by Ben.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ben Widawsky <benjamin.widawsky@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/gen6_queryobj.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c
index 9f4a5db3592..d508c4c9278 100644
--- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
+++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
@@ -136,8 +136,8 @@ emit_pipeline_stat(struct brw_context *brw, drm_intel_bo *bo,
       IA_VERTICES_COUNT,   /* VERTICES_SUBMITTED */
       IA_PRIMITIVES_COUNT, /* PRIMITIVES_SUBMITTED */
       VS_INVOCATION_COUNT, /* VERTEX_SHADER_INVOCATIONS */
-      0, /* HS_INVOCATION_COUNT,*/  /* TESS_CONTROL_SHADER_PATCHES */
-      0, /* DS_INVOCATION_COUNT,*/  /* TESS_EVALUATION_SHADER_INVOCATIONS */
+      HS_INVOCATION_COUNT, /* TESS_CONTROL_SHADER_PATCHES */
+      DS_INVOCATION_COUNT, /* TESS_EVALUATION_SHADER_INVOCATIONS */
       GS_PRIMITIVES_COUNT, /* GEOMETRY_SHADER_PRIMITIVES_EMITTED */
       PS_INVOCATION_COUNT, /* FRAGMENT_SHADER_INVOCATIONS */
       CS_INVOCATION_COUNT, /* COMPUTE_SHADER_INVOCATIONS */
@@ -231,6 +231,8 @@ gen6_queryobj_get_results(struct gl_context *ctx,
    case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
    case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
    case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
+   case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
+   case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
       query->Base.Result = results[1] - results[0];
       break;
 
@@ -250,8 +252,6 @@ gen6_queryobj_get_results(struct gl_context *ctx,
          query->Base.Result /= 4;
       break;
 
-   case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
-   case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
    default:
       unreachable("Unrecognized query target in brw_queryobj_get_results()");
    }
@@ -329,11 +329,11 @@ gen6_begin_query(struct gl_context *ctx, struct gl_query_object *q)
    case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
    case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
    case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
+   case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
+   case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
       emit_pipeline_stat(brw, query->bo, query->Base.Stream, query->Base.Target, 0);
       break;
 
-   case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
-   case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
    default:
       unreachable("Unrecognized query target in brw_begin_query()");
    }
@@ -381,12 +381,12 @@ gen6_end_query(struct gl_context *ctx, struct gl_query_object *q)
    case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
    case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
    case GL_GEOMETRY_SHADER_INVOCATIONS:
+   case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
+   case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
       emit_pipeline_stat(brw, query->bo,
                          query->Base.Stream, query->Base.Target, 1);
       break;
 
-   case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
-   case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
    default:
       unreachable("Unrecognized query target in brw_end_query()");
    }

From c531d409274328c9713221f33f1d24e0f4877451 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Mon, 16 Nov 2015 17:23:01 -0800
Subject: [PATCH 051/335] i965: Add assertion for src_stencil payload size

This helps address a coverity warning and prevents future questions about this
code.

Reported-by: Coverity (via Ilia)
Cc: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Ben Widawsky <benjamin.widawsky@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 84b5920d4f5..995ab229544 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3603,6 +3603,12 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
       assert(devinfo->gen >= 9);
       assert(bld.dispatch_width() != 16);
 
+      /* XXX: src_stencil is only available on gen9+. dst_depth is never
+       * available on gen9+. As such it's impossible to have both enabled at the
+       * same time and therefore length cannot overrun the array.
+       */
+      assert(length < 15);
+
       sources[length] = bld.vgrf(BRW_REGISTER_TYPE_UD);
       bld.exec_all().annotate("FB write OS")
          .emit(FS_OPCODE_PACK_STENCIL_REF, sources[length],

From 27b1d344384ef89faf6d321bf4625d08ba6ff3bf Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 17 Nov 2015 16:31:14 -0800
Subject: [PATCH 052/335] i965: Fix PIPE_CONTOL typo.

PIPE_CONTOL!!!
---
 src/mesa/drivers/dri/i965/gen7_urb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c
index 69162171c4e..161de77e156 100644
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -118,7 +118,7 @@ gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
 
    /* From p292 of the Ivy Bridge PRM (11.2.4 3DSTATE_PUSH_CONSTANT_ALLOC_PS):
     *
-    *     A PIPE_CONTOL command with the CS Stall bit set must be programmed
+    *     A PIPE_CONTROL command with the CS Stall bit set must be programmed
     *     in the ring after this instruction.
     *
     * No such restriction exists for Haswell or Baytrail.

From a4bf28178f064082d3b818d2cd48abf9075cc459 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 10 Nov 2015 15:37:47 -0800
Subject: [PATCH 053/335] vc4: Add support for nir_op_uge, using the carry bit
 on QPU_A_SUB.

It looks like nir_lower_idiv is going to use it soon, so add support.
With Ilia's change, this fixes one case in fs-op-div-large-uint-uint (with
GL 3.0 forced on).

Cc: "11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/vc4/vc4_opt_algebraic.c | 2 ++
 src/gallium/drivers/vc4/vc4_program.c       | 4 ++++
 src/gallium/drivers/vc4/vc4_qir.c           | 8 ++++++++
 src/gallium/drivers/vc4/vc4_qir.h           | 8 ++++++++
 src/gallium/drivers/vc4/vc4_qpu_emit.c      | 4 ++++
 5 files changed, 26 insertions(+)

diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
index f1bab810eff..07a92266dd2 100644
--- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c
+++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
@@ -144,6 +144,8 @@ qir_opt_algebraic(struct vc4_compile *c)
                 case QOP_SEL_X_Y_ZC:
                 case QOP_SEL_X_Y_NS:
                 case QOP_SEL_X_Y_NC:
+                case QOP_SEL_X_Y_CS:
+                case QOP_SEL_X_Y_CC:
                         if (is_zero(c, inst->src[1])) {
                                 /* Replace references to a 0 uniform value
                                  * with the SEL_X_0 equivalent.
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index a48dad804e2..52317bd02af 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -987,6 +987,10 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
                 qir_SF(c, qir_SUB(c, src[0], src[1]));
                 *dest = qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0));
                 break;
+        case nir_op_uge:
+                qir_SF(c, qir_SUB(c, src[0], src[1]));
+                *dest = qir_SEL_X_0_CC(c, qir_uniform_ui(c, ~0));
+                break;
         case nir_op_ilt:
                 qir_SF(c, qir_SUB(c, src[0], src[1]));
                 *dest = qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0));
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 7894b081b19..f2855e159fc 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -69,10 +69,14 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1, false, true },
         [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1, false, true },
         [QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1, false, true },
+        [QOP_SEL_X_0_CS] = { "fsel_x_0_cs", 1, 1, false, true },
+        [QOP_SEL_X_0_CC] = { "fsel_x_0_cc", 1, 1, false, true },
         [QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2, false, true },
         [QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2, false, true },
         [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2, false, true },
         [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2, false, true },
+        [QOP_SEL_X_Y_CS] = { "fsel_x_y_cs", 1, 2, false, true },
+        [QOP_SEL_X_Y_CC] = { "fsel_x_y_cc", 1, 2, false, true },
 
         [QOP_RCP] = { "rcp", 1, 1, false, true },
         [QOP_RSQ] = { "rsq", 1, 1, false, true },
@@ -218,10 +222,14 @@ qir_depends_on_flags(struct qinst *inst)
         case QOP_SEL_X_0_NC:
         case QOP_SEL_X_0_ZS:
         case QOP_SEL_X_0_ZC:
+        case QOP_SEL_X_0_CS:
+        case QOP_SEL_X_0_CC:
         case QOP_SEL_X_Y_NS:
         case QOP_SEL_X_Y_NC:
         case QOP_SEL_X_Y_ZS:
         case QOP_SEL_X_Y_ZC:
+        case QOP_SEL_X_Y_CS:
+        case QOP_SEL_X_Y_CC:
                 return true;
         default:
                 return false;
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index a92ad93ee07..ddb35e41fcf 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -99,11 +99,15 @@ enum qop {
         QOP_SEL_X_0_ZC,
         QOP_SEL_X_0_NS,
         QOP_SEL_X_0_NC,
+        QOP_SEL_X_0_CS,
+        QOP_SEL_X_0_CC,
         /* Selects the src[0] if the ns flag bit is set, otherwise src[1]. */
         QOP_SEL_X_Y_ZS,
         QOP_SEL_X_Y_ZC,
         QOP_SEL_X_Y_NS,
         QOP_SEL_X_Y_NC,
+        QOP_SEL_X_Y_CS,
+        QOP_SEL_X_Y_CC,
 
         QOP_FTOI,
         QOP_ITOF,
@@ -567,10 +571,14 @@ QIR_ALU1(SEL_X_0_ZS)
 QIR_ALU1(SEL_X_0_ZC)
 QIR_ALU1(SEL_X_0_NS)
 QIR_ALU1(SEL_X_0_NC)
+QIR_ALU1(SEL_X_0_CS)
+QIR_ALU1(SEL_X_0_CC)
 QIR_ALU2(SEL_X_Y_ZS)
 QIR_ALU2(SEL_X_Y_ZC)
 QIR_ALU2(SEL_X_Y_NS)
 QIR_ALU2(SEL_X_Y_NC)
+QIR_ALU2(SEL_X_Y_CS)
+QIR_ALU2(SEL_X_Y_CC)
 QIR_ALU2(FMIN)
 QIR_ALU2(FMAX)
 QIR_ALU2(FMINABS)
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 133e1385178..e0d3633da42 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -311,6 +311,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                 case QOP_SEL_X_0_ZC:
                 case QOP_SEL_X_0_NS:
                 case QOP_SEL_X_0_NC:
+                case QOP_SEL_X_0_CS:
+                case QOP_SEL_X_0_CC:
                         queue(c, qpu_a_MOV(dst, src[0]) | unpack);
                         set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS +
                                           QPU_COND_ZS);
@@ -324,6 +326,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                 case QOP_SEL_X_Y_ZC:
                 case QOP_SEL_X_Y_NS:
                 case QOP_SEL_X_Y_NC:
+                case QOP_SEL_X_Y_CS:
+                case QOP_SEL_X_Y_CC:
                         queue(c, qpu_a_MOV(dst, src[0]));
                         if (qinst->src[0].pack)
                                 *(last_inst(c)) |= unpack;

From d18d1ba5877c234a2cf4e3e834df27f03c7b6a11 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 11 Nov 2015 16:50:29 -0800
Subject: [PATCH 054/335] vc4: Fix documentation on vc4_qir_lower_uniforms.c.

---
 src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
index f087c3b81b5..29e3ee31201 100644
--- a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
@@ -22,14 +22,10 @@
  */
 
 /**
- * @file vc4_opt_algebraic.c
+ * @file vc4_qir_lower_uniforms.c
  *
- * This is the optimization pass for miscellaneous changes to instructions
- * where we can simplify the operation by some knowledge about the specific
- * operations.
- *
- * Mostly this will be a matter of turning things into MOVs so that they can
- * later be copy-propagated out.
+ * This is the pre-code-generation pass for fixing up instructions that try to
+ * read from multiple uniform values.
  */
 
 #include "vc4_qir.h"

From dffe7260cd5f9c70df4b817c26a4268ddad2475c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 16 Nov 2015 20:45:46 -0800
Subject: [PATCH 055/335] vc4: Fix uniform reordering to support reading the
 same uniform twice.

This does actually happen in the wild (particularly fabs of a uniform), so
we'd like to support it.
---
 .../drivers/vc4/vc4_reorder_uniforms.c        | 26 +++++++++++++------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_reorder_uniforms.c b/src/gallium/drivers/vc4/vc4_reorder_uniforms.c
index 7f11fba2340..85a0c95e851 100644
--- a/src/gallium/drivers/vc4/vc4_reorder_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_reorder_uniforms.c
@@ -44,18 +44,28 @@ qir_reorder_uniforms(struct vc4_compile *c)
         uint32_t next_uniform = 0;
 
         list_for_each_entry(struct qinst, inst, &c->instructions, link) {
+                uint32_t new = ~0;
+
                 for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
                         if (inst->src[i].file != QFILE_UNIF)
                                 continue;
 
-                        uint32_t new = next_uniform++;
-                        if (uniform_index_size <= new) {
-                                uniform_index_size =
-                                        MAX2(uniform_index_size * 2, 16);
-                                uniform_index =
-                                        realloc(uniform_index,
-                                                uniform_index_size *
-                                                sizeof(uint32_t));
+                        if (new == ~0) {
+                                new = next_uniform++;
+                                if (uniform_index_size <= new) {
+                                        uniform_index_size =
+                                                MAX2(uniform_index_size * 2, 16);
+                                        uniform_index =
+                                                realloc(uniform_index,
+                                                        uniform_index_size *
+                                                        sizeof(uint32_t));
+                                }
+                        } else {
+                                /* If we've got two uniform references in this
+                                 * instruction, they need to be the same
+                                 * uniform value.
+                                 */
+                                assert(inst->src[i].index == uniform_index[new]);
                         }
 
                         uniform_index[new] = inst->src[i].index;

From dd05ffebfcb5d2e7ca44def1907aa2fbadd5e19d Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 11 Nov 2015 17:09:40 -0800
Subject: [PATCH 056/335] vc4: Don't bother lowering uniforms when the same
 value is used twice.

DEQP likes to do math on uniforms, and the "fmaxabs dst, uni, uni" to get
the absolute value would get lowered.  The lowering doesn't bother to try
to restrict the lifetime of the lowered uniforms, so we'd end up with register
allocation failing due to this on 5 of the tests (More tests still fail in
RA, which look like we'll need to reduce lowered uniform lifetimes to
fix).

No changes on shader-db, though fewer extra MOVs are generated on even
glxgears (MOVs pair well enough that it ends up being the same instruction
count).
---
 .../drivers/vc4/vc4_qir_lower_uniforms.c      | 46 +++++++++++++------
 1 file changed, 33 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
index 29e3ee31201..a57e100593c 100644
--- a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
@@ -81,6 +81,33 @@ is_lowerable_uniform(struct qinst *inst, int i)
         return true;
 }
 
+/* Returns the number of different uniform values referenced by the
+ * instruction.
+ */
+static uint32_t
+qir_get_instruction_uniform_count(struct qinst *inst)
+{
+        uint32_t count = 0;
+
+        for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+                if (inst->src[i].file != QFILE_UNIF)
+                        continue;
+
+                bool is_duplicate = false;
+                for (int j = 0; j < i; j++) {
+                        if (inst->src[j].file == QFILE_UNIF &&
+                            inst->src[j].index == inst->src[i].index) {
+                                is_duplicate = true;
+                                break;
+                        }
+                }
+                if (!is_duplicate)
+                        count++;
+        }
+
+        return count;
+}
+
 void
 qir_lower_uniforms(struct vc4_compile *c)
 {
@@ -94,13 +121,7 @@ qir_lower_uniforms(struct vc4_compile *c)
         list_for_each_entry(struct qinst, inst, &c->instructions, link) {
                 uint32_t nsrc = qir_get_op_nsrc(inst->op);
 
-                uint32_t count = 0;
-                for (int i = 0; i < nsrc; i++) {
-                        if (inst->src[i].file == QFILE_UNIF)
-                                count++;
-                }
-
-                if (count <= 1)
+                if (qir_get_instruction_uniform_count(inst) <= 1)
                         continue;
 
                 for (int i = 0; i < nsrc; i++) {
@@ -136,23 +157,22 @@ qir_lower_uniforms(struct vc4_compile *c)
                 list_for_each_entry(struct qinst, inst, &c->instructions, link) {
                         uint32_t nsrc = qir_get_op_nsrc(inst->op);
 
-                        uint32_t count = 0;
-                        for (int i = 0; i < nsrc; i++) {
-                                if (inst->src[i].file == QFILE_UNIF)
-                                        count++;
-                        }
+                        uint32_t count = qir_get_instruction_uniform_count(inst);
 
                         if (count <= 1)
                                 continue;
 
+                        bool removed = false;
                         for (int i = 0; i < nsrc; i++) {
                                 if (is_lowerable_uniform(inst, i) &&
                                     inst->src[i].index == max_index) {
                                         inst->src[i] = temp;
                                         remove_uniform(ht, unif);
-                                        count--;
+                                        removed = true;
                                 }
                         }
+                        if (removed)
+                                count--;
 
                         /* If the instruction doesn't need lowering any more,
                          * then drop it from the list.

From 5dfb4dbc0583d2593e36f7eb27c77b81680db848 Mon Sep 17 00:00:00 2001
From: Neil Roberts <neil@linux.intel.com>
Date: Mon, 16 Nov 2015 14:03:11 +0100
Subject: [PATCH 057/335] i965: Prevent fast clears for MSRTs on SKL

There are currently a bunch of formats that behave strangely when
sampling the cleared color from the MCS buffer on SKL. They seem to
mostly be formats that don't have an alpha component, although it's
not all of them, and we haven't yet found anything in the specs which
would explain this. For now to be on the safe side this patch just
prevents fast clears for MSRTs on SKL altogether so that when fast
clears are eventually enabled it will only be for single-sampled
surfaces. The assumption is that clears are probably more likely to be
used in single-sampled applications anyway so we can at least get them
working and we can enable MSRTs later once we understand the problem
better.

This patch should have no functional effect other than perhaps
receiving fewer perf_debug messages on SKL+.

v2: Improve the commit message to avoid saying the patch disables fast
    clears because it will be merged before fast clears are enabled
    for any surfaces so it doesn't actually disable anything.
Reviewed-by: Ben Widawsky <benjamin.widawsky@intel.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index 12e7c32e424..211c0a44162 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -447,6 +447,13 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
       if (brw->gen < 7)
          clear_type = REP_CLEAR;
 
+      /* Certain formats have unresolved issues with sampling from the MCS
+       * buffer on Gen9. This disables fast clears altogether for MSRTs until
+       * we can figure out what's going on.
+       */
+      if (brw->gen >= 9 && irb->mt->num_samples > 1)
+         clear_type = REP_CLEAR;
+
       if (irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_NO_MCS)
          clear_type = REP_CLEAR;
 

From e117e74baf0e0482fd4c3c2fa412e7168889d286 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Fri, 6 Nov 2015 12:52:51 +0100
Subject: [PATCH 058/335] radeon: move get_driver_query_info to r600_query.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 46 +----------------
 src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
 src/gallium/drivers/radeon/r600_query.c       | 49 +++++++++++++++++++
 3 files changed, 51 insertions(+), 45 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 3599692a857..9cb30c753cc 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -718,50 +718,6 @@ static uint64_t r600_get_timestamp(struct pipe_screen *screen)
 			rscreen->info.r600_clock_crystal_freq;
 }
 
-static int r600_get_driver_query_info(struct pipe_screen *screen,
-				      unsigned index,
-				      struct pipe_driver_query_info *info)
-{
-	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-	struct pipe_driver_query_info list[] = {
-		{"num-compilations", R600_QUERY_NUM_COMPILATIONS, {0}, PIPE_DRIVER_QUERY_TYPE_UINT64,
-		 PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-		{"num-shaders-created", R600_QUERY_NUM_SHADERS_CREATED, {0}, PIPE_DRIVER_QUERY_TYPE_UINT64,
-		 PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-		{"draw-calls", R600_QUERY_DRAW_CALLS, {0}},
-		{"requested-VRAM", R600_QUERY_REQUESTED_VRAM, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-		{"requested-GTT", R600_QUERY_REQUESTED_GTT, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-		{"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}, PIPE_DRIVER_QUERY_TYPE_MICROSECONDS,
-		 PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-		{"num-cs-flushes", R600_QUERY_NUM_CS_FLUSHES, {0}},
-		{"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, PIPE_DRIVER_QUERY_TYPE_BYTES,
-		 PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-		{"VRAM-usage", R600_QUERY_VRAM_USAGE, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-		{"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-		{"GPU-load", R600_QUERY_GPU_LOAD, {100}},
-		{"temperature", R600_QUERY_GPU_TEMPERATURE, {125}},
-		{"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
-		{"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
-	};
-	unsigned num_queries;
-
-	if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
-		num_queries = Elements(list);
-	else if (rscreen->info.drm_major == 3)
-		num_queries = Elements(list) - 3;
-	else
-		num_queries = Elements(list) - 4;
-
-	if (!info)
-		return num_queries;
-
-	if (index >= num_queries)
-		return 0;
-
-	*info = list[index];
-	return 1;
-}
-
 static void r600_fence_reference(struct pipe_screen *screen,
 				 struct pipe_fence_handle **dst,
 				 struct pipe_fence_handle *src)
@@ -949,7 +905,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
 	rscreen->b.get_device_vendor = r600_get_device_vendor;
 	rscreen->b.get_compute_param = r600_get_compute_param;
 	rscreen->b.get_paramf = r600_get_paramf;
-	rscreen->b.get_driver_query_info = r600_get_driver_query_info;
 	rscreen->b.get_timestamp = r600_get_timestamp;
 	rscreen->b.fence_finish = r600_fence_finish;
 	rscreen->b.fence_reference = r600_fence_reference;
@@ -965,6 +920,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
 	}
 
 	r600_init_screen_texture_functions(rscreen);
+	r600_init_screen_query_functions(rscreen);
 
 	rscreen->ws = ws;
 	rscreen->family = rscreen->info.family;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index ebe633b9125..d945e53cbc3 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -526,6 +526,7 @@ uint64_t r600_gpu_load_begin(struct r600_common_screen *rscreen);
 unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin);
 
 /* r600_query.c */
+void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
 void r600_query_init(struct r600_common_context *rctx);
 void r600_suspend_nontimer_queries(struct r600_common_context *ctx);
 void r600_resume_nontimer_queries(struct r600_common_context *ctx);
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 8c2b601a96c..780e2e13a89 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -1002,6 +1002,50 @@ err:
 	return;
 }
 
+static int r600_get_driver_query_info(struct pipe_screen *screen,
+				      unsigned index,
+				      struct pipe_driver_query_info *info)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+	struct pipe_driver_query_info list[] = {
+		{"num-compilations", R600_QUERY_NUM_COMPILATIONS, {0}, PIPE_DRIVER_QUERY_TYPE_UINT64,
+		 PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
+		{"num-shaders-created", R600_QUERY_NUM_SHADERS_CREATED, {0}, PIPE_DRIVER_QUERY_TYPE_UINT64,
+		 PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
+		{"draw-calls", R600_QUERY_DRAW_CALLS, {0}},
+		{"requested-VRAM", R600_QUERY_REQUESTED_VRAM, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
+		{"requested-GTT", R600_QUERY_REQUESTED_GTT, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
+		{"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}, PIPE_DRIVER_QUERY_TYPE_MICROSECONDS,
+		 PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
+		{"num-cs-flushes", R600_QUERY_NUM_CS_FLUSHES, {0}},
+		{"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, PIPE_DRIVER_QUERY_TYPE_BYTES,
+		 PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
+		{"VRAM-usage", R600_QUERY_VRAM_USAGE, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
+		{"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
+		{"GPU-load", R600_QUERY_GPU_LOAD, {100}},
+		{"temperature", R600_QUERY_GPU_TEMPERATURE, {125}},
+		{"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
+		{"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
+	};
+	unsigned num_queries;
+
+	if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
+		num_queries = Elements(list);
+	else if (rscreen->info.drm_major == 3)
+		num_queries = Elements(list) - 3;
+	else
+		num_queries = Elements(list) - 4;
+
+	if (!info)
+		return num_queries;
+
+	if (index >= num_queries)
+		return 0;
+
+	*info = list[index];
+	return 1;
+}
+
 void r600_query_init(struct r600_common_context *rctx)
 {
 	rctx->b.create_query = r600_create_query;
@@ -1017,3 +1061,8 @@ void r600_query_init(struct r600_common_context *rctx)
 	LIST_INITHEAD(&rctx->active_nontimer_queries);
 	LIST_INITHEAD(&rctx->active_timer_queries);
 }
+
+void r600_init_screen_query_functions(struct r600_common_screen *rscreen)
+{
+	rscreen->b.get_driver_query_info = r600_get_driver_query_info;
+}

From c56e83e518cf5aa852c746f49023256e2c092ae2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Tue, 10 Nov 2015 20:42:02 +0100
Subject: [PATCH 059/335] radeon: cleanup driver query list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/drivers/radeon/r600_query.c | 84 ++++++++++++++++---------
 1 file changed, 55 insertions(+), 29 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 780e2e13a89..8feb8ea0bd0 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -1002,39 +1002,50 @@ err:
 	return;
 }
 
+#define X(name_, query_type_, type_, result_type_) \
+	{ \
+		.name = name_, \
+		.query_type = R600_QUERY_##query_type_, \
+		.type = PIPE_DRIVER_QUERY_TYPE_##type_, \
+		.result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
+		.group_id = ~(unsigned)0 \
+	}
+
+static struct pipe_driver_query_info r600_driver_query_list[] = {
+	X("num-compilations",		NUM_COMPILATIONS,	UINT64, CUMULATIVE),
+	X("num-shaders-created",	NUM_SHADERS_CREATED,	UINT64, CUMULATIVE),
+	X("draw-calls",			DRAW_CALLS,		UINT64, CUMULATIVE),
+	X("requested-VRAM",		REQUESTED_VRAM,		BYTES, AVERAGE),
+	X("requested-GTT",		REQUESTED_GTT,		BYTES, AVERAGE),
+	X("buffer-wait-time",		BUFFER_WAIT_TIME,	MICROSECONDS, CUMULATIVE),
+	X("num-cs-flushes",		NUM_CS_FLUSHES,		UINT64, CUMULATIVE),
+	X("num-bytes-moved",		NUM_BYTES_MOVED,	BYTES, CUMULATIVE),
+	X("VRAM-usage",			VRAM_USAGE,		BYTES, AVERAGE),
+	X("GTT-usage",			GTT_USAGE,		BYTES, AVERAGE),
+	X("GPU-load",			GPU_LOAD,		UINT64, AVERAGE),
+	X("temperature",		GPU_TEMPERATURE,	UINT64, AVERAGE),
+	X("shader-clock",		CURRENT_GPU_SCLK,	HZ, AVERAGE),
+	X("memory-clock",		CURRENT_GPU_MCLK,	HZ, AVERAGE),
+};
+
+#undef X
+
+static unsigned r600_get_num_queries(struct r600_common_screen *rscreen)
+{
+	if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
+		return Elements(r600_driver_query_list); /* whole table */
+	else if (rscreen->info.drm_major == 3)
+		return Elements(r600_driver_query_list) - 3; /* no temperature, shader-clock, memory-clock */
+	else
+		return Elements(r600_driver_query_list) - 4; /* additionally no GPU-load */
+}
+
 static int r600_get_driver_query_info(struct pipe_screen *screen,
 				      unsigned index,
 				      struct pipe_driver_query_info *info)
 {
 	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-	struct pipe_driver_query_info list[] = {
-		{"num-compilations", R600_QUERY_NUM_COMPILATIONS, {0}, PIPE_DRIVER_QUERY_TYPE_UINT64,
-		 PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-		{"num-shaders-created", R600_QUERY_NUM_SHADERS_CREATED, {0}, PIPE_DRIVER_QUERY_TYPE_UINT64,
-		 PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-		{"draw-calls", R600_QUERY_DRAW_CALLS, {0}},
-		{"requested-VRAM", R600_QUERY_REQUESTED_VRAM, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-		{"requested-GTT", R600_QUERY_REQUESTED_GTT, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-		{"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}, PIPE_DRIVER_QUERY_TYPE_MICROSECONDS,
-		 PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-		{"num-cs-flushes", R600_QUERY_NUM_CS_FLUSHES, {0}},
-		{"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, PIPE_DRIVER_QUERY_TYPE_BYTES,
-		 PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-		{"VRAM-usage", R600_QUERY_VRAM_USAGE, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-		{"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-		{"GPU-load", R600_QUERY_GPU_LOAD, {100}},
-		{"temperature", R600_QUERY_GPU_TEMPERATURE, {125}},
-		{"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
-		{"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
-	};
-	unsigned num_queries;
-
-	if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
-		num_queries = Elements(list);
-	else if (rscreen->info.drm_major == 3)
-		num_queries = Elements(list) - 3;
-	else
-		num_queries = Elements(list) - 4;
+	unsigned num_queries = r600_get_num_queries(rscreen);
 
 	if (!info)
 		return num_queries;
@@ -1042,7 +1053,26 @@ static int r600_get_driver_query_info(struct pipe_screen *screen,
 	if (index >= num_queries)
 		return 0;
 
-	*info = list[index];
+	*info = r600_driver_query_list[index];
+
+	/* The static table carries no max_value; fill in the known limits. */
+	switch (info->query_type) {
+	case R600_QUERY_REQUESTED_VRAM:
+	case R600_QUERY_VRAM_USAGE:
+		info->max_value.u64 = rscreen->info.vram_size;
+		break;
+	case R600_QUERY_REQUESTED_GTT:
+	case R600_QUERY_GTT_USAGE:
+		info->max_value.u64 = rscreen->info.gart_size;
+		break;
+	case R600_QUERY_GPU_LOAD:
+		info->max_value.u64 = 100; /* same limit the old inline list used */
+		break;
+	case R600_QUERY_GPU_TEMPERATURE:
+		info->max_value.u64 = 125;
+		break;
+	}
+
 	return 1;
 }
 

From 50cab4788d664769ffc8547d8e759e92d14cf5e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Thu, 12 Nov 2015 22:04:50 +0100
Subject: [PATCH 060/335] radeon: move R600_QUERY_* constants into a new query
 header file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

More query-related structures will have to be moved into their own
header file to support hardware-specific performance counters.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/drivers/radeon/Makefile.sources   |  1 +
 src/gallium/drivers/radeon/r600_pipe_common.h | 15 ------
 src/gallium/drivers/radeon/r600_query.c       |  1 +
 src/gallium/drivers/radeon/r600_query.h       | 49 +++++++++++++++++++
 4 files changed, 51 insertions(+), 15 deletions(-)
 create mode 100644 src/gallium/drivers/radeon/r600_query.h

diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources
index f63790c329e..d840ff8ca54 100644
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -7,6 +7,7 @@ C_SOURCES := \
 	r600_pipe_common.c \
 	r600_pipe_common.h \
 	r600_query.c \
+	r600_query.h \
 	r600_streamout.c \
 	r600_texture.c \
 	radeon_uvd.c \
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index d945e53cbc3..aa047119cb7 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -47,21 +47,6 @@
 #define R600_RESOURCE_FLAG_FLUSHED_DEPTH	(PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
 #define R600_RESOURCE_FLAG_FORCE_TILING		(PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
 
-#define R600_QUERY_DRAW_CALLS		(PIPE_QUERY_DRIVER_SPECIFIC + 0)
-#define R600_QUERY_REQUESTED_VRAM	(PIPE_QUERY_DRIVER_SPECIFIC + 1)
-#define R600_QUERY_REQUESTED_GTT	(PIPE_QUERY_DRIVER_SPECIFIC + 2)
-#define R600_QUERY_BUFFER_WAIT_TIME	(PIPE_QUERY_DRIVER_SPECIFIC + 3)
-#define R600_QUERY_NUM_CS_FLUSHES	(PIPE_QUERY_DRIVER_SPECIFIC + 4)
-#define R600_QUERY_NUM_BYTES_MOVED	(PIPE_QUERY_DRIVER_SPECIFIC + 5)
-#define R600_QUERY_VRAM_USAGE		(PIPE_QUERY_DRIVER_SPECIFIC + 6)
-#define R600_QUERY_GTT_USAGE		(PIPE_QUERY_DRIVER_SPECIFIC + 7)
-#define R600_QUERY_GPU_TEMPERATURE	(PIPE_QUERY_DRIVER_SPECIFIC + 8)
-#define R600_QUERY_CURRENT_GPU_SCLK	(PIPE_QUERY_DRIVER_SPECIFIC + 9)
-#define R600_QUERY_CURRENT_GPU_MCLK	(PIPE_QUERY_DRIVER_SPECIFIC + 10)
-#define R600_QUERY_GPU_LOAD		(PIPE_QUERY_DRIVER_SPECIFIC + 11)
-#define R600_QUERY_NUM_COMPILATIONS	(PIPE_QUERY_DRIVER_SPECIFIC + 12)
-#define R600_QUERY_NUM_SHADERS_CREATED	(PIPE_QUERY_DRIVER_SPECIFIC + 13)
-
 #define R600_CONTEXT_STREAMOUT_FLUSH		(1u << 0)
 #define R600_CONTEXT_PRIVATE_FLAG		(1u << 1)
 
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 8feb8ea0bd0..3b58e00bd27 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -22,6 +22,7 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include "r600_query.h"
 #include "r600_cs.h"
 #include "util/u_memory.h"
 
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
new file mode 100644
index 00000000000..fc8b47b19af
--- /dev/null
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *  Nicolai Hähnle <nicolai.haehnle@amd.com>
+ *
+ */
+
+#ifndef R600_QUERY_H
+#define R600_QUERY_H
+
+#include "pipe/p_defines.h"
+
+#define R600_QUERY_DRAW_CALLS		(PIPE_QUERY_DRIVER_SPECIFIC + 0)
+#define R600_QUERY_REQUESTED_VRAM	(PIPE_QUERY_DRIVER_SPECIFIC + 1)
+#define R600_QUERY_REQUESTED_GTT	(PIPE_QUERY_DRIVER_SPECIFIC + 2)
+#define R600_QUERY_BUFFER_WAIT_TIME	(PIPE_QUERY_DRIVER_SPECIFIC + 3)
+#define R600_QUERY_NUM_CS_FLUSHES	(PIPE_QUERY_DRIVER_SPECIFIC + 4)
+#define R600_QUERY_NUM_BYTES_MOVED	(PIPE_QUERY_DRIVER_SPECIFIC + 5)
+#define R600_QUERY_VRAM_USAGE		(PIPE_QUERY_DRIVER_SPECIFIC + 6)
+#define R600_QUERY_GTT_USAGE		(PIPE_QUERY_DRIVER_SPECIFIC + 7)
+#define R600_QUERY_GPU_TEMPERATURE	(PIPE_QUERY_DRIVER_SPECIFIC + 8)
+#define R600_QUERY_CURRENT_GPU_SCLK	(PIPE_QUERY_DRIVER_SPECIFIC + 9)
+#define R600_QUERY_CURRENT_GPU_MCLK	(PIPE_QUERY_DRIVER_SPECIFIC + 10)
+#define R600_QUERY_GPU_LOAD		(PIPE_QUERY_DRIVER_SPECIFIC + 11)
+#define R600_QUERY_NUM_COMPILATIONS	(PIPE_QUERY_DRIVER_SPECIFIC + 12)
+#define R600_QUERY_NUM_SHADERS_CREATED	(PIPE_QUERY_DRIVER_SPECIFIC + 13)
+#define R600_QUERY_FIRST_PERFCOUNTER	(PIPE_QUERY_DRIVER_SPECIFIC + 100)
+
+#endif /* R600_QUERY_H */

From 829a9808a98f5b53576c6c8bc5da53182d58e1e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Wed, 18 Nov 2015 11:40:00 +0100
Subject: [PATCH 061/335] radeon: add query handler function pointers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The goal here is to be able to move the implementation details of hardware-
specific queries (in particular, performance counters) out of the common code.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
[Fixed a rebase conflict and re-tested before pushing.]
---
 src/gallium/drivers/radeon/r600_query.c | 57 ++++++++++++++++++++++---
 src/gallium/drivers/radeon/r600_query.h | 12 ++++++
 2 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 3b58e00bd27..409d7803562 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -26,7 +26,6 @@
 #include "r600_cs.h"
 #include "util/u_memory.h"
 
-
 struct r600_query_buffer {
 	/* The buffer where query results are stored. */
 	struct r600_resource			*buf;
@@ -39,6 +38,8 @@ struct r600_query_buffer {
 };
 
 struct r600_query {
+	struct r600_query_ops *ops;
+
 	/* The query buffer and how many results are in it. */
 	struct r600_query_buffer		buffer;
 	/* The type of query */
@@ -59,6 +60,19 @@ struct r600_query {
 	unsigned stream;
 };
 
+static void r600_do_destroy_query(struct r600_common_context *, struct r600_query *);
+static boolean r600_do_begin_query(struct r600_common_context *, struct r600_query *);
+static void r600_do_end_query(struct r600_common_context *, struct r600_query *);
+static boolean r600_do_get_query_result(struct r600_common_context *,
+					struct r600_query *, boolean wait,
+					union pipe_query_result *result);
+
+static struct r600_query_ops legacy_query_ops = {
+	.destroy = r600_do_destroy_query,
+	.begin = r600_do_begin_query,
+	.end = r600_do_end_query,
+	.get_result = r600_do_get_query_result,
+};
 
 static bool r600_is_timer_query(unsigned type)
 {
@@ -373,6 +387,7 @@ static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned q
 		return NULL;
 
 	query->type = query_type;
+	query->ops = &legacy_query_ops;
 
 	switch (query_type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER:
@@ -380,7 +395,6 @@ static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned q
 		query->result_size = 16 * rctx->max_db;
 		query->num_cs_dw = 6;
 		break;
-		break;
 	case PIPE_QUERY_TIME_ELAPSED:
 		query->result_size = 16;
 		query->num_cs_dw = 8;
@@ -440,7 +454,15 @@ static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned q
 
 static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
 {
-	struct r600_query *rquery = (struct r600_query*)query;
+	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+	struct r600_query *rquery = (struct r600_query *)query;
+
+	rquery->ops->destroy(rctx, rquery);
+}
+
+static void r600_do_destroy_query(struct r600_common_context *rctx,
+				  struct r600_query *rquery)
+{
 	struct r600_query_buffer *prev = rquery->buffer.previous;
 
 	/* Release all query buffers. */
@@ -452,7 +474,7 @@ static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *quer
 	}
 
 	pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
-	FREE(query);
+	FREE(rquery);
 }
 
 static boolean r600_begin_query(struct pipe_context *ctx,
@@ -460,6 +482,13 @@ static boolean r600_begin_query(struct pipe_context *ctx,
 {
 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
 	struct r600_query *rquery = (struct r600_query *)query;
+
+	return rquery->ops->begin(rctx, rquery);
+}
+
+static boolean r600_do_begin_query(struct r600_common_context *rctx,
+				   struct r600_query *rquery)
+{
 	struct r600_query_buffer *prev = rquery->buffer.previous;
 
 	if (!r600_query_needs_begin(rquery->type)) {
@@ -535,12 +564,18 @@ static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
 	struct r600_query *rquery = (struct r600_query *)query;
 
+	rquery->ops->end(rctx, rquery);
+}
+
+static void r600_do_end_query(struct r600_common_context *rctx,
+			      struct r600_query *rquery)
+{
 	/* Non-GPU queries. */
 	switch (rquery->type) {
 	case PIPE_QUERY_TIMESTAMP_DISJOINT:
 		return;
 	case PIPE_QUERY_GPU_FINISHED:
-		ctx->flush(ctx, &rquery->fence, 0);
+		rctx->b.flush(&rctx->b, &rquery->fence, 0);
 		return;
 	case R600_QUERY_DRAW_CALLS:
 		rquery->end_result = rctx->num_draw_calls;
@@ -799,11 +834,19 @@ static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
 }
 
 static boolean r600_get_query_result(struct pipe_context *ctx,
-					struct pipe_query *query,
-					boolean wait, union pipe_query_result *result)
+				     struct pipe_query *query, boolean wait,
+				     union pipe_query_result *result)
 {
 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
 	struct r600_query *rquery = (struct r600_query *)query;
+
+	return rquery->ops->get_result(rctx, rquery, wait, result);
+}
+
+static boolean r600_do_get_query_result(struct r600_common_context *rctx,
+					struct r600_query *rquery,
+					boolean wait, union pipe_query_result *result)
+{
 	struct r600_query_buffer *qbuf;
 
 	util_query_clear_result(result, rquery->type);
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
index fc8b47b19af..6d568d6fd3b 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -30,6 +30,9 @@
 
 #include "pipe/p_defines.h"
 
+struct r600_common_context;
+struct r600_query;
+
 #define R600_QUERY_DRAW_CALLS		(PIPE_QUERY_DRIVER_SPECIFIC + 0)
 #define R600_QUERY_REQUESTED_VRAM	(PIPE_QUERY_DRIVER_SPECIFIC + 1)
 #define R600_QUERY_REQUESTED_GTT	(PIPE_QUERY_DRIVER_SPECIFIC + 2)
@@ -46,4 +49,13 @@
 #define R600_QUERY_NUM_SHADERS_CREATED	(PIPE_QUERY_DRIVER_SPECIFIC + 13)
 #define R600_QUERY_FIRST_PERFCOUNTER	(PIPE_QUERY_DRIVER_SPECIFIC + 100)
 
+struct r600_query_ops {
+	void (*destroy)(struct r600_common_context *, struct r600_query *);
+	boolean (*begin)(struct r600_common_context *, struct r600_query *);
+	void (*end)(struct r600_common_context *, struct r600_query *);
+	boolean (*get_result)(struct r600_common_context *,
+			      struct r600_query *, boolean wait,
+			      union pipe_query_result *result);
+};
+
 #endif /* R600_QUERY_H */

From 019106760d16a7badf2d3a7034b4281b6cf2c27f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Wed, 18 Nov 2015 11:55:09 +0100
Subject: [PATCH 062/335] radeon: convert software queries to the new style
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Software queries are all queries that do not require suspend/resume
and explicit handling of result buffers.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
[Fixed a rebase conflict and re-tested before pushing.]
---
 src/gallium/drivers/radeon/r600_query.c | 366 +++++++++++++-----------
 1 file changed, 194 insertions(+), 172 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 409d7803562..928a55f6155 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -51,15 +51,195 @@ struct r600_query {
 	unsigned				num_cs_dw;
 	/* linked list of queries */
 	struct list_head			list;
-	/* for custom non-GPU queries */
+	/* For transform feedback: which stream the query is for */
+	unsigned stream;
+};
+
+/* Queries without buffer handling or suspend/resume. */
+struct r600_query_sw {
+	struct r600_query b;
+
 	uint64_t begin_result;
 	uint64_t end_result;
 	/* Fence for GPU_FINISHED. */
 	struct pipe_fence_handle *fence;
-	/* For transform feedback: which stream the query is for */
-	unsigned stream;
 };
 
+static void r600_query_sw_destroy(struct r600_common_context *rctx,
+				  struct r600_query *rquery)
+{
+	struct pipe_screen *screen = rctx->b.screen;
+	struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+	screen->fence_reference(screen, &query->fence, NULL);
+	FREE(query);
+}
+
+static enum radeon_value_id winsys_id_from_type(unsigned type)
+{
+	switch (type) {
+	case R600_QUERY_REQUESTED_VRAM: return RADEON_REQUESTED_VRAM_MEMORY;
+	case R600_QUERY_REQUESTED_GTT: return RADEON_REQUESTED_GTT_MEMORY;
+	case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS;
+	case R600_QUERY_NUM_CS_FLUSHES: return RADEON_NUM_CS_FLUSHES;
+	case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
+	case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE;
+	case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
+	case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
+	case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
+	case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
+	default: unreachable("query type does not correspond to winsys id");
+	}
+}
+
+static boolean r600_query_sw_begin(struct r600_common_context *rctx,
+				   struct r600_query *rquery)
+{
+	struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+	switch(query->b.type) {
+	case PIPE_QUERY_TIMESTAMP_DISJOINT:
+	case PIPE_QUERY_GPU_FINISHED:
+		break; /* nothing to sample at begin */
+	case R600_QUERY_DRAW_CALLS:
+		query->begin_result = rctx->num_draw_calls;
+		break;
+	case R600_QUERY_REQUESTED_VRAM:
+	case R600_QUERY_REQUESTED_GTT:
+	case R600_QUERY_VRAM_USAGE:
+	case R600_QUERY_GTT_USAGE:
+	case R600_QUERY_GPU_TEMPERATURE:
+	case R600_QUERY_CURRENT_GPU_SCLK:
+	case R600_QUERY_CURRENT_GPU_MCLK:
+		query->begin_result = 0; /* zero baseline, so end - begin == end sample */
+		break;
+	case R600_QUERY_BUFFER_WAIT_TIME:
+	case R600_QUERY_NUM_CS_FLUSHES:
+	case R600_QUERY_NUM_BYTES_MOVED: { /* baseline for the end - begin delta */
+		enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
+		query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
+		break;
+	}
+	case R600_QUERY_GPU_LOAD: /* value is handed back to r600_gpu_load_end() at query end */
+		query->begin_result = r600_gpu_load_begin(rctx->screen);
+		break;
+	case R600_QUERY_NUM_COMPILATIONS:
+		query->begin_result = p_atomic_read(&rctx->screen->num_compilations);
+		break;
+	case R600_QUERY_NUM_SHADERS_CREATED:
+		query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
+		break;
+	default:
+		unreachable("r600_query_sw_begin: bad query type");
+	}
+
+	return TRUE;
+}
+
+static void r600_query_sw_end(struct r600_common_context *rctx,
+			      struct r600_query *rquery)
+{
+	struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+	switch(query->b.type) {
+	case PIPE_QUERY_TIMESTAMP_DISJOINT:
+		break;
+	case PIPE_QUERY_GPU_FINISHED:
+		rctx->b.flush(&rctx->b, &query->fence, 0);
+		break;
+	case R600_QUERY_DRAW_CALLS:
+		query->end_result = rctx->num_draw_calls; /* get_result: end - begin */
+		break;
+	case R600_QUERY_REQUESTED_VRAM:
+	case R600_QUERY_REQUESTED_GTT:
+	case R600_QUERY_VRAM_USAGE:
+	case R600_QUERY_GTT_USAGE:
+	case R600_QUERY_GPU_TEMPERATURE:
+	case R600_QUERY_CURRENT_GPU_SCLK:
+	case R600_QUERY_CURRENT_GPU_MCLK:
+	case R600_QUERY_BUFFER_WAIT_TIME:
+	case R600_QUERY_NUM_CS_FLUSHES:
+	case R600_QUERY_NUM_BYTES_MOVED: {
+		enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
+		query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
+		break;
+	}
+	case R600_QUERY_GPU_LOAD:
+		query->end_result = r600_gpu_load_end(rctx->screen,
+						      query->begin_result);
+		query->begin_result = 0; /* end_result is already final */
+		break;
+	case R600_QUERY_NUM_COMPILATIONS:
+		query->end_result = p_atomic_read(&rctx->screen->num_compilations);
+		break;
+	case R600_QUERY_NUM_SHADERS_CREATED:
+		query->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
+		break;
+	default:
+		unreachable("r600_query_sw_end: bad query type");
+	}
+}
+
+static boolean r600_query_sw_get_result(struct r600_common_context *rctx,
+					struct r600_query *rquery,
+					boolean wait,
+					union pipe_query_result *result)
+{
+	struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+	switch (query->b.type) {
+	case PIPE_QUERY_TIMESTAMP_DISJOINT:
+		/* Convert from cycles per millisecond to cycles per second (Hz). */
+		result->timestamp_disjoint.frequency =
+			(uint64_t)rctx->screen->info.r600_clock_crystal_freq * 1000;
+		result->timestamp_disjoint.disjoint = FALSE;
+		return TRUE;
+	case PIPE_QUERY_GPU_FINISHED: {
+		struct pipe_screen *screen = rctx->b.screen;
+		result->b = screen->fence_finish(screen, query->fence,
+						 wait ? PIPE_TIMEOUT_INFINITE : 0);
+		return result->b; /* FALSE here means the fence has not finished yet */
+	}
+	}
+
+	result->u64 = query->end_result - query->begin_result; /* delta, or the end sample when begin is 0 */
+
+	switch (query->b.type) {
+	case R600_QUERY_BUFFER_WAIT_TIME: /* winsys value is in ns, the query is typed as microseconds */
+	case R600_QUERY_GPU_TEMPERATURE: /* presumably millidegrees -> degrees; TODO confirm */
+		result->u64 /= 1000;
+		break;
+	case R600_QUERY_CURRENT_GPU_SCLK:
+	case R600_QUERY_CURRENT_GPU_MCLK: /* presumably MHz -> Hz (query is typed as HZ); TODO confirm */
+		result->u64 *= 1000000;
+		break;
+	}
+
+	return TRUE;
+}
+
+static struct r600_query_ops sw_query_ops = {
+	.destroy = r600_query_sw_destroy,
+	.begin = r600_query_sw_begin,
+	.end = r600_query_sw_end,
+	.get_result = r600_query_sw_get_result
+};
+
+static struct pipe_query *r600_query_sw_create(struct pipe_context *ctx,
+					       unsigned query_type)
+{
+	struct r600_query_sw *query;
+
+	query = CALLOC_STRUCT(r600_query_sw);
+	if (query == NULL)
+		return NULL;
+
+	query->b.type = query_type;
+	query->b.ops = &sw_query_ops;
+
+	return (struct pipe_query *)query;
+}
+
 static void r600_do_destroy_query(struct r600_common_context *, struct r600_query *);
 static boolean r600_do_begin_query(struct r600_common_context *, struct r600_query *);
 static void r600_do_end_query(struct r600_common_context *, struct r600_query *);
@@ -82,8 +262,7 @@ static bool r600_is_timer_query(unsigned type)
 
 static bool r600_query_needs_begin(unsigned type)
 {
-	return type != PIPE_QUERY_GPU_FINISHED &&
-	       type != PIPE_QUERY_TIMESTAMP;
+	return type != PIPE_QUERY_TIMESTAMP;
 }
 
 static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx, unsigned type)
@@ -91,27 +270,6 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c
 	unsigned j, i, num_results, buf_size = 4096;
 	uint32_t *results;
 
-	/* Non-GPU queries. */
-	switch (type) {
-	case PIPE_QUERY_TIMESTAMP_DISJOINT:
-	case PIPE_QUERY_GPU_FINISHED:
-	case R600_QUERY_DRAW_CALLS:
-	case R600_QUERY_REQUESTED_VRAM:
-	case R600_QUERY_REQUESTED_GTT:
-	case R600_QUERY_BUFFER_WAIT_TIME:
-	case R600_QUERY_NUM_CS_FLUSHES:
-	case R600_QUERY_NUM_BYTES_MOVED:
-	case R600_QUERY_VRAM_USAGE:
-	case R600_QUERY_GTT_USAGE:
-	case R600_QUERY_GPU_TEMPERATURE:
-	case R600_QUERY_CURRENT_GPU_SCLK:
-	case R600_QUERY_CURRENT_GPU_MCLK:
-	case R600_QUERY_GPU_LOAD:
-	case R600_QUERY_NUM_COMPILATIONS:
-	case R600_QUERY_NUM_SHADERS_CREATED:
-		return NULL;
-	}
-
 	/* Queries are normally read by the CPU after
 	 * being written by the gpu, hence staging is probably a good
 	 * usage pattern.
@@ -380,7 +538,11 @@ static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned q
 {
 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
 	struct r600_query *query;
-	bool skip_allocation = false;
+
+	if (query_type == PIPE_QUERY_TIMESTAMP_DISJOINT ||
+	    query_type == PIPE_QUERY_GPU_FINISHED ||
+	    query_type >= PIPE_QUERY_DRIVER_SPECIFIC)
+		return r600_query_sw_create(ctx, query_type);
 
 	query = CALLOC_STRUCT(r600_query);
 	if (query == NULL)
@@ -417,38 +579,18 @@ static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned q
 		query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16;
 		query->num_cs_dw = 6;
 		break;
-	/* Non-GPU queries and queries not requiring a buffer. */
-	case PIPE_QUERY_TIMESTAMP_DISJOINT:
-	case PIPE_QUERY_GPU_FINISHED:
-	case R600_QUERY_DRAW_CALLS:
-	case R600_QUERY_REQUESTED_VRAM:
-	case R600_QUERY_REQUESTED_GTT:
-	case R600_QUERY_BUFFER_WAIT_TIME:
-	case R600_QUERY_NUM_CS_FLUSHES:
-	case R600_QUERY_NUM_BYTES_MOVED:
-	case R600_QUERY_VRAM_USAGE:
-	case R600_QUERY_GTT_USAGE:
-	case R600_QUERY_GPU_TEMPERATURE:
-	case R600_QUERY_CURRENT_GPU_SCLK:
-	case R600_QUERY_CURRENT_GPU_MCLK:
-	case R600_QUERY_GPU_LOAD:
-	case R600_QUERY_NUM_COMPILATIONS:
-	case R600_QUERY_NUM_SHADERS_CREATED:
-		skip_allocation = true;
-		break;
 	default:
 		assert(0);
 		FREE(query);
 		return NULL;
 	}
 
-	if (!skip_allocation) {
-		query->buffer.buf = r600_new_query_buffer(rctx, query_type);
-		if (!query->buffer.buf) {
-			FREE(query);
-			return NULL;
-		}
+	query->buffer.buf = r600_new_query_buffer(rctx, query_type);
+	if (!query->buffer.buf) {
+		FREE(query);
+		return NULL;
 	}
+
 	return (struct pipe_query*)query;
 }
 
@@ -496,42 +638,6 @@ static boolean r600_do_begin_query(struct r600_common_context *rctx,
 		return false;
 	}
 
-	/* Non-GPU queries. */
-	switch (rquery->type) {
-	case PIPE_QUERY_TIMESTAMP_DISJOINT:
-		return true;
-	case R600_QUERY_DRAW_CALLS:
-		rquery->begin_result = rctx->num_draw_calls;
-		return true;
-	case R600_QUERY_REQUESTED_VRAM:
-	case R600_QUERY_REQUESTED_GTT:
-	case R600_QUERY_VRAM_USAGE:
-	case R600_QUERY_GTT_USAGE:
-	case R600_QUERY_GPU_TEMPERATURE:
-	case R600_QUERY_CURRENT_GPU_SCLK:
-	case R600_QUERY_CURRENT_GPU_MCLK:
-		rquery->begin_result = 0;
-		return true;
-	case R600_QUERY_BUFFER_WAIT_TIME:
-		rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000;
-		return true;
-	case R600_QUERY_NUM_CS_FLUSHES:
-		rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
-		return true;
-	case R600_QUERY_NUM_BYTES_MOVED:
-		rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED);
-		return true;
-	case R600_QUERY_GPU_LOAD:
-		rquery->begin_result = r600_gpu_load_begin(rctx->screen);
-		return true;
-	case R600_QUERY_NUM_COMPILATIONS:
-		rquery->begin_result = p_atomic_read(&rctx->screen->num_compilations);
-		return true;
-	case R600_QUERY_NUM_SHADERS_CREATED:
-		rquery->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
-		return true;
-	}
-
 	/* Discard the old query buffers. */
 	while (prev) {
 		struct r600_query_buffer *qbuf = prev;
@@ -570,57 +676,6 @@ static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
 static void r600_do_end_query(struct r600_common_context *rctx,
 			      struct r600_query *rquery)
 {
-	/* Non-GPU queries. */
-	switch (rquery->type) {
-	case PIPE_QUERY_TIMESTAMP_DISJOINT:
-		return;
-	case PIPE_QUERY_GPU_FINISHED:
-		rctx->b.flush(&rctx->b, &rquery->fence, 0);
-		return;
-	case R600_QUERY_DRAW_CALLS:
-		rquery->end_result = rctx->num_draw_calls;
-		return;
-	case R600_QUERY_REQUESTED_VRAM:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_VRAM_MEMORY);
-		return;
-	case R600_QUERY_REQUESTED_GTT:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_GTT_MEMORY);
-		return;
-	case R600_QUERY_BUFFER_WAIT_TIME:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000;
-		return;
-	case R600_QUERY_NUM_CS_FLUSHES:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
-		return;
-	case R600_QUERY_NUM_BYTES_MOVED:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED);
-		return;
-	case R600_QUERY_VRAM_USAGE:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_VRAM_USAGE);
-		return;
-	case R600_QUERY_GTT_USAGE:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GTT_USAGE);
-		return;
-	case R600_QUERY_GPU_TEMPERATURE:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GPU_TEMPERATURE) / 1000;
-		return;
-	case R600_QUERY_CURRENT_GPU_SCLK:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_SCLK) * 1000000;
-		return;
-	case R600_QUERY_CURRENT_GPU_MCLK:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_MCLK) * 1000000;
-		return;
-	case R600_QUERY_GPU_LOAD:
-		rquery->end_result = r600_gpu_load_end(rctx->screen, rquery->begin_result);
-		return;
-	case R600_QUERY_NUM_COMPILATIONS:
-		rquery->end_result = p_atomic_read(&rctx->screen->num_compilations);
-		return;
-	case R600_QUERY_NUM_SHADERS_CREATED:
-		rquery->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
-		return;
-	}
-
 	r600_emit_query_end(rctx, rquery);
 
 	if (r600_query_needs_begin(rquery->type))
@@ -651,42 +706,9 @@ static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
 					    boolean wait,
 					    union pipe_query_result *result)
 {
-	struct pipe_screen *screen = ctx->b.screen;
 	unsigned results_base = 0;
 	char *map;
 
-	/* Non-GPU queries. */
-	switch (query->type) {
-	case PIPE_QUERY_TIMESTAMP_DISJOINT:
-		/* Convert from cycles per millisecond to cycles per second (Hz). */
-		result->timestamp_disjoint.frequency =
-			(uint64_t)ctx->screen->info.r600_clock_crystal_freq * 1000;
-		result->timestamp_disjoint.disjoint = FALSE;
-		return TRUE;
-	case PIPE_QUERY_GPU_FINISHED:
-		result->b = screen->fence_finish(screen, query->fence,
-					wait ? PIPE_TIMEOUT_INFINITE : 0);
-		return result->b;
-	case R600_QUERY_DRAW_CALLS:
-	case R600_QUERY_REQUESTED_VRAM:
-	case R600_QUERY_REQUESTED_GTT:
-	case R600_QUERY_BUFFER_WAIT_TIME:
-	case R600_QUERY_NUM_CS_FLUSHES:
-	case R600_QUERY_NUM_BYTES_MOVED:
-	case R600_QUERY_VRAM_USAGE:
-	case R600_QUERY_GTT_USAGE:
-	case R600_QUERY_GPU_TEMPERATURE:
-	case R600_QUERY_CURRENT_GPU_SCLK:
-	case R600_QUERY_CURRENT_GPU_MCLK:
-	case R600_QUERY_NUM_COMPILATIONS:
-	case R600_QUERY_NUM_SHADERS_CREATED:
-		result->u64 = query->end_result - query->begin_result;
-		return TRUE;
-	case R600_QUERY_GPU_LOAD:
-		result->u64 = query->end_result;
-		return TRUE;
-	}
-
 	map = r600_buffer_map_sync_with_rings(ctx, qbuf->buf,
 						PIPE_TRANSFER_READ |
 						(wait ? 0 : PIPE_TRANSFER_DONTBLOCK));

From 1d10b3d01e8af58f3c14bf39af8b592860ab36aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Wed, 18 Nov 2015 11:59:21 +0100
Subject: [PATCH 063/335] radeon: convert hardware queries to the new style
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move r600_query and r600_query_hw into the header because we will want to
reuse the buffer handling and suspend/resume logic outside of the common
radeon code.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
[Fixed a rebase conflict and re-tested before pushing.]
---
 src/gallium/drivers/radeon/r600_query.c | 281 +++++++++++-------------
 src/gallium/drivers/radeon/r600_query.h |  39 ++++
 2 files changed, 172 insertions(+), 148 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 928a55f6155..556077e860f 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -26,35 +26,6 @@
 #include "r600_cs.h"
 #include "util/u_memory.h"
 
-struct r600_query_buffer {
-	/* The buffer where query results are stored. */
-	struct r600_resource			*buf;
-	/* Offset of the next free result after current query data */
-	unsigned				results_end;
-	/* If a query buffer is full, a new buffer is created and the old one
-	 * is put in here. When we calculate the result, we sum up the samples
-	 * from all buffers. */
-	struct r600_query_buffer		*previous;
-};
-
-struct r600_query {
-	struct r600_query_ops *ops;
-
-	/* The query buffer and how many results are in it. */
-	struct r600_query_buffer		buffer;
-	/* The type of query */
-	unsigned				type;
-	/* Size of the result in memory for both begin_query and end_query,
-	 * this can be one or two numbers, or it could even be a size of a structure. */
-	unsigned				result_size;
-	/* The number of dwords for begin_query or end_query. */
-	unsigned				num_cs_dw;
-	/* linked list of queries */
-	struct list_head			list;
-	/* For transform feedback: which stream the query is for */
-	unsigned stream;
-};
-
 /* Queries without buffer handling or suspend/resume. */
 struct r600_query_sw {
 	struct r600_query b;
@@ -240,19 +211,23 @@ static struct pipe_query *r600_query_sw_create(struct pipe_context *ctx,
 	return (struct pipe_query *)query;
 }
 
-static void r600_do_destroy_query(struct r600_common_context *, struct r600_query *);
-static boolean r600_do_begin_query(struct r600_common_context *, struct r600_query *);
-static void r600_do_end_query(struct r600_common_context *, struct r600_query *);
-static boolean r600_do_get_query_result(struct r600_common_context *,
-					struct r600_query *, boolean wait,
-					union pipe_query_result *result);
+void r600_query_hw_destroy(struct r600_common_context *rctx,
+			   struct r600_query *rquery)
+{
+	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
+	struct r600_query_buffer *prev = query->buffer.previous;
 
-static struct r600_query_ops legacy_query_ops = {
-	.destroy = r600_do_destroy_query,
-	.begin = r600_do_begin_query,
-	.end = r600_do_end_query,
-	.get_result = r600_do_get_query_result,
-};
+	/* Release all query buffers. */
+	while (prev) {
+		struct r600_query_buffer *qbuf = prev;
+		prev = prev->previous;
+		pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
+		FREE(qbuf);
+	}
+
+	pipe_resource_reference((struct pipe_resource**)&query->buffer.buf, NULL);
+	FREE(rquery);
+}
 
 static bool r600_is_timer_query(unsigned type)
 {
@@ -313,6 +288,73 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c
 	return buf;
 }
 
+static boolean r600_query_hw_begin(struct r600_common_context *, struct r600_query *);
+static void r600_query_hw_end(struct r600_common_context *, struct r600_query *);
+static boolean r600_query_hw_get_result(struct r600_common_context *,
+					struct r600_query *, boolean wait,
+					union pipe_query_result *result);
+
+static struct r600_query_ops query_hw_ops = {
+	.destroy = r600_query_hw_destroy,
+	.begin = r600_query_hw_begin,
+	.end = r600_query_hw_end,
+	.get_result = r600_query_hw_get_result,
+};
+
+static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
+					       unsigned query_type,
+					       unsigned index)
+{
+	struct r600_query_hw *query = CALLOC_STRUCT(r600_query_hw);
+	if (!query)
+		return NULL;
+
+	query->b.type = query_type;
+	query->b.ops = &query_hw_ops;
+
+	switch (query_type) {
+	case PIPE_QUERY_OCCLUSION_COUNTER:
+	case PIPE_QUERY_OCCLUSION_PREDICATE:
+		query->result_size = 16 * rctx->max_db;
+		query->num_cs_dw = 6;
+		break;
+	case PIPE_QUERY_TIME_ELAPSED:
+		query->result_size = 16;
+		query->num_cs_dw = 8;
+		break;
+	case PIPE_QUERY_TIMESTAMP:
+		query->result_size = 8;
+		query->num_cs_dw = 8;
+		break;
+	case PIPE_QUERY_PRIMITIVES_EMITTED:
+	case PIPE_QUERY_PRIMITIVES_GENERATED:
+	case PIPE_QUERY_SO_STATISTICS:
+	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
+		query->result_size = 32;
+		query->num_cs_dw = 6;
+		query->stream = index;
+		break;
+	case PIPE_QUERY_PIPELINE_STATISTICS:
+		/* 11 values on EG, 8 on R600. */
+		query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16;
+		query->num_cs_dw = 6;
+		break;
+	default:
+		assert(0);
+		FREE(query);
+		return NULL;
+	}
+
+	query->buffer.buf = r600_new_query_buffer(rctx, query_type);
+	if (!query->buffer.buf) {
+		FREE(query);
+		return NULL;
+	}
+
+	return (struct pipe_query *)query;
+}
+
 static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
 					      unsigned type, int diff)
 {
@@ -332,7 +374,7 @@ static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
 	}
 }
 
-static unsigned event_type_for_stream(struct r600_query *query)
+static unsigned event_type_for_stream(struct r600_query_hw *query)
 {
 	switch (query->stream) {
 	default:
@@ -343,20 +385,21 @@ static unsigned event_type_for_stream(struct r600_query *query)
 	}
 }
 
-static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_query *query)
+static void r600_emit_query_begin(struct r600_common_context *ctx,
+				  struct r600_query_hw *query)
 {
 	struct radeon_winsys_cs *cs = ctx->gfx.cs;
 	uint64_t va;
 
-	r600_update_occlusion_query_state(ctx, query->type, 1);
-	r600_update_prims_generated_query_state(ctx, query->type, 1);
+	r600_update_occlusion_query_state(ctx, query->b.type, 1);
+	r600_update_prims_generated_query_state(ctx, query->b.type, 1);
 	ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw * 2, TRUE);
 
 	/* Get a new query buffer if needed. */
 	if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
 		struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
 		*qbuf = query->buffer;
-		query->buffer.buf = r600_new_query_buffer(ctx, query->type);
+		query->buffer.buf = r600_new_query_buffer(ctx, query->b.type);
 		query->buffer.results_end = 0;
 		query->buffer.previous = qbuf;
 	}
@@ -364,7 +407,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
 	/* emit begin query */
 	va = query->buffer.buf->gpu_address + query->buffer.results_end;
 
-	switch (query->type) {
+	switch (query->b.type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER:
 	case PIPE_QUERY_OCCLUSION_PREDICATE:
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
@@ -401,26 +444,27 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
 	r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
 			RADEON_PRIO_QUERY);
 
-	if (r600_is_timer_query(query->type))
+	if (r600_is_timer_query(query->b.type))
 		ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw;
 	else
 		ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
 }
 
-static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_query *query)
+static void r600_emit_query_end(struct r600_common_context *ctx,
+				struct r600_query_hw *query)
 {
 	struct radeon_winsys_cs *cs = ctx->gfx.cs;
 	uint64_t va;
 
 	/* The queries which need begin already called this in begin_query. */
-	if (!r600_query_needs_begin(query->type)) {
+	if (!r600_query_needs_begin(query->b.type)) {
 		ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw, FALSE);
 	}
 
 	va = query->buffer.buf->gpu_address;
 
 	/* emit end query */
-	switch (query->type) {
+	switch (query->b.type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER:
 	case PIPE_QUERY_OCCLUSION_PREDICATE:
 		va += query->buffer.results_end + 8;
@@ -465,22 +509,22 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
 
 	query->buffer.results_end += query->result_size;
 
-	if (r600_query_needs_begin(query->type)) {
-		if (r600_is_timer_query(query->type))
+	if (r600_query_needs_begin(query->b.type)) {
+		if (r600_is_timer_query(query->b.type))
 			ctx->num_cs_dw_timer_queries_suspend -= query->num_cs_dw;
 		else
 			ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw;
 	}
 
-	r600_update_occlusion_query_state(ctx, query->type, -1);
-	r600_update_prims_generated_query_state(ctx, query->type, -1);
+	r600_update_occlusion_query_state(ctx, query->b.type, -1);
+	r600_update_prims_generated_query_state(ctx, query->b.type, -1);
 }
 
 static void r600_emit_query_predication(struct r600_common_context *ctx,
 					struct r600_atom *atom)
 {
 	struct radeon_winsys_cs *cs = ctx->gfx.cs;
-	struct r600_query *query = (struct r600_query*)ctx->render_cond;
+	struct r600_query_hw *query = (struct r600_query_hw *)ctx->render_cond;
 	struct r600_query_buffer *qbuf;
 	uint32_t op;
 	bool flag_wait;
@@ -491,7 +535,7 @@ static void r600_emit_query_predication(struct r600_common_context *ctx,
 	flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT ||
 		    ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;
 
-	switch (query->type) {
+	switch (query->b.type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER:
 	case PIPE_QUERY_OCCLUSION_PREDICATE:
 		op = PRED_OP(PREDICATION_OP_ZPASS);
@@ -537,61 +581,13 @@ static void r600_emit_query_predication(struct r600_common_context *ctx,
 static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
 {
 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
-	struct r600_query *query;
 
 	if (query_type == PIPE_QUERY_TIMESTAMP_DISJOINT ||
 	    query_type == PIPE_QUERY_GPU_FINISHED ||
 	    query_type >= PIPE_QUERY_DRIVER_SPECIFIC)
 		return r600_query_sw_create(ctx, query_type);
 
-	query = CALLOC_STRUCT(r600_query);
-	if (query == NULL)
-		return NULL;
-
-	query->type = query_type;
-	query->ops = &legacy_query_ops;
-
-	switch (query_type) {
-	case PIPE_QUERY_OCCLUSION_COUNTER:
-	case PIPE_QUERY_OCCLUSION_PREDICATE:
-		query->result_size = 16 * rctx->max_db;
-		query->num_cs_dw = 6;
-		break;
-	case PIPE_QUERY_TIME_ELAPSED:
-		query->result_size = 16;
-		query->num_cs_dw = 8;
-		break;
-	case PIPE_QUERY_TIMESTAMP:
-		query->result_size = 8;
-		query->num_cs_dw = 8;
-		break;
-	case PIPE_QUERY_PRIMITIVES_EMITTED:
-	case PIPE_QUERY_PRIMITIVES_GENERATED:
-	case PIPE_QUERY_SO_STATISTICS:
-	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
-		query->result_size = 32;
-		query->num_cs_dw = 6;
-		query->stream = index;
-		break;
-	case PIPE_QUERY_PIPELINE_STATISTICS:
-		/* 11 values on EG, 8 on R600. */
-		query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16;
-		query->num_cs_dw = 6;
-		break;
-	default:
-		assert(0);
-		FREE(query);
-		return NULL;
-	}
-
-	query->buffer.buf = r600_new_query_buffer(rctx, query_type);
-	if (!query->buffer.buf) {
-		FREE(query);
-		return NULL;
-	}
-
-	return (struct pipe_query*)query;
+	return r600_query_hw_create(rctx, query_type, index);
 }
 
 static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
@@ -602,23 +598,6 @@ static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *quer
 	rquery->ops->destroy(rctx, rquery);
 }
 
-static void r600_do_destroy_query(struct r600_common_context *rctx,
-				  struct r600_query *rquery)
-{
-	struct r600_query_buffer *prev = rquery->buffer.previous;
-
-	/* Release all query buffers. */
-	while (prev) {
-		struct r600_query_buffer *qbuf = prev;
-		prev = prev->previous;
-		pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
-		FREE(qbuf);
-	}
-
-	pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
-	FREE(rquery);
-}
-
 static boolean r600_begin_query(struct pipe_context *ctx,
                                 struct pipe_query *query)
 {
@@ -628,10 +607,11 @@ static boolean r600_begin_query(struct pipe_context *ctx,
 	return rquery->ops->begin(rctx, rquery);
 }
 
-static boolean r600_do_begin_query(struct r600_common_context *rctx,
+static boolean r600_query_hw_begin(struct r600_common_context *rctx,
 				   struct r600_query *rquery)
 {
-	struct r600_query_buffer *prev = rquery->buffer.previous;
+	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
+	struct r600_query_buffer *prev = query->buffer.previous;
 
 	if (!r600_query_needs_begin(rquery->type)) {
 		assert(0);
@@ -647,21 +627,21 @@ static boolean r600_do_begin_query(struct r600_common_context *rctx,
 	}
 
 	/* Obtain a new buffer if the current one can't be mapped without a stall. */
-	if (r600_rings_is_buffer_referenced(rctx, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
-	    !rctx->ws->buffer_wait(rquery->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
-		pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
-		rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
+	if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
+	    !rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
+		pipe_resource_reference((struct pipe_resource**)&query->buffer.buf, NULL);
+		query->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
 	}
 
-	rquery->buffer.results_end = 0;
-	rquery->buffer.previous = NULL;
+	query->buffer.results_end = 0;
+	query->buffer.previous = NULL;
 
-	r600_emit_query_begin(rctx, rquery);
+	r600_emit_query_begin(rctx, query);
 
 	if (r600_is_timer_query(rquery->type))
-		LIST_ADDTAIL(&rquery->list, &rctx->active_timer_queries);
+		LIST_ADDTAIL(&query->list, &rctx->active_timer_queries);
 	else
-		LIST_ADDTAIL(&rquery->list, &rctx->active_nontimer_queries);
+		LIST_ADDTAIL(&query->list, &rctx->active_nontimer_queries);
    return true;
 }
 
@@ -673,13 +653,15 @@ static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
 	rquery->ops->end(rctx, rquery);
 }
 
-static void r600_do_end_query(struct r600_common_context *rctx,
+static void r600_query_hw_end(struct r600_common_context *rctx,
 			      struct r600_query *rquery)
 {
-	r600_emit_query_end(rctx, rquery);
+	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
+
+	r600_emit_query_end(rctx, query);
 
 	if (r600_query_needs_begin(rquery->type))
-		LIST_DELINIT(&rquery->list);
+		LIST_DELINIT(&query->list);
 }
 
 static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
@@ -701,7 +683,7 @@ static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned
 }
 
 static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
-					    struct r600_query *query,
+					    struct r600_query_hw *query,
 					    struct r600_query_buffer *qbuf,
 					    boolean wait,
 					    union pipe_query_result *result)
@@ -716,7 +698,7 @@ static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
 		return FALSE;
 
 	/* count all results across all data blocks */
-	switch (query->type) {
+	switch (query->b.type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER:
 		while (results_base != qbuf->results_end) {
 			result->u64 +=
@@ -865,16 +847,17 @@ static boolean r600_get_query_result(struct pipe_context *ctx,
 	return rquery->ops->get_result(rctx, rquery, wait, result);
 }
 
-static boolean r600_do_get_query_result(struct r600_common_context *rctx,
+static boolean r600_query_hw_get_result(struct r600_common_context *rctx,
 					struct r600_query *rquery,
 					boolean wait, union pipe_query_result *result)
 {
+	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
 	struct r600_query_buffer *qbuf;
 
 	util_query_clear_result(result, rquery->type);
 
-	for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) {
-		if (!r600_get_query_buffer_result(rctx, rquery, qbuf, wait, result)) {
+	for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
+		if (!r600_get_query_buffer_result(rctx, query, qbuf, wait, result)) {
 			return FALSE;
 		}
 	}
@@ -893,7 +876,7 @@ static void r600_render_condition(struct pipe_context *ctx,
 				  uint mode)
 {
 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
-	struct r600_query *rquery = (struct r600_query*)query;
+	struct r600_query_hw *rquery = (struct r600_query_hw *)query;
 	struct r600_query_buffer *qbuf;
 	struct r600_atom *atom = &rctx->render_cond_atom;
 
@@ -903,8 +886,10 @@ static void r600_render_condition(struct pipe_context *ctx,
 
 	/* Compute the size of SET_PREDICATION packets. */
 	atom->num_dw = 0;
-	for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
-		atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
+	if (query) {
+		for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
+			atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
+	}
 
 	rctx->set_atom_dirty(rctx, atom, query != NULL);
 }
@@ -913,7 +898,7 @@ static void r600_suspend_queries(struct r600_common_context *ctx,
 				 struct list_head *query_list,
 				 unsigned *num_cs_dw_queries_suspend)
 {
-	struct r600_query *query;
+	struct r600_query_hw *query;
 
 	LIST_FOR_EACH_ENTRY(query, query_list, list) {
 		r600_emit_query_end(ctx, query);
@@ -936,7 +921,7 @@ void r600_suspend_timer_queries(struct r600_common_context *ctx)
 static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx,
 						    struct list_head *query_list)
 {
-	struct r600_query *query;
+	struct r600_query_hw *query;
 	unsigned num_dw = 0;
 
 	LIST_FOR_EACH_ENTRY(query, query_list, list) {
@@ -962,7 +947,7 @@ static void r600_resume_queries(struct r600_common_context *ctx,
 				struct list_head *query_list,
 				unsigned *num_cs_dw_queries_suspend)
 {
-	struct r600_query *query;
+	struct r600_query_hw *query;
 	unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, query_list);
 
 	assert(*num_cs_dw_queries_suspend == 0);
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
index 6d568d6fd3b..baad5825d3b 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -29,9 +29,11 @@
 #define R600_QUERY_H
 
 #include "pipe/p_defines.h"
+#include "util/list.h"
 
 struct r600_common_context;
 struct r600_query;
+struct r600_resource;
 
 #define R600_QUERY_DRAW_CALLS		(PIPE_QUERY_DRIVER_SPECIFIC + 0)
 #define R600_QUERY_REQUESTED_VRAM	(PIPE_QUERY_DRIVER_SPECIFIC + 1)
@@ -58,4 +60,41 @@ struct r600_query_ops {
 			      union pipe_query_result *result);
 };
 
+struct r600_query {
+	struct r600_query_ops *ops;
+
+	/* The type of query */
+	unsigned type;
+};
+
+struct r600_query_buffer {
+	/* The buffer where query results are stored. */
+	struct r600_resource		*buf;
+	/* Offset of the next free result after current query data */
+	unsigned			results_end;
+	/* If a query buffer is full, a new buffer is created and the old one
+	 * is put in here. When we calculate the result, we sum up the samples
+	 * from all buffers. */
+	struct r600_query_buffer	*previous;
+};
+
+struct r600_query_hw {
+	struct r600_query b;
+
+	/* The query buffer and how many results are in it. */
+	struct r600_query_buffer buffer;
+	/* Size of the result in memory for both begin_query and end_query,
+	 * this can be one or two numbers, or it could even be a size of a structure. */
+	unsigned result_size;
+	/* The number of dwords for begin_query or end_query. */
+	unsigned num_cs_dw;
+	/* Linked list of queries */
+	struct list_head list;
+	/* For transform feedback: which stream the query is for */
+	unsigned stream;
+};
+
+void r600_query_hw_destroy(struct r600_common_context *rctx,
+			   struct r600_query *rquery);
+
 #endif /* R600_QUERY_H */

From c207c55fc08a1bf3dd40e79b3aaec34afbee2e55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Wed, 18 Nov 2015 12:05:11 +0100
Subject: [PATCH 064/335] radeon: split hw query buffer handling from cs emit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The idea here is that driver queries implemented outside of common code
will use the same query buffer handling with different logic for starting
and stopping the corresponding counters.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
[Fixed a rebase conflict and re-tested before pushing.]
---
 src/gallium/drivers/radeon/r600_query.c | 234 ++++++++++++++----------
 src/gallium/drivers/radeon/r600_query.h |  20 ++
 2 files changed, 153 insertions(+), 101 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 556077e860f..b40cfa1f03a 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -229,21 +229,10 @@ void r600_query_hw_destroy(struct r600_common_context *rctx,
 	FREE(rquery);
 }
 
-static bool r600_is_timer_query(unsigned type)
+static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx,
+						   struct r600_query_hw *query)
 {
-	return type == PIPE_QUERY_TIME_ELAPSED ||
-	       type == PIPE_QUERY_TIMESTAMP;
-}
-
-static bool r600_query_needs_begin(unsigned type)
-{
-	return type != PIPE_QUERY_TIMESTAMP;
-}
-
-static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx, unsigned type)
-{
-	unsigned j, i, num_results, buf_size = 4096;
-	uint32_t *results;
+	unsigned buf_size = 4096;
 
 	/* Queries are normally read by the CPU after
 	 * being written by the gpu, hence staging is probably a good
@@ -253,14 +242,34 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c
 		pipe_buffer_create(ctx->b.screen, PIPE_BIND_CUSTOM,
 				   PIPE_USAGE_STAGING, buf_size);
 
-	switch (type) {
-	case PIPE_QUERY_OCCLUSION_COUNTER:
-	case PIPE_QUERY_OCCLUSION_PREDICATE:
-		results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
-		memset(results, 0, buf_size);
+	if (query->ops->prepare_buffer)
+		query->ops->prepare_buffer(ctx, query, buf);
+
+	return buf;
+}
+
+static void r600_query_hw_prepare_buffer(struct r600_common_context *ctx,
+					 struct r600_query_hw *query,
+					 struct r600_resource *buffer)
+ {
+	uint32_t *results;
+
+	if (query->b.type == PIPE_QUERY_TIME_ELAPSED ||
+	    query->b.type == PIPE_QUERY_TIMESTAMP)
+		return;
+
+	results = r600_buffer_map_sync_with_rings(ctx, buffer,
+						  PIPE_TRANSFER_WRITE);
+
+	memset(results, 0, buffer->b.b.width0);
+
+	if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
+	    query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE) {
+		unsigned num_results;
+		unsigned i, j;
 
 		/* Set top bits for unused backends. */
-		num_results = buf_size / (16 * ctx->max_db);
+		num_results = buffer->b.b.width0 / (16 * ctx->max_db);
 		for (j = 0; j < num_results; j++) {
 			for (i = 0; i < ctx->max_db; i++) {
 				if (!(ctx->backend_mask & (1<<i))) {
@@ -270,22 +279,7 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c
 			}
 			results += 4 * ctx->max_db;
 		}
-		break;
-	case PIPE_QUERY_TIME_ELAPSED:
-	case PIPE_QUERY_TIMESTAMP:
-		break;
-	case PIPE_QUERY_PRIMITIVES_EMITTED:
-	case PIPE_QUERY_PRIMITIVES_GENERATED:
-	case PIPE_QUERY_SO_STATISTICS:
-	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-	case PIPE_QUERY_PIPELINE_STATISTICS:
-		results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
-		memset(results, 0, buf_size);
-		break;
-	default:
-		assert(0);
 	}
-	return buf;
 }
 
 static boolean r600_query_hw_begin(struct r600_common_context *, struct r600_query *);
@@ -301,6 +295,21 @@ static struct r600_query_ops query_hw_ops = {
 	.get_result = r600_query_hw_get_result,
 };
 
+static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
+					struct r600_query_hw *query,
+					struct r600_resource *buffer,
+					uint64_t va);
+static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
+				       struct r600_query_hw *query,
+				       struct r600_resource *buffer,
+				       uint64_t va);
+
+static struct r600_query_hw_ops query_hw_default_hw_ops = {
+	.prepare_buffer = r600_query_hw_prepare_buffer,
+	.emit_start = r600_query_hw_do_emit_start,
+	.emit_stop = r600_query_hw_do_emit_stop,
+};
+
 static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
 					       unsigned query_type,
 					       unsigned index)
@@ -311,6 +320,7 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
 
 	query->b.type = query_type;
 	query->b.ops = &query_hw_ops;
+	query->ops = &query_hw_default_hw_ops;
 
 	switch (query_type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER:
@@ -321,10 +331,13 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
 	case PIPE_QUERY_TIME_ELAPSED:
 		query->result_size = 16;
 		query->num_cs_dw = 8;
+		query->flags = R600_QUERY_HW_FLAG_TIMER;
 		break;
 	case PIPE_QUERY_TIMESTAMP:
 		query->result_size = 8;
 		query->num_cs_dw = 8;
+		query->flags = R600_QUERY_HW_FLAG_TIMER |
+			       R600_QUERY_HW_FLAG_NO_START;
 		break;
 	case PIPE_QUERY_PRIMITIVES_EMITTED:
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
@@ -346,7 +359,7 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
 		return NULL;
 	}
 
-	query->buffer.buf = r600_new_query_buffer(rctx, query_type);
+	query->buffer.buf = r600_new_query_buffer(rctx, query);
 	if (!query->buffer.buf) {
 		FREE(query);
 		return NULL;
@@ -385,10 +398,54 @@ static unsigned event_type_for_stream(struct r600_query_hw *query)
 	}
 }
 
-static void r600_emit_query_begin(struct r600_common_context *ctx,
-				  struct r600_query_hw *query)
+static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
+					struct r600_query_hw *query,
+					struct r600_resource *buffer,
+					uint64_t va)
 {
 	struct radeon_winsys_cs *cs = ctx->gfx.cs;
+
+	switch (query->b.type) {
+	case PIPE_QUERY_OCCLUSION_COUNTER:
+	case PIPE_QUERY_OCCLUSION_PREDICATE:
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
+		radeon_emit(cs, va);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
+		break;
+	case PIPE_QUERY_PRIMITIVES_EMITTED:
+	case PIPE_QUERY_PRIMITIVES_GENERATED:
+	case PIPE_QUERY_SO_STATISTICS:
+	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+		radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
+		radeon_emit(cs, va);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
+		break;
+	case PIPE_QUERY_TIME_ELAPSED:
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
+		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
+		radeon_emit(cs, va);
+		radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
+		radeon_emit(cs, 0);
+		radeon_emit(cs, 0);
+		break;
+	case PIPE_QUERY_PIPELINE_STATISTICS:
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
+		radeon_emit(cs, va);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
+		break;
+	default:
+		assert(0);
+	}
+	r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
+			RADEON_PRIO_QUERY);
+}
+
+static void r600_query_hw_emit_start(struct r600_common_context *ctx,
+				     struct r600_query_hw *query)
+{
 	uint64_t va;
 
 	r600_update_occlusion_query_state(ctx, query->b.type, 1);
@@ -399,7 +456,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx,
 	if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
 		struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
 		*qbuf = query->buffer;
-		query->buffer.buf = r600_new_query_buffer(ctx, query->b.type);
+		query->buffer.buf = r600_new_query_buffer(ctx, query);
 		query->buffer.results_end = 0;
 		query->buffer.previous = qbuf;
 	}
@@ -407,67 +464,26 @@ static void r600_emit_query_begin(struct r600_common_context *ctx,
 	/* emit begin query */
 	va = query->buffer.buf->gpu_address + query->buffer.results_end;
 
-	switch (query->b.type) {
-	case PIPE_QUERY_OCCLUSION_COUNTER:
-	case PIPE_QUERY_OCCLUSION_PREDICATE:
-		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
-		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
-		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32) & 0xFFFF);
-		break;
-	case PIPE_QUERY_PRIMITIVES_EMITTED:
-	case PIPE_QUERY_PRIMITIVES_GENERATED:
-	case PIPE_QUERY_SO_STATISTICS:
-	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
-		radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
-		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32) & 0xFFFF);
-		break;
-	case PIPE_QUERY_TIME_ELAPSED:
-		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
-		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
-		radeon_emit(cs, va);
-		radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
-		radeon_emit(cs, 0);
-		radeon_emit(cs, 0);
-		break;
-	case PIPE_QUERY_PIPELINE_STATISTICS:
-		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
-		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
-		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32) & 0xFFFF);
-		break;
-	default:
-		assert(0);
-	}
-	r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
-			RADEON_PRIO_QUERY);
+	query->ops->emit_start(ctx, query, query->buffer.buf, va);
 
-	if (r600_is_timer_query(query->b.type))
+	if (query->flags & R600_QUERY_HW_FLAG_TIMER)
 		ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw;
 	else
 		ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
+
 }
 
-static void r600_emit_query_end(struct r600_common_context *ctx,
-				struct r600_query_hw *query)
+static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
+				       struct r600_query_hw *query,
+				       struct r600_resource *buffer,
+				       uint64_t va)
 {
 	struct radeon_winsys_cs *cs = ctx->gfx.cs;
-	uint64_t va;
 
-	/* The queries which need begin already called this in begin_query. */
-	if (!r600_query_needs_begin(query->b.type)) {
-		ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw, FALSE);
-	}
-
-	va = query->buffer.buf->gpu_address;
-
-	/* emit end query */
 	switch (query->b.type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER:
 	case PIPE_QUERY_OCCLUSION_PREDICATE:
-		va += query->buffer.results_end + 8;
+		va += 8;
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
 		radeon_emit(cs, va);
@@ -477,14 +493,14 @@ static void r600_emit_query_end(struct r600_common_context *ctx,
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
 	case PIPE_QUERY_SO_STATISTICS:
 	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-		va += query->buffer.results_end + query->result_size/2;
+		va += query->result_size/2;
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
 		radeon_emit(cs, va);
 		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
-		va += query->buffer.results_end + query->result_size/2;
+		va += query->result_size/2;
 		/* fall through */
 	case PIPE_QUERY_TIMESTAMP:
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
@@ -495,7 +511,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx,
 		radeon_emit(cs, 0);
 		break;
 	case PIPE_QUERY_PIPELINE_STATISTICS:
-		va += query->buffer.results_end + query->result_size/2;
+		va += query->result_size/2;
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
 		radeon_emit(cs, va);
@@ -506,11 +522,27 @@ static void r600_emit_query_end(struct r600_common_context *ctx,
 	}
 	r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
 			RADEON_PRIO_QUERY);
+}
+
+static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
+				    struct r600_query_hw *query)
+{
+	uint64_t va;
+
+	/* The queries which need begin already called this in begin_query. */
+	if (query->flags & R600_QUERY_HW_FLAG_NO_START) {
+		ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw, FALSE);
+	}
+
+	/* emit end query */
+	va = query->buffer.buf->gpu_address + query->buffer.results_end;
+
+	query->ops->emit_stop(ctx, query, query->buffer.buf, va);
 
 	query->buffer.results_end += query->result_size;
 
-	if (r600_query_needs_begin(query->b.type)) {
-		if (r600_is_timer_query(query->b.type))
+	if (!(query->flags & R600_QUERY_HW_FLAG_NO_START)) {
+		if (query->flags & R600_QUERY_HW_FLAG_TIMER)
 			ctx->num_cs_dw_timer_queries_suspend -= query->num_cs_dw;
 		else
 			ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw;
@@ -613,7 +645,7 @@ static boolean r600_query_hw_begin(struct r600_common_context *rctx,
 	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
 	struct r600_query_buffer *prev = query->buffer.previous;
 
-	if (!r600_query_needs_begin(rquery->type)) {
+	if (query->flags & R600_QUERY_HW_FLAG_NO_START) {
 		assert(0);
 		return false;
 	}
@@ -630,15 +662,15 @@ static boolean r600_query_hw_begin(struct r600_common_context *rctx,
 	if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
 	    !rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
 		pipe_resource_reference((struct pipe_resource**)&query->buffer.buf, NULL);
-		query->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
+		query->buffer.buf = r600_new_query_buffer(rctx, query);
 	}
 
 	query->buffer.results_end = 0;
 	query->buffer.previous = NULL;
 
-	r600_emit_query_begin(rctx, query);
+	r600_query_hw_emit_start(rctx, query);
 
-	if (r600_is_timer_query(rquery->type))
+	if (query->flags & R600_QUERY_HW_FLAG_TIMER)
 		LIST_ADDTAIL(&query->list, &rctx->active_timer_queries);
 	else
 		LIST_ADDTAIL(&query->list, &rctx->active_nontimer_queries);
@@ -658,9 +690,9 @@ static void r600_query_hw_end(struct r600_common_context *rctx,
 {
 	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
 
-	r600_emit_query_end(rctx, query);
+	r600_query_hw_emit_stop(rctx, query);
 
-	if (r600_query_needs_begin(rquery->type))
+	if (!(query->flags & R600_QUERY_HW_FLAG_NO_START))
 		LIST_DELINIT(&query->list);
 }
 
@@ -901,7 +933,7 @@ static void r600_suspend_queries(struct r600_common_context *ctx,
 	struct r600_query_hw *query;
 
 	LIST_FOR_EACH_ENTRY(query, query_list, list) {
-		r600_emit_query_end(ctx, query);
+		r600_query_hw_emit_stop(ctx, query);
 	}
 	assert(*num_cs_dw_queries_suspend == 0);
 }
@@ -956,7 +988,7 @@ static void r600_resume_queries(struct r600_common_context *ctx,
 	ctx->need_gfx_cs_space(&ctx->b, num_cs_dw, TRUE);
 
 	LIST_FOR_EACH_ENTRY(query, query_list, list) {
-		r600_emit_query_begin(ctx, query);
+		r600_query_hw_emit_start(ctx, query);
 	}
 }
 
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
index baad5825d3b..c5b720bbc91 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -33,6 +33,7 @@
 
 struct r600_common_context;
 struct r600_query;
+struct r600_query_hw;
 struct r600_resource;
 
 #define R600_QUERY_DRAW_CALLS		(PIPE_QUERY_DRIVER_SPECIFIC + 0)
@@ -67,6 +68,23 @@ struct r600_query {
 	unsigned type;
 };
 
+enum {
+	R600_QUERY_HW_FLAG_NO_START = (1 << 0),
+	R600_QUERY_HW_FLAG_TIMER = (1 << 1),
+};
+
+struct r600_query_hw_ops {
+	void (*prepare_buffer)(struct r600_common_context *,
+			       struct r600_query_hw *,
+			       struct r600_resource *);
+	void (*emit_start)(struct r600_common_context *,
+			   struct r600_query_hw *,
+			   struct r600_resource *buffer, uint64_t va);
+	void (*emit_stop)(struct r600_common_context *,
+			  struct r600_query_hw *,
+			  struct r600_resource *buffer, uint64_t va);
+};
+
 struct r600_query_buffer {
 	/* The buffer where query results are stored. */
 	struct r600_resource		*buf;
@@ -80,6 +98,8 @@ struct r600_query_buffer {
 
 struct r600_query_hw {
 	struct r600_query b;
+	struct r600_query_hw_ops *ops;
+	unsigned flags;
 
 	/* The query buffer and how many results are in it. */
 	struct r600_query_buffer buffer;

From 50f0f938e3a577647fdfb6bdbb4ad3da252aa791 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Fri, 13 Nov 2015 00:27:34 +0100
Subject: [PATCH 065/335] radeon: implement r600_query_hw_get_result via
 function pointers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We will need the clear_result override for the batch query implementation.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/drivers/radeon/r600_query.c | 189 +++++++++++-------------
 src/gallium/drivers/radeon/r600_query.h |   4 +
 2 files changed, 94 insertions(+), 99 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index b40cfa1f03a..603a0d0d5e1 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -303,11 +303,18 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
 				       struct r600_query_hw *query,
 				       struct r600_resource *buffer,
 				       uint64_t va);
+static void r600_query_hw_add_result(struct r600_common_context *ctx,
+				     struct r600_query_hw *, void *buffer,
+				     union pipe_query_result *result);
+static void r600_query_hw_clear_result(struct r600_query_hw *,
+				       union pipe_query_result *);
 
 static struct r600_query_hw_ops query_hw_default_hw_ops = {
 	.prepare_buffer = r600_query_hw_prepare_buffer,
 	.emit_start = r600_query_hw_do_emit_start,
 	.emit_stop = r600_query_hw_do_emit_stop,
+	.clear_result = r600_query_hw_clear_result,
+	.add_result = r600_query_hw_add_result,
 };
 
 static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
@@ -696,7 +703,7 @@ static void r600_query_hw_end(struct r600_common_context *rctx,
 		LIST_DELINIT(&query->list);
 }
 
-static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
+static unsigned r600_query_read_result(void *map, unsigned start_index, unsigned end_index,
 				       bool test_status_bit)
 {
 	uint32_t *current_result = (uint32_t*)map;
@@ -714,47 +721,36 @@ static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned
 	return 0;
 }
 
-static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
-					    struct r600_query_hw *query,
-					    struct r600_query_buffer *qbuf,
-					    boolean wait,
-					    union pipe_query_result *result)
+static void r600_query_hw_add_result(struct r600_common_context *ctx,
+				     struct r600_query_hw *query,
+				     void *buffer,
+				     union pipe_query_result *result)
 {
-	unsigned results_base = 0;
-	char *map;
-
-	map = r600_buffer_map_sync_with_rings(ctx, qbuf->buf,
-						PIPE_TRANSFER_READ |
-						(wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
-	if (!map)
-		return FALSE;
-
-	/* count all results across all data blocks */
 	switch (query->b.type) {
-	case PIPE_QUERY_OCCLUSION_COUNTER:
-		while (results_base != qbuf->results_end) {
+	case PIPE_QUERY_OCCLUSION_COUNTER: {
+		unsigned results_base = 0;
+		while (results_base != query->result_size) {
 			result->u64 +=
-				r600_query_read_result(map + results_base, 0, 2, true);
+				r600_query_read_result(buffer + results_base, 0, 2, true);
 			results_base += 16;
 		}
 		break;
-	case PIPE_QUERY_OCCLUSION_PREDICATE:
-		while (results_base != qbuf->results_end) {
+	}
+	case PIPE_QUERY_OCCLUSION_PREDICATE: {
+		unsigned results_base = 0;
+		while (results_base != query->result_size) {
 			result->b = result->b ||
-				r600_query_read_result(map + results_base, 0, 2, true) != 0;
+				r600_query_read_result(buffer + results_base, 0, 2, true) != 0;
 			results_base += 16;
 		}
 		break;
+	}
 	case PIPE_QUERY_TIME_ELAPSED:
-		while (results_base != qbuf->results_end) {
-			result->u64 +=
-				r600_query_read_result(map + results_base, 0, 2, false);
-			results_base += query->result_size;
-		}
+		result->u64 += r600_query_read_result(buffer, 0, 2, false);
 		break;
 	case PIPE_QUERY_TIMESTAMP:
 	{
-		uint32_t *current_result = (uint32_t*)map;
+		uint32_t *current_result = (uint32_t*)buffer;
 		result->u64 = (uint64_t)current_result[0] |
 			      (uint64_t)current_result[1] << 32;
 		break;
@@ -766,84 +762,64 @@ static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
 		 *    u64 PrimitiveStorageNeeded;
 		 * }
 		 * We only need NumPrimitivesWritten here. */
-		while (results_base != qbuf->results_end) {
-			result->u64 +=
-				r600_query_read_result(map + results_base, 2, 6, true);
-			results_base += query->result_size;
-		}
+		result->u64 += r600_query_read_result(buffer, 2, 6, true);
 		break;
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
 		/* Here we read PrimitiveStorageNeeded. */
-		while (results_base != qbuf->results_end) {
-			result->u64 +=
-				r600_query_read_result(map + results_base, 0, 4, true);
-			results_base += query->result_size;
-		}
+		result->u64 += r600_query_read_result(buffer, 0, 4, true);
 		break;
 	case PIPE_QUERY_SO_STATISTICS:
-		while (results_base != qbuf->results_end) {
-			result->so_statistics.num_primitives_written +=
-				r600_query_read_result(map + results_base, 2, 6, true);
-			result->so_statistics.primitives_storage_needed +=
-				r600_query_read_result(map + results_base, 0, 4, true);
-			results_base += query->result_size;
-		}
+		result->so_statistics.num_primitives_written +=
+			r600_query_read_result(buffer, 2, 6, true);
+		result->so_statistics.primitives_storage_needed +=
+			r600_query_read_result(buffer, 0, 4, true);
 		break;
 	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-		while (results_base != qbuf->results_end) {
-			result->b = result->b ||
-				r600_query_read_result(map + results_base, 2, 6, true) !=
-				r600_query_read_result(map + results_base, 0, 4, true);
-			results_base += query->result_size;
-		}
+		result->b = result->b ||
+			r600_query_read_result(buffer, 2, 6, true) !=
+			r600_query_read_result(buffer, 0, 4, true);
 		break;
 	case PIPE_QUERY_PIPELINE_STATISTICS:
 		if (ctx->chip_class >= EVERGREEN) {
-			while (results_base != qbuf->results_end) {
-				result->pipeline_statistics.ps_invocations +=
-					r600_query_read_result(map + results_base, 0, 22, false);
-				result->pipeline_statistics.c_primitives +=
-					r600_query_read_result(map + results_base, 2, 24, false);
-				result->pipeline_statistics.c_invocations +=
-					r600_query_read_result(map + results_base, 4, 26, false);
-				result->pipeline_statistics.vs_invocations +=
-					r600_query_read_result(map + results_base, 6, 28, false);
-				result->pipeline_statistics.gs_invocations +=
-					r600_query_read_result(map + results_base, 8, 30, false);
-				result->pipeline_statistics.gs_primitives +=
-					r600_query_read_result(map + results_base, 10, 32, false);
-				result->pipeline_statistics.ia_primitives +=
-					r600_query_read_result(map + results_base, 12, 34, false);
-				result->pipeline_statistics.ia_vertices +=
-					r600_query_read_result(map + results_base, 14, 36, false);
-				result->pipeline_statistics.hs_invocations +=
-					r600_query_read_result(map + results_base, 16, 38, false);
-				result->pipeline_statistics.ds_invocations +=
-					r600_query_read_result(map + results_base, 18, 40, false);
-				result->pipeline_statistics.cs_invocations +=
-					r600_query_read_result(map + results_base, 20, 42, false);
-				results_base += query->result_size;
-			}
+			result->pipeline_statistics.ps_invocations +=
+				r600_query_read_result(buffer, 0, 22, false);
+			result->pipeline_statistics.c_primitives +=
+				r600_query_read_result(buffer, 2, 24, false);
+			result->pipeline_statistics.c_invocations +=
+				r600_query_read_result(buffer, 4, 26, false);
+			result->pipeline_statistics.vs_invocations +=
+				r600_query_read_result(buffer, 6, 28, false);
+			result->pipeline_statistics.gs_invocations +=
+				r600_query_read_result(buffer, 8, 30, false);
+			result->pipeline_statistics.gs_primitives +=
+				r600_query_read_result(buffer, 10, 32, false);
+			result->pipeline_statistics.ia_primitives +=
+				r600_query_read_result(buffer, 12, 34, false);
+			result->pipeline_statistics.ia_vertices +=
+				r600_query_read_result(buffer, 14, 36, false);
+			result->pipeline_statistics.hs_invocations +=
+				r600_query_read_result(buffer, 16, 38, false);
+			result->pipeline_statistics.ds_invocations +=
+				r600_query_read_result(buffer, 18, 40, false);
+			result->pipeline_statistics.cs_invocations +=
+				r600_query_read_result(buffer, 20, 42, false);
 		} else {
-			while (results_base != qbuf->results_end) {
-				result->pipeline_statistics.ps_invocations +=
-					r600_query_read_result(map + results_base, 0, 16, false);
-				result->pipeline_statistics.c_primitives +=
-					r600_query_read_result(map + results_base, 2, 18, false);
-				result->pipeline_statistics.c_invocations +=
-					r600_query_read_result(map + results_base, 4, 20, false);
-				result->pipeline_statistics.vs_invocations +=
-					r600_query_read_result(map + results_base, 6, 22, false);
-				result->pipeline_statistics.gs_invocations +=
-					r600_query_read_result(map + results_base, 8, 24, false);
-				result->pipeline_statistics.gs_primitives +=
-					r600_query_read_result(map + results_base, 10, 26, false);
-				result->pipeline_statistics.ia_primitives +=
-					r600_query_read_result(map + results_base, 12, 28, false);
-				result->pipeline_statistics.ia_vertices +=
-					r600_query_read_result(map + results_base, 14, 30, false);
-				results_base += query->result_size;
-			}
+			result->pipeline_statistics.ps_invocations +=
+				r600_query_read_result(buffer, 0, 16, false);
+			result->pipeline_statistics.c_primitives +=
+				r600_query_read_result(buffer, 2, 18, false);
+			result->pipeline_statistics.c_invocations +=
+				r600_query_read_result(buffer, 4, 20, false);
+			result->pipeline_statistics.vs_invocations +=
+				r600_query_read_result(buffer, 6, 22, false);
+			result->pipeline_statistics.gs_invocations +=
+				r600_query_read_result(buffer, 8, 24, false);
+			result->pipeline_statistics.gs_primitives +=
+				r600_query_read_result(buffer, 10, 26, false);
+			result->pipeline_statistics.ia_primitives +=
+				r600_query_read_result(buffer, 12, 28, false);
+			result->pipeline_statistics.ia_vertices +=
+				r600_query_read_result(buffer, 14, 30, false);
 		}
 #if 0 /* for testing */
 		printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
@@ -865,8 +841,6 @@ static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
 	default:
 		assert(0);
 	}
-
-	return TRUE;
 }
 
 static boolean r600_get_query_result(struct pipe_context *ctx,
@@ -879,6 +853,12 @@ static boolean r600_get_query_result(struct pipe_context *ctx,
 	return rquery->ops->get_result(rctx, rquery, wait, result);
 }
 
+static void r600_query_hw_clear_result(struct r600_query_hw *query,
+				       union pipe_query_result *result)
+{
+	util_query_clear_result(result, query->b.type);
+}
+
 static boolean r600_query_hw_get_result(struct r600_common_context *rctx,
 					struct r600_query *rquery,
 					boolean wait, union pipe_query_result *result)
@@ -886,11 +866,22 @@ static boolean r600_query_hw_get_result(struct r600_common_context *rctx,
 	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
 	struct r600_query_buffer *qbuf;
 
-	util_query_clear_result(result, rquery->type);
+	query->ops->clear_result(query, result);
 
 	for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
-		if (!r600_get_query_buffer_result(rctx, query, qbuf, wait, result)) {
+		unsigned results_base = 0;
+		void *map;
+
+		map = r600_buffer_map_sync_with_rings(rctx, qbuf->buf,
+						      PIPE_TRANSFER_READ |
+						      (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
+		if (!map)
 			return FALSE;
+
+		while (results_base != qbuf->results_end) {
+			query->ops->add_result(rctx, query, map + results_base,
+					       result);
+			results_base += query->result_size;
 		}
 	}
 
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
index c5b720bbc91..f6dc74aa86d 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -83,6 +83,10 @@ struct r600_query_hw_ops {
 	void (*emit_stop)(struct r600_common_context *,
 			  struct r600_query_hw *,
 			  struct r600_resource *buffer, uint64_t va);
+	void (*clear_result)(struct r600_query_hw *, union pipe_query_result *);
+	void (*add_result)(struct r600_common_context *ctx,
+			   struct r600_query_hw *, void *buffer,
+			   union pipe_query_result *result);
 };
 
 struct r600_query_buffer {

From ffd01b7781eebd207c16a9df5ea2858f6f5be544 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Wed, 18 Nov 2015 12:06:58 +0100
Subject: [PATCH 066/335] radeon: expose r600_query_hw functions for reuse
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
[Fixed a rebase conflict and re-tested before pushing.]
---
 src/gallium/drivers/radeon/r600_query.c | 31 ++++++++++++++-----------
 src/gallium/drivers/radeon/r600_query.h | 10 ++++++++
 2 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 603a0d0d5e1..079e02ee90f 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -282,12 +282,6 @@ static void r600_query_hw_prepare_buffer(struct r600_common_context *ctx,
 	}
 }
 
-static boolean r600_query_hw_begin(struct r600_common_context *, struct r600_query *);
-static void r600_query_hw_end(struct r600_common_context *, struct r600_query *);
-static boolean r600_query_hw_get_result(struct r600_common_context *,
-					struct r600_query *, boolean wait,
-					union pipe_query_result *result);
-
 static struct r600_query_ops query_hw_ops = {
 	.destroy = r600_query_hw_destroy,
 	.begin = r600_query_hw_begin,
@@ -317,6 +311,16 @@ static struct r600_query_hw_ops query_hw_default_hw_ops = {
 	.add_result = r600_query_hw_add_result,
 };
 
+boolean r600_query_hw_init(struct r600_common_context *rctx,
+			   struct r600_query_hw *query)
+{
+	query->buffer.buf = r600_new_query_buffer(rctx, query);
+	if (!query->buffer.buf)
+		return FALSE;
+
+	return TRUE;
+}
+
 static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
 					       unsigned query_type,
 					       unsigned index)
@@ -366,8 +370,7 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
 		return NULL;
 	}
 
-	query->buffer.buf = r600_new_query_buffer(rctx, query);
-	if (!query->buffer.buf) {
+	if (!r600_query_hw_init(rctx, query)) {
 		FREE(query);
 		return NULL;
 	}
@@ -646,8 +649,8 @@ static boolean r600_begin_query(struct pipe_context *ctx,
 	return rquery->ops->begin(rctx, rquery);
 }
 
-static boolean r600_query_hw_begin(struct r600_common_context *rctx,
-				   struct r600_query *rquery)
+boolean r600_query_hw_begin(struct r600_common_context *rctx,
+			    struct r600_query *rquery)
 {
 	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
 	struct r600_query_buffer *prev = query->buffer.previous;
@@ -692,7 +695,7 @@ static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
 	rquery->ops->end(rctx, rquery);
 }
 
-static void r600_query_hw_end(struct r600_common_context *rctx,
+void r600_query_hw_end(struct r600_common_context *rctx,
 			      struct r600_query *rquery)
 {
 	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
@@ -859,9 +862,9 @@ static void r600_query_hw_clear_result(struct r600_query_hw *query,
 	util_query_clear_result(result, query->b.type);
 }
 
-static boolean r600_query_hw_get_result(struct r600_common_context *rctx,
-					struct r600_query *rquery,
-					boolean wait, union pipe_query_result *result)
+boolean r600_query_hw_get_result(struct r600_common_context *rctx,
+				 struct r600_query *rquery,
+				 boolean wait, union pipe_query_result *result)
 {
 	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
 	struct r600_query_buffer *qbuf;
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
index f6dc74aa86d..29c635ea825 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -118,7 +118,17 @@ struct r600_query_hw {
 	unsigned stream;
 };
 
+boolean r600_query_hw_init(struct r600_common_context *rctx,
+			   struct r600_query_hw *query);
 void r600_query_hw_destroy(struct r600_common_context *rctx,
 			   struct r600_query *rquery);
+boolean r600_query_hw_begin(struct r600_common_context *rctx,
+			    struct r600_query *rquery);
+void r600_query_hw_end(struct r600_common_context *rctx,
+		       struct r600_query *rquery);
+boolean r600_query_hw_get_result(struct r600_common_context *rctx,
+				 struct r600_query *rquery,
+				 boolean wait,
+				 union pipe_query_result *result);
 
 #endif /* R600_QUERY_H */

From 27ce75ed12c814b0d4b96c32f6e89ba308910e26 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Fri, 13 Nov 2015 00:38:36 +0100
Subject: [PATCH 067/335] radeon: count cs dwords separately for query begin
 and end
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This will be important for perfcounter queries.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/drivers/radeon/r600_query.c | 33 ++++++++++++++-----------
 src/gallium/drivers/radeon/r600_query.h |  3 ++-
 2 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 079e02ee90f..2797bcb76b7 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -337,16 +337,18 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
 	case PIPE_QUERY_OCCLUSION_COUNTER:
 	case PIPE_QUERY_OCCLUSION_PREDICATE:
 		query->result_size = 16 * rctx->max_db;
-		query->num_cs_dw = 6;
+		query->num_cs_dw_begin = 6;
+		query->num_cs_dw_end = 6;
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
 		query->result_size = 16;
-		query->num_cs_dw = 8;
+		query->num_cs_dw_begin = 8;
+		query->num_cs_dw_end = 8;
 		query->flags = R600_QUERY_HW_FLAG_TIMER;
 		break;
 	case PIPE_QUERY_TIMESTAMP:
 		query->result_size = 8;
-		query->num_cs_dw = 8;
+		query->num_cs_dw_end = 8;
 		query->flags = R600_QUERY_HW_FLAG_TIMER |
 			       R600_QUERY_HW_FLAG_NO_START;
 		break;
@@ -356,13 +358,15 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
 	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
 		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
 		query->result_size = 32;
-		query->num_cs_dw = 6;
+		query->num_cs_dw_begin = 6;
+		query->num_cs_dw_end = 6;
 		query->stream = index;
 		break;
 	case PIPE_QUERY_PIPELINE_STATISTICS:
 		/* 11 values on EG, 8 on R600. */
 		query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16;
-		query->num_cs_dw = 6;
+		query->num_cs_dw_begin = 6;
+		query->num_cs_dw_end = 6;
 		break;
 	default:
 		assert(0);
@@ -460,7 +464,9 @@ static void r600_query_hw_emit_start(struct r600_common_context *ctx,
 
 	r600_update_occlusion_query_state(ctx, query->b.type, 1);
 	r600_update_prims_generated_query_state(ctx, query->b.type, 1);
-	ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw * 2, TRUE);
+
+	ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_begin + query->num_cs_dw_end,
+			       TRUE);
 
 	/* Get a new query buffer if needed. */
 	if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
@@ -477,10 +483,9 @@ static void r600_query_hw_emit_start(struct r600_common_context *ctx,
 	query->ops->emit_start(ctx, query, query->buffer.buf, va);
 
 	if (query->flags & R600_QUERY_HW_FLAG_TIMER)
-		ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw;
+		ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw_end;
 	else
-		ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
-
+		ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw_end;
 }
 
 static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
@@ -541,7 +546,7 @@ static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
 
 	/* The queries which need begin already called this in begin_query. */
 	if (query->flags & R600_QUERY_HW_FLAG_NO_START) {
-		ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw, FALSE);
+		ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_end, FALSE);
 	}
 
 	/* emit end query */
@@ -553,9 +558,9 @@ static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
 
 	if (!(query->flags & R600_QUERY_HW_FLAG_NO_START)) {
 		if (query->flags & R600_QUERY_HW_FLAG_TIMER)
-			ctx->num_cs_dw_timer_queries_suspend -= query->num_cs_dw;
+			ctx->num_cs_dw_timer_queries_suspend -= query->num_cs_dw_end;
 		else
-			ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw;
+			ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw_end;
 	}
 
 	r600_update_occlusion_query_state(ctx, query->b.type, -1);
@@ -952,14 +957,14 @@ static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *
 
 	LIST_FOR_EACH_ENTRY(query, query_list, list) {
 		/* begin + end */
-		num_dw += query->num_cs_dw * 2;
+		num_dw += query->num_cs_dw_begin + query->num_cs_dw_end;
 
 		/* Workaround for the fact that
 		 * num_cs_dw_nontimer_queries_suspend is incremented for every
 		 * resumed query, which raises the bar in need_cs_space for
 		 * queries about to be resumed.
 		 */
-		num_dw += query->num_cs_dw;
+		num_dw += query->num_cs_dw_end;
 	}
 	/* primitives generated query */
 	num_dw += ctx->streamout.enable_atom.num_dw;
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
index 29c635ea825..9bd3b5d5a02 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -111,7 +111,8 @@ struct r600_query_hw {
 	 * this can be one or two numbers, or it could even be a size of a structure. */
 	unsigned result_size;
 	/* The number of dwords for begin_query or end_query. */
-	unsigned num_cs_dw;
+	unsigned num_cs_dw_begin;
+	unsigned num_cs_dw_end;
 	/* Linked list of queries */
 	struct list_head list;
 	/* For transform feedback: which stream the query is for */

From 1a90e3e1e3ee70504f2ce57462fb592b448269be Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 16 Nov 2015 10:31:46 -0700
Subject: [PATCH 068/335] svga: add/use new svga_sampler_format() function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is important for the case of sampling from a depth texture.  In
that case, we need to sample the texture as if it were a single-channel
color texture.  For other/color formats, we can use the format as-is.

Reviewed-by: Charmaine Lee <charmainel@vmware.com>
Reviewed-by: José Fonseca <jfonseca@vmware.com>
---
 src/gallium/drivers/svga/svga_format.c        | 23 +++++++++++++++++++
 src/gallium/drivers/svga/svga_format.h        |  4 ++++
 src/gallium/drivers/svga/svga_state_sampler.c |  3 +++
 3 files changed, 30 insertions(+)

diff --git a/src/gallium/drivers/svga/svga_format.c b/src/gallium/drivers/svga/svga_format.c
index 28b8064bf70..41bddd18a84 100644
--- a/src/gallium/drivers/svga/svga_format.c
+++ b/src/gallium/drivers/svga/svga_format.c
@@ -2098,3 +2098,26 @@ svga_typeless_format(SVGA3dSurfaceFormat format)
       return format;
    }
 }
+
+
+/**
+ * Given a surface format, return the corresponding format to use for
+ * a texture sampler.  In most cases, it's the format unchanged, but there
+ * are some special cases.
+ */
+SVGA3dSurfaceFormat
+svga_sampler_format(SVGA3dSurfaceFormat format)
+{
+   switch (format) {
+   case SVGA3D_D16_UNORM:
+      return SVGA3D_R16_UNORM;
+   case SVGA3D_D24_UNORM_S8_UINT:
+      return SVGA3D_R24_UNORM_X8_TYPELESS;
+   case SVGA3D_D32_FLOAT:
+      return SVGA3D_R32_FLOAT;
+   case SVGA3D_D32_FLOAT_S8X24_UINT:
+      return SVGA3D_R32_FLOAT_X8X24_TYPELESS;
+   default:
+      return format;
+   }
+}
diff --git a/src/gallium/drivers/svga/svga_format.h b/src/gallium/drivers/svga/svga_format.h
index 0af218cb01a..9f9a530d473 100644
--- a/src/gallium/drivers/svga/svga_format.h
+++ b/src/gallium/drivers/svga/svga_format.h
@@ -93,4 +93,8 @@ SVGA3dSurfaceFormat
 svga_typeless_format(SVGA3dSurfaceFormat format);
 
 
+SVGA3dSurfaceFormat
+svga_sampler_format(SVGA3dSurfaceFormat format);
+
+
 #endif /* SVGA_FORMAT_H_ */
diff --git a/src/gallium/drivers/svga/svga_state_sampler.c b/src/gallium/drivers/svga/svga_state_sampler.c
index 611d2c6102f..c5d52bbfd14 100644
--- a/src/gallium/drivers/svga/svga_state_sampler.c
+++ b/src/gallium/drivers/svga/svga_state_sampler.c
@@ -108,6 +108,9 @@ svga_validate_pipe_sampler_view(struct svga_context *svga,
                                      PIPE_BIND_SAMPLER_VIEW);
       assert(format != SVGA3D_FORMAT_INVALID);
 
+      /* Convert the format to a sampler-friendly format, if needed */
+      format = svga_sampler_format(format);
+
       if (texture->target == PIPE_BUFFER) {
          viewDesc.buffer.firstElement = sv->base.u.buf.first_element;
          viewDesc.buffer.numElements = (sv->base.u.buf.last_element -

From 1a48326a84e3a5df0c3e6c2c3bd59992df5097ab Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 16 Nov 2015 10:41:20 -0700
Subject: [PATCH 069/335] svga: use more VGPU10 formats
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We always want to prefer the VGPU10 formats over the VGPU9 ones when
we have VGPU10 support.

Original patch by Jose and updated by Brian.

Reviewed-by: Charmaine Lee <charmainel@vmware.com>
Reviewed-by: José Fonseca <jfonseca@vmware.com>
---
 src/gallium/drivers/svga/svga_format.c | 97 ++++++++++++++++++--------
 1 file changed, 67 insertions(+), 30 deletions(-)

diff --git a/src/gallium/drivers/svga/svga_format.c b/src/gallium/drivers/svga/svga_format.c
index 41bddd18a84..0104e8a273a 100644
--- a/src/gallium/drivers/svga/svga_format.c
+++ b/src/gallium/drivers/svga/svga_format.c
@@ -53,17 +53,17 @@ static const struct vgpu10_format_entry format_conversion_table[] =
    { PIPE_FORMAT_A8R8G8B8_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
    { PIPE_FORMAT_X8R8G8B8_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
    { PIPE_FORMAT_B5G5R5A1_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_B5G5R5A1_UNORM,       0 },
-   { PIPE_FORMAT_B4G4R4A4_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_A4R4G4B4,             0 },
+   { PIPE_FORMAT_B4G4R4A4_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
    { PIPE_FORMAT_B5G6R5_UNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_B5G6R5_UNORM,         0 },
    { PIPE_FORMAT_R10G10B10A2_UNORM,     SVGA3D_R10G10B10A2_UNORM,   SVGA3D_R10G10B10A2_UNORM,    0 },
-   { PIPE_FORMAT_L8_UNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_LUMINANCE8,           0 },
+   { PIPE_FORMAT_L8_UNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
    { PIPE_FORMAT_A8_UNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_A8_UNORM,             0 },
    { PIPE_FORMAT_I8_UNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
    { PIPE_FORMAT_L8A8_UNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
    { PIPE_FORMAT_L16_UNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
    { PIPE_FORMAT_UYVY,                  SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
    { PIPE_FORMAT_YUYV,                  SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
-   { PIPE_FORMAT_Z16_UNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_Z_D16,                0 },
+   { PIPE_FORMAT_Z16_UNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_D16_UNORM,            0 },
    { PIPE_FORMAT_Z32_UNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
    { PIPE_FORMAT_Z32_FLOAT,             SVGA3D_FORMAT_INVALID,      SVGA3D_D32_FLOAT,            0 },
    { PIPE_FORMAT_Z24_UNORM_S8_UINT,     SVGA3D_FORMAT_INVALID,      SVGA3D_D24_UNORM_S8_UINT,    0 },
@@ -152,14 +152,14 @@ static const struct vgpu10_format_entry format_conversion_table[] =
    { PIPE_FORMAT_A8R8G8B8_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
    { PIPE_FORMAT_X8R8G8B8_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
    { PIPE_FORMAT_R8G8B8A8_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_R8G8B8A8_UNORM_SRGB,  0 },
-   { PIPE_FORMAT_DXT1_RGB,              SVGA3D_FORMAT_INVALID,      SVGA3D_DXT1,                 0 },
-   { PIPE_FORMAT_DXT1_RGBA,             SVGA3D_FORMAT_INVALID,      SVGA3D_DXT1,                 0 },
-   { PIPE_FORMAT_DXT3_RGBA,             SVGA3D_FORMAT_INVALID,      SVGA3D_DXT3,                 0 },
-   { PIPE_FORMAT_DXT5_RGBA,             SVGA3D_FORMAT_INVALID,      SVGA3D_DXT5,                 0 },
-   { PIPE_FORMAT_DXT1_SRGB,             SVGA3D_FORMAT_INVALID,      SVGA3D_DXT1,                 0 },
-   { PIPE_FORMAT_DXT1_SRGBA,            SVGA3D_FORMAT_INVALID,      SVGA3D_DXT1,                 0 },
-   { PIPE_FORMAT_DXT3_SRGBA,            SVGA3D_FORMAT_INVALID,      SVGA3D_DXT3,                 0 },
-   { PIPE_FORMAT_DXT5_SRGBA,            SVGA3D_FORMAT_INVALID,      SVGA3D_DXT5,                 0 },
+   { PIPE_FORMAT_DXT1_RGB,              SVGA3D_FORMAT_INVALID,      SVGA3D_BC1_UNORM,            0 },
+   { PIPE_FORMAT_DXT1_RGBA,             SVGA3D_FORMAT_INVALID,      SVGA3D_BC1_UNORM,            0 },
+   { PIPE_FORMAT_DXT3_RGBA,             SVGA3D_FORMAT_INVALID,      SVGA3D_BC2_UNORM,            0 },
+   { PIPE_FORMAT_DXT5_RGBA,             SVGA3D_FORMAT_INVALID,      SVGA3D_BC3_UNORM,            0 },
+   { PIPE_FORMAT_DXT1_SRGB,             SVGA3D_FORMAT_INVALID,      SVGA3D_BC1_UNORM_SRGB,       0 },
+   { PIPE_FORMAT_DXT1_SRGBA,            SVGA3D_FORMAT_INVALID,      SVGA3D_BC1_UNORM_SRGB,       0 },
+   { PIPE_FORMAT_DXT3_SRGBA,            SVGA3D_FORMAT_INVALID,      SVGA3D_BC2_UNORM_SRGB,       0 },
+   { PIPE_FORMAT_DXT5_SRGBA,            SVGA3D_FORMAT_INVALID,      SVGA3D_BC3_UNORM_SRGB,       0 },
    { PIPE_FORMAT_RGTC1_UNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_BC4_UNORM,            0 },
    { PIPE_FORMAT_RGTC1_SNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_BC4_SNORM,            0 },
    { PIPE_FORMAT_RGTC2_UNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_BC5_UNORM,            0 },
@@ -472,7 +472,7 @@ struct format_cap {
  * PIPE_FORMAT_Z24_UNORM_S8_UINT is converted to SVGA3D_D24_UNORM_S8_UINT
  * for rendering but converted to SVGA3D_R24_UNORM_X8_TYPELESS for sampling.
  * If we want to query if a format supports both rendering and sampling the
- * host will tell us no for both SVGA3D_D24_UNORM_S8_UINT and
+ * host will tell us no for SVGA3D_D24_UNORM_S8_UINT, SVGA3D_D16_UNORM and
  * SVGA3D_R24_UNORM_X8_TYPELESS.  So we override the host query for those
  * formats and report that both can do rendering and sampling.
  */
@@ -1410,27 +1410,51 @@ static const struct format_cap format_cap_table[] = {
    },
    {
       "SVGA3D_BC1_TYPELESS",
-      SVGA3D_BC1_TYPELESS, 0, 0, 0, 0, 0
+      SVGA3D_BC1_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_BC1_TYPELESS,
+      4, 4, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE
    },
    {
       "SVGA3D_BC1_UNORM_SRGB",
-      SVGA3D_BC1_UNORM_SRGB, 0, 0, 0, 0, 0
+      SVGA3D_BC1_UNORM_SRGB,
+      SVGA3D_DEVCAP_DXFMT_BC1_UNORM_SRGB,
+      4, 4, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE
    },
    {
       "SVGA3D_BC2_TYPELESS",
-      SVGA3D_BC2_TYPELESS, 0, 0, 0, 0, 0
+      SVGA3D_BC2_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_BC2_TYPELESS,
+      4, 4, 16,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE
    },
    {
       "SVGA3D_BC2_UNORM_SRGB",
-      SVGA3D_BC2_UNORM_SRGB, 0, 0, 0, 0, 0
+      SVGA3D_BC2_UNORM_SRGB,
+      SVGA3D_DEVCAP_DXFMT_BC2_UNORM_SRGB,
+      4, 4, 16,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE
    },
    {
       "SVGA3D_BC3_TYPELESS",
-      SVGA3D_BC3_TYPELESS, 0, 0, 0, 0, 0
+      SVGA3D_BC3_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_BC3_TYPELESS,
+      4, 4, 16,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE
    },
    {
       "SVGA3D_BC3_UNORM_SRGB",
-      SVGA3D_BC3_UNORM_SRGB, 0, 0, 0, 0, 0
+      SVGA3D_BC3_UNORM_SRGB,
+      SVGA3D_DEVCAP_DXFMT_BC3_UNORM_SRGB,
+      4, 4, 16,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE
    },
    {
       "SVGA3D_BC4_TYPELESS",
@@ -1671,7 +1694,7 @@ static const struct format_cap format_cap_table[] = {
    {
       "SVGA3D_D16_UNORM",
       SVGA3D_D16_UNORM,
-      SVGA3D_DEVCAP_DXFMT_D16_UNORM,
+      0, /*SVGA3D_DEVCAP_DXFMT_D16_UNORM*/
       1, 1, 2,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
@@ -1690,15 +1713,27 @@ static const struct format_cap format_cap_table[] = {
    },
    {
       "SVGA3D_BC1_UNORM",
-      SVGA3D_BC1_UNORM, 0, 0, 0, 0, 0
+      SVGA3D_BC1_UNORM,
+      SVGA3D_DEVCAP_DXFMT_BC1_UNORM,
+      4, 4, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE
    },
    {
       "SVGA3D_BC2_UNORM",
-      SVGA3D_BC2_UNORM, 0, 0, 0, 0, 0
+      SVGA3D_BC2_UNORM,
+      SVGA3D_DEVCAP_DXFMT_BC2_UNORM,
+      4, 4, 16,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE
    },
    {
       "SVGA3D_BC3_UNORM",
-      SVGA3D_BC3_UNORM, 0, 0, 0, 0, 0
+      SVGA3D_BC3_UNORM,
+      SVGA3D_DEVCAP_DXFMT_BC3_UNORM,
+      4, 4, 16,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE
    },
    {
       "SVGA3D_B5G6R5_UNORM",
@@ -2053,6 +2088,7 @@ svga_typeless_format(SVGA3dSurfaceFormat format)
    case SVGA3D_R8G8_UINT:
    case SVGA3D_R8G8_SINT:
       return SVGA3D_R8G8_TYPELESS;
+   case SVGA3D_D16_UNORM:
    case SVGA3D_R16_UNORM:
    case SVGA3D_R16_UINT:
    case SVGA3D_R16_SNORM:
@@ -2070,6 +2106,15 @@ svga_typeless_format(SVGA3dSurfaceFormat format)
    case SVGA3D_B8G8R8X8_UNORM_SRGB:
    case SVGA3D_B8G8R8X8_UNORM:
       return SVGA3D_B8G8R8X8_TYPELESS;
+   case SVGA3D_BC1_UNORM:
+   case SVGA3D_BC1_UNORM_SRGB:
+      return SVGA3D_BC1_TYPELESS;
+   case SVGA3D_BC2_UNORM:
+   case SVGA3D_BC2_UNORM_SRGB:
+      return SVGA3D_BC2_TYPELESS;
+   case SVGA3D_BC3_UNORM:
+   case SVGA3D_BC3_UNORM_SRGB:
+      return SVGA3D_BC3_TYPELESS;
    case SVGA3D_BC4_UNORM:
    case SVGA3D_BC4_SNORM:
       return SVGA3D_BC4_TYPELESS;
@@ -2079,18 +2124,10 @@ svga_typeless_format(SVGA3dSurfaceFormat format)
 
    /* Special cases (no corresponding _TYPELESS formats) */
    case SVGA3D_A8_UNORM:
-   case SVGA3D_A4R4G4B4:
    case SVGA3D_B5G5R5A1_UNORM:
    case SVGA3D_B5G6R5_UNORM:
-   case SVGA3D_DXT1:
-   case SVGA3D_DXT2:
-   case SVGA3D_DXT3:
-   case SVGA3D_DXT4:
-   case SVGA3D_DXT5:
    case SVGA3D_R11G11B10_FLOAT:
    case SVGA3D_R9G9B9E5_SHAREDEXP:
-   case SVGA3D_Z_D32:
-   case SVGA3D_Z_D16:
       return format;
    default:
       debug_printf("Unexpected format %s in %s\n",

From 72e232374eda780a5dcd374b55d203d0e2a6d02b Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Fri, 13 Nov 2015 16:09:37 -0800
Subject: [PATCH 070/335] meta/generate_mipmap: Don't leak the framebuffer
 object

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
---
 src/mesa/drivers/common/meta_generate_mipmap.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/mesa/drivers/common/meta_generate_mipmap.c b/src/mesa/drivers/common/meta_generate_mipmap.c
index ffd71b6a199..bde170fcf6f 100644
--- a/src/mesa/drivers/common/meta_generate_mipmap.c
+++ b/src/mesa/drivers/common/meta_generate_mipmap.c
@@ -131,6 +131,11 @@ _mesa_meta_glsl_generate_mipmap_cleanup(struct gen_mipmap_state *mipmap)
    _mesa_DeleteSamplers(1, &mipmap->Sampler);
    mipmap->Sampler = 0;
 
+   if (mipmap->FBO != 0) {
+      _mesa_DeleteFramebuffers(1, &mipmap->FBO);
+      mipmap->FBO = 0;
+   }
+
    _mesa_meta_blit_shader_table_cleanup(&mipmap->shaders);
 }
 

From b196f1fff31f1528afa84590926c2935d9b0c255 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Wed, 11 Nov 2015 19:24:01 -0800
Subject: [PATCH 071/335] i965: Add enums for 3DSTATE_TE field values.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

3DSTATE_TE has partitioning, output topology, and domain fields,
each of which has several enumerated values.  We'll also need to
switch on the domain, so enums (rather than #defines) seem like a
natural fit.

I chose to put these in brw_compiler.h because they'll be stored
in struct brw_tes_prog_data, which will live there.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_compiler.h | 28 ++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index 8f147d3f75a..1ee01eb0661 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -490,6 +490,34 @@ enum shader_dispatch_mode {
    DISPATCH_MODE_SIMD8 = 3,
 };
 
+/**
+ * @defgroup Tessellator parameter enumerations.
+ *
+ * These correspond to the hardware values in 3DSTATE_TE, and are provided
+ * as part of the tessellation evaluation shader.
+ *
+ * @{
+ */
+enum brw_tess_partitioning {
+   BRW_TESS_PARTITIONING_INTEGER         = 0,
+   BRW_TESS_PARTITIONING_ODD_FRACTIONAL  = 1,
+   BRW_TESS_PARTITIONING_EVEN_FRACTIONAL = 2,
+};
+
+enum brw_tess_output_topology {
+   BRW_TESS_OUTPUT_TOPOLOGY_POINT   = 0,
+   BRW_TESS_OUTPUT_TOPOLOGY_LINE    = 1,
+   BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW  = 2,
+   BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW = 3,
+};
+
+enum brw_tess_domain {
+   BRW_TESS_DOMAIN_QUAD    = 0,
+   BRW_TESS_DOMAIN_TRI     = 1,
+   BRW_TESS_DOMAIN_ISOLINE = 2,
+};
+/** @} */
+
 struct brw_vue_prog_data {
    struct brw_stage_prog_data base;
    struct brw_vue_map vue_map;

From 2631bfd62c899dfa2a7095685c4f6874d5e9704d Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 17 Nov 2015 14:56:32 -0800
Subject: [PATCH 072/335] nir: Store the size of the TCS output patch in
 nir_shader_info.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/glsl_to_nir.cpp | 4 ++++
 src/glsl/nir/nir.h           | 5 +++++
 2 files changed, 9 insertions(+)

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 6d24341ce01..c4b53f38b6b 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -167,6 +167,10 @@ glsl_to_nir(const struct gl_shader_program *shader_prog,
       shader_prog->TransformFeedback.NumVarying > 0;
 
    switch (stage) {
+   case MESA_SHADER_TESS_CTRL:
+      shader->info.tcs.vertices_out = shader_prog->TessCtrl.VerticesOut;
+      break;
+
    case MESA_SHADER_GEOMETRY:
       shader->info.gs.vertices_in = shader_prog->Geom.VerticesIn;
       shader->info.gs.output_primitive = sh->Geom.OutputType;
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index beabcafef4e..46add22da70 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1544,6 +1544,11 @@ typedef struct nir_shader_info {
       struct {
          unsigned local_size[3];
       } cs;
+
+      struct {
+         /** The number of vertices in the TCS output patch. */
+         unsigned vertices_out;
+      } tcs;
    };
 } nir_shader_info;
 

From 8e68113c1a78c48f26e820f4beb2dda9e4b99f32 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Wed, 18 Nov 2015 14:23:35 -0500
Subject: [PATCH 073/335] nvc0/ir: actually emit AFETCH on kepler

Looks like this was forgotten in the commit which added the AFETCH
logic.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: mesa-stable@lists.freedesktop.org
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 2a13e1086a0..9f84de03a4a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -2357,6 +2357,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn)
    case OP_PFETCH:
       emitPFETCH(insn);
       break;
+   case OP_AFETCH:
+      emitAFETCH(insn);
+      break;
    case OP_EMIT:
    case OP_RESTART:
       emitOUT(insn);

From 4581f8428e0e1d2f6787d0765823c7883bd2cfcd Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Tue, 17 Nov 2015 16:16:46 +0200
Subject: [PATCH 074/335] llvmpipe: disable VSX in ppc due to LLVM PPC bug

This patch disables the use of VSX instructions, as they cause some
piglit tests to fail.

For more details, see: https://llvm.org/bugs/show_bug.cgi?id=25503#c7

With this patch, ppc64le reaches parity with x86-64 as far as the piglit
test suite is concerned.

v2:
- Added check that we have at least LLVM 3.4
- Added the LLVM bug URL as a comment in the code

v3:

- Only disable VSX if Altivec is supported, because if Altivec support
is missing, then VSX support doesn't exist anyway.

- Change original patch description.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Cc: "11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
---
 src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 7bda1184ee9..3ee708f4fad 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -536,6 +536,15 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
 
 #if defined(PIPE_ARCH_PPC)
    MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec");
+#if HAVE_LLVM >= 0x0304
+   /*
+    * Make sure VSX instructions are disabled
+    * See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=25503#c7
+    */
+   if (util_cpu_caps.has_altivec) {
+      MAttrs.push_back("-vsx");
+   }
+#endif
 #endif
 
    builder.setMAttrs(MAttrs);

From b40e144a665142957a7ae027238e61fd01a27ebc Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sun, 8 Nov 2015 13:43:07 -0500
Subject: [PATCH 075/335] nir: fix typo in idiv lowering, causing
 large-udiv-udiv failures

In nv50, and in the python script that Rob circulated, we do:

   bld.mkCmp(OP_SET, CC_GE, TYPE_U32, (s = bld.getSSA()), TYPE_U32, m, b);

Do the same in the nir div lowering pass. This fixes the large-udiv-udiv
piglit tests on freedreno.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: mesa-stable@lists.freedesktop.org
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/glsl/nir/nir_lower_idiv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/nir/nir_lower_idiv.c b/src/glsl/nir/nir_lower_idiv.c
index c961178c53a..3580ced0ac0 100644
--- a/src/glsl/nir/nir_lower_idiv.c
+++ b/src/glsl/nir/nir_lower_idiv.c
@@ -96,7 +96,7 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu)
    r = nir_imul(bld, q, b);
    r = nir_isub(bld, a, r);
 
-   r = nir_ige(bld, r, b);
+   r = nir_uge(bld, r, b);
    r = nir_b2i(bld, r);
 
    q = nir_iadd(bld, q, r);

From b24c9a8aeef6fbad06d7982aee8bdb55679289f3 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Mon, 16 Nov 2015 14:58:50 -0500
Subject: [PATCH 076/335] freedreno/a3xx+a4xx: fix GL_POINTS lockup w/ GLES

point_size_per_vertex is always TRUE for GLES, causing us to configure
the hw as if gl_PointSize was written, even if it was not.  Which makes
for grumpy hw.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/a3xx/fd3_draw.c | 2 ++
 src/gallium/drivers/freedreno/a4xx/fd4_draw.c | 8 ++++++++
 src/gallium/drivers/freedreno/a4xx/fd4_draw.h | 7 +------
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index 3906c9b996e..b8a31d84b3f 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -81,7 +81,9 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
 	OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
 			info->restart_index : 0xffffffff);
 
+	/* points + psize -> spritelist: */
 	if (ctx->rasterizer->point_size_per_vertex &&
+			fd3_emit_get_vp(emit)->writes_psize &&
 			(info->mode == PIPE_PRIM_POINTS))
 		primtype = DI_PT_POINTLIST_PSIZE;
 
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
index 7bd5163529a..54bd445d43b 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
@@ -47,6 +47,7 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		struct fd4_emit *emit)
 {
 	const struct pipe_draw_info *info = emit->info;
+	enum pc_di_primtype primtype = ctx->primtypes[info->mode];
 
 	if (!(fd4_emit_get_vp(emit) && fd4_emit_get_fp(emit)))
 		return;
@@ -64,7 +65,14 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
 	OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
 			info->restart_index : 0xffffffff);
 
+	/* points + psize -> spritelist: */
+	if (ctx->rasterizer->point_size_per_vertex &&
+			fd4_emit_get_vp(emit)->writes_psize &&
+			(info->mode == PIPE_PRIM_POINTS))
+		primtype = DI_PT_POINTLIST_PSIZE;
+
 	fd4_draw_emit(ctx, ring,
+			primtype,
 			emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
 			info);
 }
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
index b89a30a7c4b..a6c56404a8a 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
@@ -101,12 +101,12 @@ fd4_size2indextype(unsigned index_size)
 }
 static inline void
 fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
+		enum pc_di_primtype primtype,
 		enum pc_di_vis_cull_mode vismode,
 		const struct pipe_draw_info *info)
 {
 	struct pipe_index_buffer *idx = &ctx->indexbuf;
 	struct fd_bo *idx_bo = NULL;
-	enum pc_di_primtype primtype = ctx->primtypes[info->mode];
 	enum a4xx_index_size idx_type;
 	enum pc_di_src_sel src_sel;
 	uint32_t idx_size, idx_offset;
@@ -127,11 +127,6 @@ fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		src_sel = DI_SRC_SEL_AUTO_INDEX;
 	}
 
-	/* points + psize -> spritelist: */
-	if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex &&
-			(info->mode == PIPE_PRIM_POINTS))
-		primtype = DI_PT_POINTLIST_PSIZE;
-
 	fd4_draw(ctx, ring, primtype, vismode, src_sel,
 			info->count, info->instance_count,
 			idx_type, idx_size, idx_offset, idx_bo);

From 8106fec74c4d8548974fcf64e052a6bac07e926f Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Mon, 16 Nov 2015 15:07:29 -0500
Subject: [PATCH 077/335] freedreno/a3xx+a4xx: fix for stk binning pass hang

We'd end up in a state where shader uses no inputs, yet num_elements is
greater than zero.  Triggered by a TF vertex shader which did:

  gl_Position = vec4(0.0, 0.0, 0.0, 0.0);

resulting in a binning pass variant with no inputs.

Includes equiv fix in a4xx, even though we don't have binning-pass
enabled yet on a4xx.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 44 ++++++++++++++----
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 45 +++++++++++++++----
 .../drivers/freedreno/ir3/ir3_shader.h        |  6 ++-
 3 files changed, 76 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 8f9c8b0623c..25ea3e7a7b7 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -350,7 +350,10 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
 	unsigned instance_regid = regid(63, 0);
 	unsigned vtxcnt_regid = regid(63, 0);
 
+	/* Note that sysvals come *after* normal inputs: */
 	for (i = 0; i < vp->inputs_count; i++) {
+		if (!vp->inputs[i].compmask)
+			continue;
 		if (vp->inputs[i].sysval) {
 			switch(vp->inputs[i].slot) {
 			case SYSTEM_VALUE_BASE_VERTEX:
@@ -369,18 +372,11 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
 				unreachable("invalid system value");
 				break;
 			}
-		} else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) {
+		} else if (i < vtx->vtx->num_elements) {
 			last = i;
 		}
 	}
 
-	/* hw doesn't like to be configured for zero vbo's, it seems: */
-	if ((vtx->vtx->num_elements == 0) &&
-			(vertex_regid == regid(63, 0)) &&
-			(instance_regid == regid(63, 0)) &&
-			(vtxcnt_regid == regid(63, 0)))
-		return;
-
 	for (i = 0, j = 0; i <= last; i++) {
 		assert(!vp->inputs[i].sysval);
 		if (vp->inputs[i].compmask) {
@@ -424,6 +420,38 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
 		}
 	}
 
+	/* hw doesn't like to be configured for zero vbo's, it seems: */
+	if (last < 0) {
+		/* just recycle the shader bo, we just need to point to *something*
+		 * valid:
+		 */
+		struct fd_bo *dummy_vbo = vp->bo;
+		bool switchnext = (vertex_regid != regid(63, 0)) ||
+				(instance_regid != regid(63, 0)) ||
+				(vtxcnt_regid != regid(63, 0));
+
+		OUT_PKT0(ring, REG_A3XX_VFD_FETCH(0), 2);
+		OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) |
+				A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) |
+				COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
+				A3XX_VFD_FETCH_INSTR_0_INDEXCODE(0) |
+				A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
+		OUT_RELOC(ring, dummy_vbo, 0, 0, 0);
+
+		OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(0), 1);
+		OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
+				A3XX_VFD_DECODE_INSTR_WRITEMASK(0x1) |
+				A3XX_VFD_DECODE_INSTR_FORMAT(VFMT_8_UNORM) |
+				A3XX_VFD_DECODE_INSTR_SWAP(XYZW) |
+				A3XX_VFD_DECODE_INSTR_REGID(regid(0,0)) |
+				A3XX_VFD_DECODE_INSTR_SHIFTCNT(1) |
+				A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+				COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
+
+		total_in = 1;
+		j = 1;
+	}
+
 	OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
 	OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
 			A3XX_VFD_CONTROL_0_PACKETSIZE(2) |
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 26b58718cd8..5a7b192f79d 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -332,7 +332,10 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
 	unsigned instance_regid = regid(63, 0);
 	unsigned vtxcnt_regid = regid(63, 0);
 
+	/* Note that sysvals come *after* normal inputs: */
 	for (i = 0; i < vp->inputs_count; i++) {
+		if (!vp->inputs[i].compmask)
+			continue;
 		if (vp->inputs[i].sysval) {
 			switch(vp->inputs[i].slot) {
 			case SYSTEM_VALUE_BASE_VERTEX:
@@ -351,19 +354,11 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
 				unreachable("invalid system value");
 				break;
 			}
-		} else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) {
+		} else if (i < vtx->vtx->num_elements) {
 			last = i;
 		}
 	}
 
-
-	/* hw doesn't like to be configured for zero vbo's, it seems: */
-	if ((vtx->vtx->num_elements == 0) &&
-			(vertex_regid == regid(63, 0)) &&
-			(instance_regid == regid(63, 0)) &&
-			(vtxcnt_regid == regid(63, 0)))
-		return;
-
 	for (i = 0, j = 0; i <= last; i++) {
 		assert(!vp->inputs[i].sysval);
 		if (vp->inputs[i].compmask) {
@@ -408,6 +403,38 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
 		}
 	}
 
+	/* hw doesn't like to be configured for zero vbo's, it seems: */
+	if (last < 0) {
+		/* just recycle the shader bo, we just need to point to *something*
+		 * valid:
+		 */
+		struct fd_bo *dummy_vbo = vp->bo;
+		bool switchnext = (vertex_regid != regid(63, 0)) ||
+				(instance_regid != regid(63, 0)) ||
+				(vtxcnt_regid != regid(63, 0));
+
+		OUT_PKT0(ring, REG_A4XX_VFD_FETCH(0), 4);
+		OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) |
+				A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) |
+				COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
+		OUT_RELOC(ring, dummy_vbo, 0, 0, 0);
+		OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(1));
+		OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(1));
+
+		OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(0), 1);
+		OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL |
+				A4XX_VFD_DECODE_INSTR_WRITEMASK(0x1) |
+				A4XX_VFD_DECODE_INSTR_FORMAT(VFMT4_8_UNORM) |
+				A4XX_VFD_DECODE_INSTR_SWAP(XYZW) |
+				A4XX_VFD_DECODE_INSTR_REGID(regid(0,0)) |
+				A4XX_VFD_DECODE_INSTR_SHIFTCNT(1) |
+				A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+				COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));
+
+		total_in = 1;
+		j = 1;
+	}
+
 	OUT_PKT0(ring, REG_A4XX_VFD_CONTROL_0, 5);
 	OUT_RING(ring, A4XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
 			0xa0000 | /* XXX */
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 7e2c27d9765..5d1cccb0daa 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -166,7 +166,9 @@ struct ir3_shader_variant {
 	} outputs[16 + 2];  /* +POSITION +PSIZE */
 	bool writes_pos, writes_psize;
 
-	/* vertices/inputs: */
+	/* attributes (VS) / varyings (FS):
+	 * Note that sysval's should come *after* normal inputs.
+	 */
 	unsigned inputs_count;
 	struct {
 		uint8_t slot;
@@ -229,7 +231,7 @@ struct ir3_shader {
 
 	struct ir3_compiler *compiler;
 
-	struct pipe_context *pctx;
+	struct pipe_context *pctx;    /* TODO replace w/ pipe_screen */
 	const struct tgsi_token *tokens;
 	struct pipe_stream_output_info stream_output;
 

From 84d087aea29821a029aeefe76386d487e72d7287 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 7 Nov 2015 19:32:32 -0500
Subject: [PATCH 078/335] freedreno/a3xx: add missing formats to enable
 ARB_vertex_type_2_10_10_10_rev

The previously RE'd formats were from an ES driver implementing
OES_vertex_type_10_10_10_2 and thus backwards. A future change could add
the 2_10_10_10 support.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 docs/relnotes/11.1.0.html                       | 1 +
 src/gallium/drivers/freedreno/a3xx/a3xx.xml.h   | 8 ++++----
 src/gallium/drivers/freedreno/a3xx/fd3_format.c | 4 ++++
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 6654311a3ac..0075a00ad88 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -57,6 +57,7 @@ Note: some of the new features are only available with certain drivers.
 <li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li>
 <li>GL_ARB_texture_query_lod on softpipe</li>
 <li>GL_ARB_texture_view on radeonsi and r600 (for evergeen and newer)</li>
+<li>GL_ARB_vertex_type_2_10_10_10_rev on freedreno (a3xx)</li>
 <li>GL_EXT_buffer_storage implemented for when ES 3.1 support is gained</li>
 <li>GL_EXT_draw_elements_base_vertex on all drivers</li>
 <li>GL_OES_draw_elements_base_vertex on all drivers</li>
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index b5e1ddadde0..9f382baba97 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -111,10 +111,10 @@ enum a3xx_vtx_fmt {
 	VFMT_8_8_SNORM = 53,
 	VFMT_8_8_8_SNORM = 54,
 	VFMT_8_8_8_8_SNORM = 55,
-	VFMT_10_10_10_2_UINT = 60,
-	VFMT_10_10_10_2_UNORM = 61,
-	VFMT_10_10_10_2_SINT = 62,
-	VFMT_10_10_10_2_SNORM = 63,
+	VFMT_10_10_10_2_UINT = 56,
+	VFMT_10_10_10_2_UNORM = 57,
+	VFMT_10_10_10_2_SINT = 58,
+	VFMT_10_10_10_2_SNORM = 59,
 };
 
 enum a3xx_tex_fmt {
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.c b/src/gallium/drivers/freedreno/a3xx/fd3_format.c
index 857d156c869..9b313b598a8 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_format.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.c
@@ -188,9 +188,13 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = {
 	VT(B10G10R10A2_UNORM,   10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
 	_T(B10G10R10X2_UNORM,   10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
 	V_(R10G10B10A2_SNORM,   10_10_10_2_SNORM, NONE,              WZYX),
+	V_(B10G10R10A2_SNORM,   10_10_10_2_SNORM, NONE,              WXYZ),
 	V_(R10G10B10A2_UINT,    10_10_10_2_UINT,  NONE,              WZYX),
+	V_(B10G10R10A2_UINT,    10_10_10_2_UINT,  NONE,              WXYZ),
 	V_(R10G10B10A2_USCALED, 10_10_10_2_UINT,  NONE,              WZYX),
+	V_(B10G10R10A2_USCALED, 10_10_10_2_UINT,  NONE,              WXYZ),
 	V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT,  NONE,              WZYX),
+	V_(B10G10R10A2_SSCALED, 10_10_10_2_SINT,  NONE,              WXYZ),
 
 	_T(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX),
 	_T(R9G9B9E5_FLOAT,  9_9_9_E5_FLOAT, NONE,            WZYX),

From 059da344ec17853bb503a7e4afa229c2e2a98c83 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 7 Nov 2015 22:13:16 -0500
Subject: [PATCH 079/335] freedreno/a3xx: add fake RGTC support (required for
 GL3)

Also throw in LATC while we're at it (same exact format). This could be
made more efficient by keeping a shadow copy of the compressed texture
to return at map time. However, that is not worth it for now:
presumably compressed textures are not updated often.

Lastly, fix up Z32S8 transfers to non-0 layers.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 docs/relnotes/11.1.0.html                     |   1 +
 .../drivers/freedreno/a3xx/fd3_format.c       |  20 ++
 .../drivers/freedreno/a3xx/fd3_format.h       |   1 +
 .../drivers/freedreno/a3xx/fd3_texture.c      |   2 +-
 .../drivers/freedreno/freedreno_resource.c    | 175 +++++++++++++++---
 .../drivers/freedreno/freedreno_texture.c     |   4 +
 6 files changed, 176 insertions(+), 27 deletions(-)

diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 0075a00ad88..28fec7e89c4 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -60,6 +60,7 @@ Note: some of the new features are only available with certain drivers.
 <li>GL_ARB_vertex_type_2_10_10_10_rev on freedreno (a3xx)</li>
 <li>GL_EXT_buffer_storage implemented for when ES 3.1 support is gained</li>
 <li>GL_EXT_draw_elements_base_vertex on all drivers</li>
+<li>GL_EXT_texture_compression_rgtc / latc on freedreno (a3xx)</li>
 <li>GL_OES_draw_elements_base_vertex on all drivers</li>
 <li>EGL_KHR_create_context on softpipe, llvmpipe</li>
 <li>EGL_KHR_gl_colorspace on softpipe, llvmpipe</li>
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.c b/src/gallium/drivers/freedreno/a3xx/fd3_format.c
index 9b313b598a8..52ea9444517 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_format.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.c
@@ -275,6 +275,16 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = {
 	_T(DXT3_SRGBA, DXT3, NONE, WZYX),
 	_T(DXT5_RGBA,  DXT5, NONE, WZYX),
 	_T(DXT5_SRGBA, DXT5, NONE, WZYX),
+
+	/* faked */
+	_T(RGTC1_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
+	_T(RGTC1_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
+	_T(RGTC2_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
+	_T(RGTC2_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
+	_T(LATC1_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
+	_T(LATC1_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
+	_T(LATC2_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
+	_T(LATC2_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
 };
 
 enum a3xx_vtx_fmt
@@ -314,6 +324,8 @@ fd3_pipe2fetchsize(enum pipe_format format)
 {
 	if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
 		format = PIPE_FORMAT_Z32_FLOAT;
+	else if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
+		format = PIPE_FORMAT_R8G8B8A8_UNORM;
 	switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) {
 	case 8: return TFETCH_1_BYTE;
 	case 16: return TFETCH_2_BYTE;
@@ -328,6 +340,14 @@ fd3_pipe2fetchsize(enum pipe_format format)
 	}
 }
 
+unsigned
+fd3_pipe2nblocksx(enum pipe_format format, unsigned width)
+{
+	if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
+		format = PIPE_FORMAT_R8G8B8A8_UNORM;
+	return util_format_get_nblocksx(format, width);
+}
+
 /* we need to special case a bit the depth/stencil restore, because we are
  * using the texture sampler to blit into the depth/stencil buffer, *not*
  * into a color buffer.  Otherwise fd3_tex_swiz() will do the wrong thing,
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.h b/src/gallium/drivers/freedreno/a3xx/fd3_format.h
index 05c5ea3d247..48c503e9a82 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_format.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.h
@@ -37,6 +37,7 @@ enum a3xx_color_fmt fd3_pipe2color(enum pipe_format format);
 enum pipe_format fd3_gmem_restore_format(enum pipe_format format);
 enum a3xx_color_fmt fd3_fs_output_format(enum pipe_format format);
 enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format);
+unsigned fd3_pipe2nblocksx(enum pipe_format format, unsigned width);
 
 uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r,
 		unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
index 2d6ecb2c050..15e63e7d478 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
@@ -240,7 +240,7 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 			A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
 	/* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */
 	so->texconst2 =
-			A3XX_TEX_CONST_2_PITCH(util_format_get_nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
+			A3XX_TEX_CONST_2_PITCH(fd3_pipe2nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
 	switch (prsc->target) {
 	case PIPE_TEXTURE_1D_ARRAY:
 	case PIPE_TEXTURE_2D_ARRAY:
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index 98de0969cab..6e22e39f52e 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -27,6 +27,7 @@
  */
 
 #include "util/u_format.h"
+#include "util/u_format_rgtc.h"
 #include "util/u_format_zs.h"
 #include "util/u_inlines.h"
 #include "util/u_transfer.h"
@@ -111,11 +112,19 @@ realloc_bo(struct fd_resource *rsc, uint32_t size)
 	util_range_set_empty(&rsc->valid_buffer_range);
 }
 
-/* Currently this is only used for flushing Z32_S8 texture transfers, but
- * eventually it should handle everything.
- */
+static unsigned
+fd_resource_layer_offset(struct fd_resource *rsc,
+						 struct fd_resource_slice *slice,
+						 unsigned layer)
+{
+	if (rsc->layer_first)
+		return layer * rsc->layer_size;
+	else
+		return layer * slice->size0;
+}
+
 static void
-fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
+fd_resource_flush_z32s8(struct fd_transfer *trans, const struct pipe_box *box)
 {
 	struct fd_resource *rsc = fd_resource(trans->base.resource);
 	struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
@@ -123,13 +132,12 @@ fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
 	enum pipe_format format = trans->base.resource->format;
 
 	float *depth = fd_bo_map(rsc->bo) + slice->offset +
+		fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
 		(trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4;
 	uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset +
+		fd_resource_layer_offset(rsc->stencil, sslice, trans->base.box.z) +
 		(trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x;
 
-	assert(format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
-		   format == PIPE_FORMAT_X32_S8X24_UINT);
-
 	if (format != PIPE_FORMAT_X32_S8X24_UINT)
 		util_format_z32_float_s8x24_uint_unpack_z_float(
 				depth, slice->pitch * 4,
@@ -142,6 +150,73 @@ fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
 			box->width, box->height);
 }
 
+static void
+fd_resource_flush_rgtc(struct fd_transfer *trans, const struct pipe_box *box)
+{
+	struct fd_resource *rsc = fd_resource(trans->base.resource);
+	struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
+	enum pipe_format format = trans->base.resource->format;
+
+	uint8_t *data = fd_bo_map(rsc->bo) + slice->offset +
+		fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
+		((trans->base.box.y + box->y) * slice->pitch +
+		 trans->base.box.x + box->x) * rsc->cpp;
+
+	uint8_t *source = trans->staging +
+		util_format_get_nblocksy(format, box->y) * trans->base.stride +
+		util_format_get_stride(format, box->x);
+
+	switch (format) {
+	case PIPE_FORMAT_RGTC1_UNORM:
+	case PIPE_FORMAT_RGTC1_SNORM:
+	case PIPE_FORMAT_LATC1_UNORM:
+	case PIPE_FORMAT_LATC1_SNORM:
+		util_format_rgtc1_unorm_unpack_rgba_8unorm(
+				data, slice->pitch * rsc->cpp,
+				source, trans->base.stride,
+				box->width, box->height);
+		break;
+	case PIPE_FORMAT_RGTC2_UNORM:
+	case PIPE_FORMAT_RGTC2_SNORM:
+	case PIPE_FORMAT_LATC2_UNORM:
+	case PIPE_FORMAT_LATC2_SNORM:
+		util_format_rgtc2_unorm_unpack_rgba_8unorm(
+				data, slice->pitch * rsc->cpp,
+				source, trans->base.stride,
+				box->width, box->height);
+		break;
+	default:
+		assert(!"Unexpected format\n");
+		break;
+	}
+}
+
+static void
+fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
+{
+	enum pipe_format format = trans->base.resource->format;
+
+	switch (format) {
+	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+	case PIPE_FORMAT_X32_S8X24_UINT:
+		fd_resource_flush_z32s8(trans, box);
+		break;
+	case PIPE_FORMAT_RGTC1_UNORM:
+	case PIPE_FORMAT_RGTC1_SNORM:
+	case PIPE_FORMAT_RGTC2_UNORM:
+	case PIPE_FORMAT_RGTC2_SNORM:
+	case PIPE_FORMAT_LATC1_UNORM:
+	case PIPE_FORMAT_LATC1_SNORM:
+	case PIPE_FORMAT_LATC2_UNORM:
+	case PIPE_FORMAT_LATC2_SNORM:
+		fd_resource_flush_rgtc(trans, box);
+		break;
+	default:
+		assert(!"Unexpected staging transfer type");
+		break;
+	}
+}
+
 static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
 		struct pipe_transfer *ptrans,
 		const struct pipe_box *box)
@@ -267,20 +342,15 @@ fd_resource_transfer_map(struct pipe_context *pctx,
 		return NULL;
 	}
 
-	if (rsc->layer_first) {
-		offset = slice->offset +
-			box->y / util_format_get_blockheight(format) * ptrans->stride +
-			box->x / util_format_get_blockwidth(format) * rsc->cpp +
-			box->z * rsc->layer_size;
-	} else {
-		offset = slice->offset +
-			box->y / util_format_get_blockheight(format) * ptrans->stride +
-			box->x / util_format_get_blockwidth(format) * rsc->cpp +
-			box->z * slice->size0;
-	}
+	offset = slice->offset +
+		box->y / util_format_get_blockheight(format) * ptrans->stride +
+		box->x / util_format_get_blockwidth(format) * rsc->cpp +
+		fd_resource_layer_offset(rsc, slice, box->z);
 
 	if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
 		prsc->format == PIPE_FORMAT_X32_S8X24_UINT) {
+		assert(trans->base.box.depth == 1);
+
 		trans->base.stride = trans->base.box.width * rsc->cpp * 2;
 		trans->staging = malloc(trans->base.stride * trans->base.box.height);
 		if (!trans->staging)
@@ -298,8 +368,10 @@ fd_resource_transfer_map(struct pipe_context *pctx,
 				goto fail;
 
 			float *depth = (float *)(buf + slice->offset +
+				fd_resource_layer_offset(rsc, slice, box->z) +
 				box->y * slice->pitch * 4 + box->x * 4);
 			uint8_t *stencil = sbuf + sslice->offset +
+				fd_resource_layer_offset(rsc->stencil, sslice, box->z) +
 				box->y * sslice->pitch + box->x;
 
 			if (format != PIPE_FORMAT_X32_S8X24_UINT)
@@ -314,6 +386,53 @@ fd_resource_transfer_map(struct pipe_context *pctx,
 					box->width, box->height);
 		}
 
+		buf = trans->staging;
+		offset = 0;
+	} else if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+		assert(trans->base.box.depth == 1);
+
+		trans->base.stride = util_format_get_stride(
+				format, trans->base.box.width);
+		trans->staging = malloc(
+				util_format_get_2d_size(format, trans->base.stride,
+										trans->base.box.height));
+		if (!trans->staging)
+			goto fail;
+
+		/* if we're not discarding the whole range (or resource), we must copy
+		 * the real data in.
+		 */
+		if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
+					   PIPE_TRANSFER_DISCARD_RANGE))) {
+			uint8_t *rgba8 = (uint8_t *)buf + slice->offset +
+				fd_resource_layer_offset(rsc, slice, box->z) +
+				box->y * slice->pitch * rsc->cpp + box->x * rsc->cpp;
+
+			switch (format) {
+			case PIPE_FORMAT_RGTC1_UNORM:
+			case PIPE_FORMAT_RGTC1_SNORM:
+			case PIPE_FORMAT_LATC1_UNORM:
+			case PIPE_FORMAT_LATC1_SNORM:
+				util_format_rgtc1_unorm_pack_rgba_8unorm(
+					trans->staging, trans->base.stride,
+					rgba8, slice->pitch * rsc->cpp,
+					box->width, box->height);
+				break;
+			case PIPE_FORMAT_RGTC2_UNORM:
+			case PIPE_FORMAT_RGTC2_SNORM:
+			case PIPE_FORMAT_LATC2_UNORM:
+			case PIPE_FORMAT_LATC2_SNORM:
+				util_format_rgtc2_unorm_pack_rgba_8unorm(
+					trans->staging, trans->base.stride,
+					rgba8, slice->pitch * rsc->cpp,
+					box->width, box->height);
+				break;
+			default:
+				assert(!"Unexpected format");
+				break;
+			}
+		}
+
 		buf = trans->staging;
 		offset = 0;
 	}
@@ -361,7 +480,7 @@ static const struct u_resource_vtbl fd_resource_vtbl = {
 };
 
 static uint32_t
-setup_slices(struct fd_resource *rsc, uint32_t alignment)
+setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
 {
 	struct pipe_resource *prsc = &rsc->base.b;
 	uint32_t level, size = 0;
@@ -379,7 +498,7 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment)
 
 		slice->pitch = width = align(width, 32);
 		slice->offset = size;
-		blocks = util_format_get_nblocks(prsc->format, width, height);
+		blocks = util_format_get_nblocks(format, width, height);
 		/* 1d array and 2d array textures must all have the same layer size
 		 * for each miplevel on a3xx. 3d textures can have different layer
 		 * sizes for high levels, but the hw auto-sizer is buggy (or at least
@@ -430,11 +549,12 @@ fd_resource_create(struct pipe_screen *pscreen,
 {
 	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
 	struct pipe_resource *prsc = &rsc->base.b;
+	enum pipe_format format = tmpl->format;
 	uint32_t size;
 
 	DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
 			"nr_samples=%u, usage=%u, bind=%x, flags=%x",
-			tmpl->target, util_format_name(tmpl->format),
+			tmpl->target, util_format_name(format),
 			tmpl->width0, tmpl->height0, tmpl->depth0,
 			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
 			tmpl->usage, tmpl->bind, tmpl->flags);
@@ -451,10 +571,13 @@ fd_resource_create(struct pipe_screen *pscreen,
 	util_range_init(&rsc->valid_buffer_range);
 
 	rsc->base.vtbl = &fd_resource_vtbl;
-	if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
-		rsc->cpp = util_format_get_blocksize(PIPE_FORMAT_Z32_FLOAT);
-	else
-		rsc->cpp = util_format_get_blocksize(tmpl->format);
+
+	if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
+		format = PIPE_FORMAT_Z32_FLOAT;
+	else if (util_format_description(format)->layout ==
+			 UTIL_FORMAT_LAYOUT_RGTC)
+		format = PIPE_FORMAT_R8G8B8A8_UNORM;
+	rsc->cpp = util_format_get_blocksize(format);
 
 	assert(rsc->cpp);
 
@@ -469,7 +592,7 @@ fd_resource_create(struct pipe_screen *pscreen,
 		}
 	}
 
-	size = setup_slices(rsc, slice_alignment(pscreen, tmpl));
+	size = setup_slices(rsc, slice_alignment(pscreen, tmpl), format);
 
 	if (rsc->layer_first) {
 		rsc->layer_size = align(size, 4096);
diff --git a/src/gallium/drivers/freedreno/freedreno_texture.c b/src/gallium/drivers/freedreno/freedreno_texture.c
index 04e4643b4c9..f03b65b0ae5 100644
--- a/src/gallium/drivers/freedreno/freedreno_texture.c
+++ b/src/gallium/drivers/freedreno/freedreno_texture.c
@@ -212,6 +212,10 @@ fd_setup_border_colors(struct fd_texture_stateobj *tex, void *ptr,
 				if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER)
 					size = 16;
 
+				/* We fake RGTC as if it were RGBA8 */
+				if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC)
+					size = 8;
+
 				if (chan->pure_integer && size > 16)
 					bcolor32[desc->swizzle[j] + 4] =
 							sampler->border_color.i[j];

From d69e557f2a2c39888d83c7b52244412ee2a5594e Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 7 Nov 2015 23:20:31 -0500
Subject: [PATCH 080/335] freedreno: add support for conditional rendering,
 required for GL3.0

A smarter implementation would make it possible to attach this to emit
state for the BY_REGION versions to avoid breaking the tiling. But this
is a start.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 docs/relnotes/11.1.0.html                     |  1 +
 .../drivers/freedreno/freedreno_context.h     |  4 +++
 .../drivers/freedreno/freedreno_draw.c        |  8 +++++
 .../drivers/freedreno/freedreno_query.c       | 11 +++++++
 .../drivers/freedreno/freedreno_resource.c    | 33 ++++++++++++++++---
 .../drivers/freedreno/freedreno_resource.h    |  2 ++
 .../drivers/freedreno/freedreno_screen.c      |  4 +--
 7 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 28fec7e89c4..00d517706b5 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -61,6 +61,7 @@ Note: some of the new features are only available with certain drivers.
 <li>GL_EXT_buffer_storage implemented for when ES 3.1 support is gained</li>
 <li>GL_EXT_draw_elements_base_vertex on all drivers</li>
 <li>GL_EXT_texture_compression_rgtc / latc on freedreno (a3xx)</li>
+<li>GL_NV_conditional_render on freedreno</li>
 <li>GL_OES_draw_elements_base_vertex on all drivers</li>
 <li>EGL_KHR_create_context on softpipe, llvmpipe</li>
 <li>EGL_KHR_gl_colorspace on softpipe, llvmpipe</li>
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 61c4c6d6e24..571c8142bf7 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -359,6 +359,10 @@ struct fd_context {
 	struct fd_streamout_stateobj streamout;
 	struct pipe_clip_state ucp;
 
+	struct pipe_query *cond_query;
+	bool cond_cond; /* inverted rendering condition */
+	uint cond_mode;
+
 	/* GMEM/tile handling fxns: */
 	void (*emit_tile_init)(struct fd_context *ctx);
 	void (*emit_tile_prep)(struct fd_context *ctx, struct fd_tile *tile);
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
index 7bf3343f43a..bf803cc77bc 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -88,6 +88,10 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
 		return;
 	}
 
+	/* TODO: push down the region versions into the tiles */
+	if (!fd_render_condition_check(pctx))
+		return;
+
 	/* emulate unsupported primitives: */
 	if (!fd_supported_prim(ctx, info->mode)) {
 		if (ctx->streamout.num_targets > 0)
@@ -220,6 +224,10 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
 	unsigned cleared_buffers;
 	int i;
 
+	/* TODO: push down the region versions into the tiles */
+	if (!fd_render_condition_check(pctx))
+		return;
+
 	/* for bookkeeping about which buffers have been cleared (and thus
 	 * can fully or partially skip mem2gmem) we need to ignore buffers
 	 * that have already had a draw, in case apps do silly things like
diff --git a/src/gallium/drivers/freedreno/freedreno_query.c b/src/gallium/drivers/freedreno/freedreno_query.c
index db2683c9b6f..b87e8250719 100644
--- a/src/gallium/drivers/freedreno/freedreno_query.c
+++ b/src/gallium/drivers/freedreno/freedreno_query.c
@@ -81,6 +81,16 @@ fd_get_query_result(struct pipe_context *pctx, struct pipe_query *pq,
 	return q->funcs->get_query_result(fd_context(pctx), q, wait, result);
 }
 
+static void
+fd_render_condition(struct pipe_context *pctx, struct pipe_query *pq,
+					boolean condition, uint mode)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	ctx->cond_query = pq;
+	ctx->cond_cond = condition;
+	ctx->cond_mode = mode;
+}
+
 static int
 fd_get_driver_query_info(struct pipe_screen *pscreen,
 		unsigned index, struct pipe_driver_query_info *info)
@@ -118,4 +128,5 @@ fd_query_context_init(struct pipe_context *pctx)
 	pctx->begin_query = fd_begin_query;
 	pctx->end_query = fd_end_query;
 	pctx->get_query_result = fd_get_query_result;
+	pctx->render_condition = fd_render_condition;
 }
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index 6e22e39f52e..5b1cee8d18d 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -671,7 +671,7 @@ fail:
 	return NULL;
 }
 
-static void fd_blitter_pipe_begin(struct fd_context *ctx);
+static void fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond);
 static void fd_blitter_pipe_end(struct fd_context *ctx);
 
 /**
@@ -693,7 +693,7 @@ fd_blitter_pipe_copy_region(struct fd_context *ctx,
 	if (!util_blitter_is_copy_supported(ctx->blitter, dst, src))
 		return false;
 
-	fd_blitter_pipe_begin(ctx);
+	fd_blitter_pipe_begin(ctx, false);
 	util_blitter_copy_texture(ctx->blitter,
 			dst, dst_level, dstx, dsty, dstz,
 			src, src_level, src_box);
@@ -735,6 +735,25 @@ fd_resource_copy_region(struct pipe_context *pctx,
 			src, src_level, src_box);
 }
 
+bool
+fd_render_condition_check(struct pipe_context *pctx)
+{
+	struct fd_context *ctx = fd_context(pctx);
+
+	if (!ctx->cond_query)
+		return true;
+
+	union pipe_query_result res = { 0 };
+	bool wait =
+		ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
+		ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
+
+	if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
+			return (bool)res.u64 != ctx->cond_cond;
+
+	return true;
+}
+
 /**
  * Optimal hardware path for blitting pixels.
  * Scaling, format conversion, up- and downsampling (resolve) are allowed.
@@ -753,6 +772,9 @@ fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
 		return;
 	}
 
+	if (info.render_condition_enable && !fd_render_condition_check(pctx))
+		return;
+
 	if (util_try_blit_via_copy_region(pctx, &info)) {
 		return; /* done */
 	}
@@ -769,13 +791,13 @@ fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
 		return;
 	}
 
-	fd_blitter_pipe_begin(ctx);
+	fd_blitter_pipe_begin(ctx, info.render_condition_enable);
 	util_blitter_blit(ctx->blitter, &info);
 	fd_blitter_pipe_end(ctx);
 }
 
 static void
-fd_blitter_pipe_begin(struct fd_context *ctx)
+fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond)
 {
 	util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
 	util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
@@ -796,6 +818,9 @@ fd_blitter_pipe_begin(struct fd_context *ctx)
 			(void **)ctx->fragtex.samplers);
 	util_blitter_save_fragment_sampler_views(ctx->blitter,
 			ctx->fragtex.num_textures, ctx->fragtex.textures);
+	if (!render_cond)
+		util_blitter_save_render_condition(ctx->blitter,
+			ctx->cond_query, ctx->cond_cond, ctx->cond_mode);
 
 	fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_BLIT);
 }
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h
index 7549becaa1f..10f5242da57 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.h
+++ b/src/gallium/drivers/freedreno/freedreno_resource.h
@@ -135,4 +135,6 @@ fd_resource_offset(struct fd_resource *rsc, unsigned level, unsigned layer)
 void fd_resource_screen_init(struct pipe_screen *pscreen);
 void fd_resource_context_init(struct pipe_context *pctx);
 
+bool fd_render_condition_check(struct pipe_context *pctx);
+
 #endif /* FREEDRENO_RESOURCE_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 56d1834ef9c..1e124592a80 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -160,7 +160,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_SHADER_STENCIL_EXPORT:
 	case PIPE_CAP_TGSI_TEXCOORD:
 	case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
-	case PIPE_CAP_CONDITIONAL_RENDER:
 	case PIPE_CAP_TEXTURE_MULTISAMPLE:
 	case PIPE_CAP_TEXTURE_BARRIER:
 	case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
@@ -176,6 +175,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_INDEP_BLEND_FUNC:
 	case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
 	case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+	case PIPE_CAP_CONDITIONAL_RENDER:
+	case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
 		return is_a3xx(screen) || is_a4xx(screen);
 
 	case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
@@ -227,7 +228,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
 	case PIPE_CAP_DRAW_INDIRECT:
 	case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
-	case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
 	case PIPE_CAP_SAMPLER_VIEW_TARGET:
 	case PIPE_CAP_POLYGON_OFFSET_CLAMP:
 	case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:

From 9c409c8df3db09c2922a8f9a0376ee91b2a6837c Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 17 Sep 2015 01:43:36 -0400
Subject: [PATCH 081/335] freedreno/a3xx: fix texture buffers, enable offsets

The main issue is that the current logic looks into cso->u.tex, which
is the wrong member of the union to use for texture buffers. While I
was at it, it was easy enough to add the logic to handle offsets
(first_element).

 - reduce texture buffer size limit (determined experimentally)
 - don't look at first/last levels, instead look at first/last element
 - include the first element offset
 - set offset alignment to 16 (determined experimentally)

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 16 +++++++++-----
 .../drivers/freedreno/a3xx/fd3_texture.c      | 21 +++++++++++++++----
 .../drivers/freedreno/freedreno_screen.c      |  9 ++++----
 3 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 25ea3e7a7b7..24afbc9e956 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -209,13 +209,19 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
 					fd3_pipe_sampler_view(tex->textures[i]) :
 					&dummy_view;
 			struct fd_resource *rsc = fd_resource(view->base.texture);
-			unsigned start = fd_sampler_first_level(&view->base);
-			unsigned end   = fd_sampler_last_level(&view->base);;
+			if (rsc && rsc->base.b.target == PIPE_BUFFER) {
+				OUT_RELOC(ring, rsc->bo, view->base.u.buf.first_element *
+						  util_format_get_blocksize(view->base.format), 0, 0);
+				j = 1;
+			} else {
+				unsigned start = fd_sampler_first_level(&view->base);
+				unsigned end   = fd_sampler_last_level(&view->base);;
 
-			for (j = 0; j < (end - start + 1); j++) {
-				struct fd_resource_slice *slice =
+				for (j = 0; j < (end - start + 1); j++) {
+					struct fd_resource_slice *slice =
 						fd_resource_slice(rsc, j + start);
-				OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0);
+					OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0);
+				}
 			}
 
 			/* pad the remaining entries w/ null: */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
index 15e63e7d478..99ae99ea0c1 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
@@ -211,8 +211,7 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 {
 	struct fd3_pipe_sampler_view *so = CALLOC_STRUCT(fd3_pipe_sampler_view);
 	struct fd_resource *rsc = fd_resource(prsc);
-	unsigned lvl = fd_sampler_first_level(cso);
-	unsigned miplevels = fd_sampler_last_level(cso) - lvl;
+	unsigned lvl;
 	uint32_t sz2 = 0;
 
 	if (!so)
@@ -227,17 +226,31 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 	so->texconst0 =
 			A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
 			A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) |
-			A3XX_TEX_CONST_0_MIPLVLS(miplevels) |
 			fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
 						cso->swizzle_b, cso->swizzle_a);
 
 	if (util_format_is_srgb(cso->format))
 		so->texconst0 |= A3XX_TEX_CONST_0_SRGB;
 
-	so->texconst1 =
+	if (prsc->target == PIPE_BUFFER) {
+		lvl = 0;
+		so->texconst1 =
+			A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
+			A3XX_TEX_CONST_1_WIDTH(cso->u.buf.last_element -
+								   cso->u.buf.first_element + 1) |
+			A3XX_TEX_CONST_1_HEIGHT(1);
+	} else {
+		unsigned miplevels;
+
+		lvl = fd_sampler_first_level(cso);
+		miplevels = fd_sampler_last_level(cso) - lvl;
+
+		so->texconst0 |= A3XX_TEX_CONST_0_MIPLVLS(miplevels);
+		so->texconst1 =
 			A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
 			A3XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
 			A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
+	}
 	/* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */
 	so->texconst2 =
 			A3XX_TEX_CONST_2_PITCH(fd3_pipe2nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 1e124592a80..8440e594308 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -180,16 +180,15 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 		return is_a3xx(screen) || is_a4xx(screen);
 
 	case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
-		/* ignoring first/last_element.. but I guess that should be
-		 * easy to add..
-		 */
-		return 0;
+		return is_a3xx(screen) ? 16 : 0;
 	case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
 		/* I think 32k on a4xx.. and we could possibly emulate more
 		 * by pretending 2d/rect textures and splitting high bits
 		 * of index into 2nd dimension..
 		 */
-		return 16383;
+		if (is_a3xx(screen)) return 8192;
+		if (is_a4xx(screen)) return 16383;
+		return 0;
 
 	case PIPE_CAP_DEPTH_CLIP_DISABLE:
 	case PIPE_CAP_CLIP_HALFZ:

From 4607b2b9b64e5486e636d300d89ee3712e2f7550 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sun, 8 Nov 2015 00:28:34 -0500
Subject: [PATCH 082/335] freedreno: expose GLSL 140 and fake MSAA for
 GL3.0/3.1 support

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/freedreno_screen.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 8440e594308..456917730d6 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -177,6 +177,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
 	case PIPE_CAP_CONDITIONAL_RENDER:
 	case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+	case PIPE_CAP_FAKE_SW_MSAA:
 		return is_a3xx(screen) || is_a4xx(screen);
 
 	case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
@@ -205,7 +206,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_GLSL_FEATURE_LEVEL:
 		if (glsl120)
 			return 120;
-		return is_ir3(screen) ? 130 : 120;
+		return is_ir3(screen) ? 140 : 120;
 
 	/* Unsupported features. */
 	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
@@ -220,7 +221,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
 	case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
 	case PIPE_CAP_TEXTURE_GATHER_SM5:
-	case PIPE_CAP_FAKE_SW_MSAA:
 	case PIPE_CAP_TEXTURE_QUERY_LOD:
 	case PIPE_CAP_SAMPLE_SHADING:
 	case PIPE_CAP_TEXTURE_GATHER_OFFSETS:

From fadd39442b2892821cdd4923ea511da881bfd02a Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Tue, 17 Nov 2015 11:42:34 -0500
Subject: [PATCH 083/335] freedreno: update generated headers

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/a2xx/a2xx.xml.h |  4 +--
 src/gallium/drivers/freedreno/a3xx/a3xx.xml.h | 29 +++++++++++++++++--
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h |  7 +++--
 .../drivers/freedreno/adreno_common.xml.h     |  4 +--
 .../drivers/freedreno/adreno_pm4.xml.h        |  4 +--
 5 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
index ef235734755..77f708f449c 100644
--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  68291 bytes, from 2015-11-17 16:39:59)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  64038 bytes, from 2015-11-17 16:37:36)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml         (   1773 bytes, from 2015-09-24 17:30:00)
 
 Copyright (C) 2013-2015 by the following authors:
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index 9f382baba97..2e3abfc1611 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  68291 bytes, from 2015-11-17 16:39:59)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  64038 bytes, from 2015-11-17 16:37:36)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml         (   1773 bytes, from 2015-09-24 17:30:00)
 
 Copyright (C) 2013-2015 by the following authors:
@@ -115,6 +115,10 @@ enum a3xx_vtx_fmt {
 	VFMT_10_10_10_2_UNORM = 57,
 	VFMT_10_10_10_2_SINT = 58,
 	VFMT_10_10_10_2_SNORM = 59,
+	VFMT_2_10_10_10_UINT = 60,
+	VFMT_2_10_10_10_UNORM = 61,
+	VFMT_2_10_10_10_SINT = 62,
+	VFMT_2_10_10_10_SNORM = 63,
 };
 
 enum a3xx_tex_fmt {
@@ -138,10 +142,12 @@ enum a3xx_tex_fmt {
 	TFMT_DXT1 = 36,
 	TFMT_DXT3 = 37,
 	TFMT_DXT5 = 38,
+	TFMT_2_10_10_10_UNORM = 40,
 	TFMT_10_10_10_2_UNORM = 41,
 	TFMT_9_9_9_E5_FLOAT = 42,
 	TFMT_11_11_10_FLOAT = 43,
 	TFMT_A8_UNORM = 44,
+	TFMT_L8_UNORM = 45,
 	TFMT_L8_A8_UNORM = 47,
 	TFMT_8_UNORM = 48,
 	TFMT_8_8_UNORM = 49,
@@ -183,6 +189,8 @@ enum a3xx_tex_fmt {
 	TFMT_32_SINT = 92,
 	TFMT_32_32_SINT = 93,
 	TFMT_32_32_32_32_SINT = 95,
+	TFMT_2_10_10_10_UINT = 96,
+	TFMT_10_10_10_2_UINT = 97,
 	TFMT_ETC2_RG11_SNORM = 112,
 	TFMT_ETC2_RG11_UNORM = 113,
 	TFMT_ETC2_R11_SNORM = 114,
@@ -215,6 +223,9 @@ enum a3xx_color_fmt {
 	RB_R8_UINT = 14,
 	RB_R8_SINT = 15,
 	RB_R10G10B10A2_UNORM = 16,
+	RB_A2R10G10B10_UNORM = 17,
+	RB_R10G10B10A2_UINT = 18,
+	RB_A2R10G10B10_UINT = 19,
 	RB_A8_UNORM = 20,
 	RB_R8_UNORM = 21,
 	RB_R16_FLOAT = 24,
@@ -1620,12 +1631,24 @@ static inline uint32_t A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val)
 }
 
 #define REG_A3XX_VFD_CONTROL_1					0x00002241
-#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK			0x0000ffff
+#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK			0x0000000f
 #define A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT			0
 static inline uint32_t A3XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val)
 {
 	return ((val) << A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK;
 }
+#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK			0x000000f0
+#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT			4
+static inline uint32_t A3XX_VFD_CONTROL_1_MAXTHRESHOLD(uint32_t val)
+{
+	return ((val) << A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK;
+}
+#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK			0x00000f00
+#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT			8
+static inline uint32_t A3XX_VFD_CONTROL_1_MINTHRESHOLD(uint32_t val)
+{
+	return ((val) << A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK;
+}
 #define A3XX_VFD_CONTROL_1_REGID4VTX__MASK			0x00ff0000
 #define A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT			16
 static inline uint32_t A3XX_VFD_CONTROL_1_REGID4VTX(uint32_t val)
diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index 9f970365464..76f525cb0c2 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  68291 bytes, from 2015-11-17 16:39:59)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  64038 bytes, from 2015-11-17 16:37:36)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml         (   1773 bytes, from 2015-09-24 17:30:00)
 
 Copyright (C) 2013-2015 by the following authors:
@@ -192,6 +192,9 @@ enum a4xx_tex_fmt {
 	TFMT4_32_32_32_32_FLOAT = 63,
 	TFMT4_9_9_9_E5_FLOAT = 32,
 	TFMT4_11_11_10_FLOAT = 37,
+	TFMT4_DXT1 = 86,
+	TFMT4_DXT3 = 87,
+	TFMT4_DXT5 = 88,
 	TFMT4_ATC_RGB = 100,
 	TFMT4_ATC_RGBA_EXPLICIT = 101,
 	TFMT4_ATC_RGBA_INTERPOLATED = 102,
diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h
index ca3d2ac3fca..e45cab76368 100644
--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  68291 bytes, from 2015-11-17 16:39:59)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  64038 bytes, from 2015-11-17 16:37:36)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml         (   1773 bytes, from 2015-09-24 17:30:00)
 
 Copyright (C) 2013-2015 by the following authors:
diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
index f095e3061b2..4aabc086607 100644
--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  68291 bytes, from 2015-11-17 16:39:59)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  64038 bytes, from 2015-11-17 16:37:36)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml         (   1773 bytes, from 2015-09-24 17:30:00)
 
 Copyright (C) 2013-2015 by the following authors:

From 2379cc9fe03911133037616fd1d3576c5acc3a96 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Tue, 17 Nov 2015 11:42:53 -0500
Subject: [PATCH 084/335] freedreno/a4xx: add compressed texture formats

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 .../drivers/freedreno/a4xx/fd4_format.c       | 24 ++++++++++++++++++-
 .../drivers/freedreno/a4xx/fd4_texture.c      |  4 +++-
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
index 847d4fb6d63..7ff3c8cb196 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -252,6 +252,28 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
 	VT(R32G32B32A32_FLOAT,   32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX),
 	_T(R32G32B32X32_FLOAT,   32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX),
 	V_(R32G32B32A32_FIXED,   32_32_32_32_FIXED, NONE,               WZYX),
+
+	/* compressed */
+	_T(ETC1_RGB8, ETC1, NONE, WZYX),
+	_T(ETC2_RGB8, ETC2_RGB8, NONE, WZYX),
+	_T(ETC2_SRGB8, ETC2_RGB8, NONE, WZYX),
+	_T(ETC2_RGB8A1, ETC2_RGB8A1, NONE, WZYX),
+	_T(ETC2_SRGB8A1, ETC2_RGB8A1, NONE, WZYX),
+	_T(ETC2_RGBA8, ETC2_RGBA8, NONE, WZYX),
+	_T(ETC2_SRGBA8, ETC2_RGBA8, NONE, WZYX),
+	_T(ETC2_R11_UNORM, ETC2_R11_UNORM, NONE, WZYX),
+	_T(ETC2_R11_SNORM, ETC2_R11_SNORM, NONE, WZYX),
+	_T(ETC2_RG11_UNORM, ETC2_RG11_UNORM, NONE, WZYX),
+	_T(ETC2_RG11_SNORM, ETC2_RG11_SNORM, NONE, WZYX),
+
+	_T(DXT1_RGB,   DXT1, NONE, WZYX),
+	_T(DXT1_SRGB,  DXT1, NONE, WZYX),
+	_T(DXT1_RGBA,  DXT1, NONE, WZYX),
+	_T(DXT1_SRGBA, DXT1, NONE, WZYX),
+	_T(DXT3_RGBA,  DXT3, NONE, WZYX),
+	_T(DXT3_SRGBA, DXT3, NONE, WZYX),
+	_T(DXT5_RGBA,  DXT5, NONE, WZYX),
+	_T(DXT5_SRGBA, DXT5, NONE, WZYX),
 };
 
 /* convert pipe format to vertex buffer format: */
@@ -295,7 +317,7 @@ fd4_pipe2fetchsize(enum pipe_format format)
 	if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
 		format = PIPE_FORMAT_Z32_FLOAT;
 
-	switch (util_format_get_blocksizebits(format)) {
+	switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) {
 	case 8:   return TFETCH4_1_BYTE;
 	case 16:  return TFETCH4_2_BYTE;
 	case 32:  return TFETCH4_4_BYTE;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index dbff5a738fd..a1e4536edd3 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -237,7 +237,9 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 		A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
 	so->texconst2 =
 		A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) |
-		A4XX_TEX_CONST_2_PITCH(rsc->slices[lvl].pitch * rsc->cpp);
+		A4XX_TEX_CONST_2_PITCH(
+			util_format_get_nblocksx(
+				cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
 
 	switch (prsc->target) {
 	case PIPE_TEXTURE_1D_ARRAY:

From 4671c13852f3bb6341e4a6d4030948399e711e56 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Tue, 17 Nov 2015 12:35:09 -0500
Subject: [PATCH 085/335] freedreno/a4xx: add fake RGTC support (required for
 GL3)

The a4xx bits corresponding to 'freedreno/a3xx: add fake RGTC support
(required for GL3)'

TODO some more r/e.. maybe we get lucky and hw supports some of this
directly?  For now this will help us enable gl3.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 docs/relnotes/11.1.0.html                     |  2 +-
 .../drivers/freedreno/a4xx/fd4_format.c       | 20 +++++++++++++++++++
 .../drivers/freedreno/a4xx/fd4_format.h       |  1 +
 .../drivers/freedreno/a4xx/fd4_texture.c      |  2 +-
 4 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 00d517706b5..f4563eab526 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -60,7 +60,7 @@ Note: some of the new features are only available with certain drivers.
 <li>GL_ARB_vertex_type_2_10_10_10_rev on freedreno (a3xx)</li>
 <li>GL_EXT_buffer_storage implemented for when ES 3.1 support is gained</li>
 <li>GL_EXT_draw_elements_base_vertex on all drivers</li>
-<li>GL_EXT_texture_compression_rgtc / latc on freedreno (a3xx)</li>
+<li>GL_EXT_texture_compression_rgtc / latc on freedreno (a3xx & a4xx)</li>
 <li>GL_NV_conditional_render on freedreno</li>
 <li>GL_OES_draw_elements_base_vertex on all drivers</li>
 <li>EGL_KHR_create_context on softpipe, llvmpipe</li>
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
index 7ff3c8cb196..087d04f3c74 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -274,6 +274,16 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
 	_T(DXT3_SRGBA, DXT3, NONE, WZYX),
 	_T(DXT5_RGBA,  DXT5, NONE, WZYX),
 	_T(DXT5_SRGBA, DXT5, NONE, WZYX),
+
+	/* faked */
+	_T(RGTC1_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
+	_T(RGTC1_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
+	_T(RGTC2_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
+	_T(RGTC2_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
+	_T(LATC1_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
+	_T(LATC1_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
+	_T(LATC2_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
+	_T(LATC2_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
 };
 
 /* convert pipe format to vertex buffer format: */
@@ -316,6 +326,8 @@ fd4_pipe2fetchsize(enum pipe_format format)
 {
 	if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
 		format = PIPE_FORMAT_Z32_FLOAT;
+	else if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
+		format = PIPE_FORMAT_R8G8B8A8_UNORM;
 
 	switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) {
 	case 8:   return TFETCH4_1_BYTE;
@@ -331,6 +343,14 @@ fd4_pipe2fetchsize(enum pipe_format format)
 	}
 }
 
+unsigned
+fd4_pipe2nblocksx(enum pipe_format format, unsigned width)
+{
+	if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
+		format = PIPE_FORMAT_R8G8B8A8_UNORM;
+	return util_format_get_nblocksx(format, width);
+}
+
 /* we need to special case a bit the depth/stencil restore, because we are
  * using the texture sampler to blit into the depth/stencil buffer, *not*
  * into a color buffer.  Otherwise fd4_tex_swiz() will do the wrong thing,
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.h b/src/gallium/drivers/freedreno/a4xx/fd4_format.h
index 04837da650b..8c365f081de 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.h
@@ -38,6 +38,7 @@ enum a4xx_tex_fmt fd4_pipe2tex(enum pipe_format format);
 enum a4xx_color_fmt fd4_pipe2color(enum pipe_format format);
 enum pipe_format fd4_gmem_restore_format(enum pipe_format format);
 enum a3xx_color_swap fd4_pipe2swap(enum pipe_format format);
+unsigned fd4_pipe2nblocksx(enum pipe_format format, unsigned width);
 enum a4xx_tex_fetchsize fd4_pipe2fetchsize(enum pipe_format format);
 enum a4xx_depth_format fd4_pipe2depth(enum pipe_format format);
 
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index a1e4536edd3..297854f6505 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -238,7 +238,7 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 	so->texconst2 =
 		A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) |
 		A4XX_TEX_CONST_2_PITCH(
-			util_format_get_nblocksx(
+			fd4_pipe2nblocksx(
 				cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
 
 	switch (prsc->target) {

From 624ec66653e2ce0abc6f4021111cf067b70741c1 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Fri, 6 Nov 2015 11:35:20 -0500
Subject: [PATCH 086/335] nir: remove nir_variable::max_ifc_array_access

No users.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 src/glsl/nir/glsl_to_nir.cpp |  9 ---------
 src/glsl/nir/nir.h           | 13 -------------
 2 files changed, 22 deletions(-)

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index c4b53f38b6b..5e9d57205a3 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -293,15 +293,6 @@ nir_visitor::visit(ir_variable *ir)
    var->type = ir->type;
    var->name = ralloc_strdup(var, ir->name);
 
-   if (ir->is_interface_instance() && ir->get_max_ifc_array_access() != NULL) {
-      unsigned size = ir->get_interface_type()->length;
-      var->max_ifc_array_access = ralloc_array(var, unsigned, size);
-      memcpy(var->max_ifc_array_access, ir->get_max_ifc_array_access(),
-             size * sizeof(unsigned));
-   } else {
-      var->max_ifc_array_access = NULL;
-   }
-
    var->data.read_only = ir->data.read_only;
    var->data.centroid = ir->data.centroid;
    var->data.sample = ir->data.sample;
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 46add22da70..90f1e628fe0 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -147,19 +147,6 @@ typedef struct {
     */
    char *name;
 
-   /**
-    * For variables which satisfy the is_interface_instance() predicate, this
-    * points to an array of integers such that if the ith member of the
-    * interface block is an array, max_ifc_array_access[i] is the maximum
-    * array element of that member that has been accessed.  If the ith member
-    * of the interface block is not an array, max_ifc_array_access[i] is
-    * unused.
-    *
-    * For variables whose type is not an interface block, this pointer is
-    * NULL.
-    */
-   unsigned *max_ifc_array_access;
-
    struct nir_variable_data {
 
       /**

From d27ae2cf8cd548fe822ae9bcf11ead1dadfed744 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Fri, 6 Nov 2015 11:35:21 -0500
Subject: [PATCH 087/335] nir: add array length field

This will simplify things somewhat in clone.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 src/glsl/nir/glsl_to_nir.cpp | 5 +++++
 src/glsl/nir/nir.h           | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 5e9d57205a3..e149d73e051 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -240,6 +240,8 @@ constant_copy(ir_constant *ir, void *mem_ctx)
 
    unsigned total_elems = ir->type->components();
    unsigned i;
+
+   ret->num_elements = 0;
    switch (ir->type->base_type) {
    case GLSL_TYPE_UINT:
       for (i = 0; i < total_elems; i++)
@@ -264,6 +266,8 @@ constant_copy(ir_constant *ir, void *mem_ctx)
    case GLSL_TYPE_STRUCT:
       ret->elements = ralloc_array(mem_ctx, nir_constant *,
                                    ir->type->length);
+      ret->num_elements = ir->type->length;
+
       i = 0;
       foreach_in_list(ir_constant, field, &ir->components) {
          ret->elements[i] = constant_copy(field, mem_ctx);
@@ -274,6 +278,7 @@ constant_copy(ir_constant *ir, void *mem_ctx)
    case GLSL_TYPE_ARRAY:
       ret->elements = ralloc_array(mem_ctx, nir_constant *,
                                    ir->type->length);
+      ret->num_elements = ir->type->length;
 
       for (i = 0; i < ir->type->length; i++)
          ret->elements[i] = constant_copy(ir->array_elements[i], mem_ctx);
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 90f1e628fe0..3d65128e751 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -111,6 +111,11 @@ typedef struct nir_constant {
     */
    union nir_constant_data value;
 
+   /* we could get this from the var->type but makes clone *much* easier to
+    * not have to care about the type.
+    */
+   unsigned num_elements;
+
    /* Array elements / Structure Fields */
    struct nir_constant **elements;
 } nir_constant;

From 7bc097899924f40140981567c7bb52297dd801f2 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 3 Nov 2015 00:31:15 -0800
Subject: [PATCH 088/335] i965/nir: Add OPT() and OPT_V() macros for invoking
 NIR passes.

OPT() is the normal macro for passes that return booleans, while OPT_V()
is a variant that works for passes that don't properly report progress.
(Such passes should be fixed to return a boolean, eventually.)

These macros take care of calling nir_validate_shader() and setting
progress appropriately.  In the future, it would be easy to add shader
dumping similar to INTEL_DEBUG=optimizer by extending the macro.

v2 (Jason Ekstrand):
 - Fix an unused variable warning

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 src/mesa/drivers/dri/i965/brw_nir.c | 131 +++++++++++++---------------
 1 file changed, 59 insertions(+), 72 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index fe5cad4e435..b19f9691956 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -56,8 +56,9 @@ remap_vs_attrs(nir_block *block, void *closure)
 }
 
 static void
-brw_nir_lower_inputs(const struct brw_device_info *devinfo,
-                     nir_shader *nir, bool is_scalar)
+brw_nir_lower_inputs(nir_shader *nir,
+                     const struct brw_device_info *devinfo,
+                     bool is_scalar)
 {
    switch (nir->stage) {
    case MESA_SHADER_VERTEX:
@@ -170,46 +171,49 @@ brw_nir_lower_outputs(nir_shader *nir, bool is_scalar)
    }
 }
 
+#define _OPT(do_pass) (({     \
+   bool this_progress = true; \
+   do_pass                    \
+   nir_validate_shader(nir);  \
+   this_progress;             \
+}))
+
+#define OPT(pass, ...) _OPT(                 \
+   this_progress = pass(nir ,##__VA_ARGS__); \
+   progress = progress || this_progress;     \
+)
+
+#define OPT_V(pass, ...) _OPT( \
+   pass(nir, ##__VA_ARGS__);   \
+)
+
 static void
 nir_optimize(nir_shader *nir, bool is_scalar)
 {
    bool progress;
    do {
       progress = false;
-      nir_lower_vars_to_ssa(nir);
-      nir_validate_shader(nir);
+      OPT_V(nir_lower_vars_to_ssa);
 
       if (is_scalar) {
-         nir_lower_alu_to_scalar(nir);
-         nir_validate_shader(nir);
+         OPT_V(nir_lower_alu_to_scalar);
       }
 
-      progress |= nir_copy_prop(nir);
-      nir_validate_shader(nir);
+      OPT(nir_copy_prop);
 
       if (is_scalar) {
-         nir_lower_phis_to_scalar(nir);
-         nir_validate_shader(nir);
+         OPT_V(nir_lower_phis_to_scalar);
       }
 
-      progress |= nir_copy_prop(nir);
-      nir_validate_shader(nir);
-      progress |= nir_opt_dce(nir);
-      nir_validate_shader(nir);
-      progress |= nir_opt_cse(nir);
-      nir_validate_shader(nir);
-      progress |= nir_opt_peephole_select(nir);
-      nir_validate_shader(nir);
-      progress |= nir_opt_algebraic(nir);
-      nir_validate_shader(nir);
-      progress |= nir_opt_constant_folding(nir);
-      nir_validate_shader(nir);
-      progress |= nir_opt_dead_cf(nir);
-      nir_validate_shader(nir);
-      progress |= nir_opt_remove_phis(nir);
-      nir_validate_shader(nir);
-      progress |= nir_opt_undef(nir);
-      nir_validate_shader(nir);
+      OPT(nir_copy_prop);
+      OPT(nir_opt_dce);
+      OPT(nir_opt_cse);
+      OPT(nir_opt_peephole_select);
+      OPT(nir_opt_algebraic);
+      OPT(nir_opt_constant_folding);
+      OPT(nir_opt_dead_cf);
+      OPT(nir_opt_remove_phis);
+      OPT(nir_opt_undef);
    } while (progress);
 }
 
@@ -228,6 +232,7 @@ brw_create_nir(struct brw_context *brw,
       .lower_txp = ~0,
    };
    bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
+   bool progress = false;
    nir_shader *nir;
 
    /* First, lower the GLSL IR or Mesa IR to NIR */
@@ -235,80 +240,63 @@ brw_create_nir(struct brw_context *brw,
       nir = glsl_to_nir(shader_prog, stage, options);
    } else {
       nir = prog_to_nir(prog, options);
-      nir_convert_to_ssa(nir); /* turn registers into SSA */
+      OPT_V(nir_convert_to_ssa); /* turn registers into SSA */
    }
    nir_validate_shader(nir);
 
    if (stage == MESA_SHADER_GEOMETRY) {
-      nir_lower_gs_intrinsics(nir);
-      nir_validate_shader(nir);
+      OPT(nir_lower_gs_intrinsics);
    }
 
-   nir_lower_global_vars_to_local(nir);
-   nir_validate_shader(nir);
+   OPT(nir_lower_global_vars_to_local);
 
-   nir_lower_tex(nir, &tex_options);
-   nir_validate_shader(nir);
+   OPT_V(nir_lower_tex, &tex_options);
 
-   nir_normalize_cubemap_coords(nir);
-   nir_validate_shader(nir);
+   OPT(nir_normalize_cubemap_coords);
 
-   nir_split_var_copies(nir);
-   nir_validate_shader(nir);
+   OPT(nir_split_var_copies);
 
    nir_optimize(nir, is_scalar);
 
    /* Lower a bunch of stuff */
-   nir_lower_var_copies(nir);
-   nir_validate_shader(nir);
+   OPT_V(nir_lower_var_copies);
 
    /* Get rid of split copies */
    nir_optimize(nir, is_scalar);
 
-   brw_nir_lower_inputs(devinfo, nir, is_scalar);
-   brw_nir_lower_outputs(nir, is_scalar);
+   OPT_V(brw_nir_lower_inputs, devinfo, is_scalar);
+   OPT_V(brw_nir_lower_outputs, is_scalar);
    nir_assign_var_locations(&nir->uniforms,
                             &nir->num_uniforms,
                             is_scalar ? type_size_scalar : type_size_vec4);
-   nir_lower_io(nir, -1, is_scalar ? type_size_scalar : type_size_vec4);
-   nir_validate_shader(nir);
+   OPT_V(nir_lower_io, -1, is_scalar ? type_size_scalar : type_size_vec4);
 
-   nir_remove_dead_variables(nir);
-   nir_validate_shader(nir);
+   OPT(nir_remove_dead_variables);
 
    if (shader_prog) {
-      nir_lower_samplers(nir, shader_prog);
-      nir_validate_shader(nir);
+      OPT_V(nir_lower_samplers, shader_prog);
    }
 
-   nir_lower_system_values(nir);
-   nir_validate_shader(nir);
+   OPT(nir_lower_system_values);
 
    if (shader_prog) {
-      nir_lower_atomics(nir, shader_prog);
-      nir_validate_shader(nir);
+      OPT_V(nir_lower_atomics, shader_prog);
    }
 
    nir_optimize(nir, is_scalar);
 
    if (brw->gen >= 6) {
       /* Try and fuse multiply-adds */
-      brw_nir_opt_peephole_ffma(nir);
-      nir_validate_shader(nir);
+      OPT(brw_nir_opt_peephole_ffma);
    }
 
-   nir_opt_algebraic_late(nir);
-   nir_validate_shader(nir);
+   OPT(nir_opt_algebraic_late);
 
-   nir_lower_locals_to_regs(nir);
-   nir_validate_shader(nir);
+   OPT(nir_lower_locals_to_regs);
 
-   nir_lower_to_source_mods(nir);
-   nir_validate_shader(nir);
-   nir_copy_prop(nir);
-   nir_validate_shader(nir);
-   nir_opt_dce(nir);
-   nir_validate_shader(nir);
+   OPT_V(nir_lower_to_source_mods);
+   OPT(nir_copy_prop);
+   OPT(nir_opt_dce);
 
    if (unlikely(debug_enabled)) {
       /* Re-index SSA defs so we print more sensible numbers. */
@@ -322,17 +310,16 @@ brw_create_nir(struct brw_context *brw,
       nir_print_shader(nir, stderr);
    }
 
-   nir_convert_from_ssa(nir, true);
-   nir_validate_shader(nir);
+   OPT_V(nir_convert_from_ssa, true);
 
    if (!is_scalar) {
-      nir_move_vec_src_uses_to_dest(nir);
-      nir_validate_shader(nir);
-
-      nir_lower_vec_to_movs(nir);
-      nir_validate_shader(nir);
+      OPT_V(nir_move_vec_src_uses_to_dest);
+      OPT(nir_lower_vec_to_movs);
    }
 
+   /* Needed only so that OPT and OPT_V can set it */
+   (void)progress;
+
    /* This is the last pass we run before we start emitting stuff.  It
     * determines when we need to insert boolean resolves on Gen <= 5.  We
     * run it last because it stashes data in instr->pass_flags and we don't

From 9ff71b649b4b3808a9e17ce69743c6037fd6603c Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 3 Nov 2015 00:31:22 -0800
Subject: [PATCH 089/335] i965/nir: Validate that NIR passes call
 nir_metadata_preserve().

Failing to call nir_metadata_preserve() can have nasty consequences:
some pass breaks dominance information, but leaves it marked as valid,
causing some subsequent pass to go haywire and probably crash.

This patch adds a simple validation mechanism to ensure passes handle
this properly.  We add a new bogus metadata flag that isn't used for
anything in particular, set it before each pass, and ensure it *isn't*
still set after the pass.  nir_metadata_preserve will reset the flag,
so correct passes will work, and bad passes will assert fail.

(I would have made these functions static inline, but nir.h is included
in C++, so we can't bit-or enums without lots of casting...)

Thanks to Dylan Baker for the idea.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 src/glsl/nir/nir.h                  |  5 ++++
 src/glsl/nir/nir_metadata.c         | 36 +++++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_nir.c | 10 +++++---
 3 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 3d65128e751..7eccebe76c6 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1312,6 +1312,7 @@ typedef enum {
    nir_metadata_block_index = 0x1,
    nir_metadata_dominance = 0x2,
    nir_metadata_live_ssa_defs = 0x4,
+   nir_metadata_not_properly_reset = 0x8,
 } nir_metadata;
 
 typedef struct {
@@ -1891,8 +1892,12 @@ void nir_print_instr(const nir_instr *instr, FILE *fp);
 
 #ifdef DEBUG
 void nir_validate_shader(nir_shader *shader);
+void nir_metadata_set_validation_flag(nir_shader *shader);
+void nir_metadata_check_validation_flag(nir_shader *shader);
 #else
 static inline void nir_validate_shader(nir_shader *shader) { (void) shader; }
+static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; }
+static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; }
 #endif /* DEBUG */
 
 void nir_calc_dominance_impl(nir_function_impl *impl);
diff --git a/src/glsl/nir/nir_metadata.c b/src/glsl/nir/nir_metadata.c
index 6de981f430f..d5324b35a78 100644
--- a/src/glsl/nir/nir_metadata.c
+++ b/src/glsl/nir/nir_metadata.c
@@ -52,3 +52,39 @@ nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved)
 {
    impl->valid_metadata &= preserved;
 }
+
+#ifdef DEBUG
+/**
+ * Make sure passes properly invalidate metadata (part 1).
+ *
+ * Call this before running a pass to set a bogus metadata flag, which will
+ * only be preserved if the pass forgets to call nir_metadata_preserve().
+ */
+void
+nir_metadata_set_validation_flag(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl) {
+         overload->impl->valid_metadata |= nir_metadata_not_properly_reset;
+      }
+   }
+}
+
+/**
+ * Make sure passes properly invalidate metadata (part 2).
+ *
+ * Call this after a pass makes progress to verify that the bogus metadata set by
+ * the earlier function was properly thrown away.  Note that passes may not call
+ * nir_metadata_preserve() if they don't actually make any changes at all.
+ */
+void
+nir_metadata_check_validation_flag(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl) {
+         assert(!(overload->impl->valid_metadata &
+                  nir_metadata_not_properly_reset));
+      }
+   }
+}
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index b19f9691956..7826729db85 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -178,9 +178,13 @@ brw_nir_lower_outputs(nir_shader *nir, bool is_scalar)
    this_progress;             \
 }))
 
-#define OPT(pass, ...) _OPT(                 \
-   this_progress = pass(nir ,##__VA_ARGS__); \
-   progress = progress || this_progress;     \
+#define OPT(pass, ...) _OPT(                   \
+   nir_metadata_set_validation_flag(nir);      \
+   this_progress = pass(nir ,##__VA_ARGS__);   \
+   if (this_progress) {                        \
+      progress = true;                         \
+      nir_metadata_check_validation_flag(nir); \
+   }                                           \
 )
 
 #define OPT_V(pass, ...) _OPT( \

From 9fbd390dd4b60746c2ce60fb20c61c45c0e8a022 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 11 Nov 2015 08:31:29 -0800
Subject: [PATCH 090/335] nir: Add support for cloning shaders

This commit is heavily based on one by Rob Clark <robdclark@gmail.com> but
reworked to re-use nir_create functions and do less hashing.

Signed-off-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Rob Clark <robclark@freedesktop.org>
---
 src/glsl/Makefile.sources |   1 +
 src/glsl/nir/nir.c        |   8 +
 src/glsl/nir/nir.h        |   2 +
 src/glsl/nir/nir_clone.c  | 671 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 682 insertions(+)
 create mode 100644 src/glsl/nir/nir_clone.c

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index d4b02c17b0d..d9db5f61e04 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -26,6 +26,7 @@ NIR_FILES = \
 	nir/nir.h \
 	nir/nir_array.h \
 	nir/nir_builder.h \
+	nir/nir_clone.c \
 	nir/nir_constant_expressions.h \
 	nir/nir_control_flow.c \
 	nir/nir_control_flow.h \
diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index bb7a5fa5835..93c18fbaea5 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -312,6 +312,14 @@ nir_block_create(nir_shader *shader)
    block->predecessors = _mesa_set_create(block, _mesa_hash_pointer,
                                           _mesa_key_pointer_equal);
    block->imm_dom = NULL;
+   /* XXX maybe it would be worth it to defer allocation?  This
+    * way it doesn't get allocated for shader ref's that never run
+    * nir_calc_dominance?  For example, state-tracker creates an
+    * initial IR, clones that, runs appropriate lowering pass, passes
+    * to driver which does common lowering/opt, and then stores ref
+    * which is later used to do state specific lowering and further
+    * opt.  Do any of the references not need dominance metadata?
+    */
    block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer,
                                           _mesa_key_pointer_equal);
 
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 7eccebe76c6..e9d722eed7e 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1890,6 +1890,8 @@ void nir_index_blocks(nir_function_impl *impl);
 void nir_print_shader(nir_shader *shader, FILE *fp);
 void nir_print_instr(const nir_instr *instr, FILE *fp);
 
+nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s);
+
 #ifdef DEBUG
 void nir_validate_shader(nir_shader *shader);
 void nir_metadata_set_validation_flag(nir_shader *shader);
diff --git a/src/glsl/nir/nir_clone.c b/src/glsl/nir/nir_clone.c
new file mode 100644
index 00000000000..33ff5261b21
--- /dev/null
+++ b/src/glsl/nir/nir_clone.c
@@ -0,0 +1,671 @@
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_control_flow_private.h"
+
+/* Secret Decoder Ring:
+ *   clone_foo():
+ *        Allocate and clone a foo.
+ *   __clone_foo():
+ *        Clone body of foo (ie. parent class, embedded struct, etc)
+ */
+
+typedef struct {
+   /* maps orig ptr -> cloned ptr: */
+   struct hash_table *ptr_table;
+
+   /* List of phi sources. */
+   struct list_head phi_srcs;
+
+   /* new shader object, used as memctx for just about everything else: */
+   nir_shader *ns;
+} clone_state;
+
+static void
+init_clone_state(clone_state *state)
+{
+   state->ptr_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                              _mesa_key_pointer_equal);
+   list_inithead(&state->phi_srcs);
+}
+
+static void
+free_clone_state(clone_state *state)
+{
+   _mesa_hash_table_destroy(state->ptr_table, NULL);
+}
+
+static void *
+lookup_ptr(clone_state *state, const void *ptr)
+{
+   struct hash_entry *entry;
+
+   if (!ptr)
+      return NULL;
+
+   entry = _mesa_hash_table_search(state->ptr_table, ptr);
+   assert(entry && "Failed to find pointer!");
+   if (!entry)
+      return NULL;
+
+   return entry->data;
+}
+
+static void
+store_ptr(clone_state *state, void *nptr, const void *ptr)
+{
+   _mesa_hash_table_insert(state->ptr_table, ptr, nptr);
+}
+
+static nir_constant *
+clone_constant(clone_state *state, const nir_constant *c, nir_variable *nvar)
+{
+   nir_constant *nc = ralloc(nvar, nir_constant);
+
+   nc->value = c->value;
+   nc->num_elements = c->num_elements;
+   nc->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
+   for (unsigned i = 0; i < c->num_elements; i++) {
+      nc->elements[i] = clone_constant(state, c->elements[i], nvar);
+   }
+
+   return nc;
+}
+
+/* NOTE: for cloning nir_variable's, bypass nir_variable_create to avoid
+ * having to deal with locals and globals separately:
+ */
+static nir_variable *
+clone_variable(clone_state *state, const nir_variable *var)
+{
+   nir_variable *nvar = rzalloc(state->ns, nir_variable);
+   store_ptr(state, nvar, var);
+
+   nvar->type = var->type;
+   nvar->name = ralloc_strdup(nvar, var->name);
+   nvar->data = var->data;
+   nvar->num_state_slots = var->num_state_slots;
+   nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots);
+   memcpy(nvar->state_slots, var->state_slots,
+          var->num_state_slots * sizeof(nir_state_slot));
+   if (var->constant_initializer) {
+      nvar->constant_initializer =
+         clone_constant(state, var->constant_initializer, nvar);
+   }
+   nvar->interface_type = var->interface_type;
+
+   return nvar;
+}
+
+/* clone list of nir_variable: */
+static void
+clone_var_list(clone_state *state, struct exec_list *dst,
+               const struct exec_list *list)
+{
+   exec_list_make_empty(dst);
+   foreach_list_typed(nir_variable, var, node, list) {
+      nir_variable *nvar = clone_variable(state, var);
+      exec_list_push_tail(dst, &nvar->node);
+   }
+}
+
+/* NOTE: for cloning nir_register's, bypass nir_global/local_reg_create()
+ * to avoid having to deal with locals and globals separately:
+ */
+static nir_register *
+clone_register(clone_state *state, const nir_register *reg)
+{
+   nir_register *nreg = rzalloc(state->ns, nir_register);
+   store_ptr(state, nreg, reg);
+
+   nreg->num_components = reg->num_components;
+   nreg->num_array_elems = reg->num_array_elems;
+   nreg->index = reg->index;
+   nreg->name = ralloc_strdup(nreg, reg->name);
+   nreg->is_global = reg->is_global;
+   nreg->is_packed = reg->is_packed;
+
+   /* reconstructing uses/defs/if_uses handled by nir_instr_insert() */
+   list_inithead(&nreg->uses);
+   list_inithead(&nreg->defs);
+   list_inithead(&nreg->if_uses);
+
+   return nreg;
+}
+
+/* clone list of nir_register: */
+static void
+clone_reg_list(clone_state *state, struct exec_list *dst,
+               const struct exec_list *list)
+{
+   exec_list_make_empty(dst);
+   foreach_list_typed(nir_register, reg, node, list) {
+      nir_register *nreg = clone_register(state, reg);
+      exec_list_push_tail(dst, &nreg->node);
+   }
+}
+
+static void
+__clone_src(clone_state *state, void *ninstr_or_if,
+            nir_src *nsrc, const nir_src *src)
+{
+   nsrc->is_ssa = src->is_ssa;
+   if (src->is_ssa) {
+      nsrc->ssa = lookup_ptr(state, src->ssa);
+   } else {
+      nsrc->reg.reg = lookup_ptr(state, src->reg.reg);
+      if (src->reg.indirect) {
+         nsrc->reg.indirect = ralloc(ninstr_or_if, nir_src);
+         __clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect);
+      }
+      nsrc->reg.base_offset = src->reg.base_offset;
+   }
+}
+
+static void
+__clone_dst(clone_state *state, nir_instr *ninstr,
+            nir_dest *ndst, const nir_dest *dst)
+{
+   ndst->is_ssa = dst->is_ssa;
+   if (dst->is_ssa) {
+      nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components, dst->ssa.name);
+      store_ptr(state, &ndst->ssa, &dst->ssa);
+   } else {
+      ndst->reg.reg = lookup_ptr(state, dst->reg.reg);
+      if (dst->reg.indirect) {
+         ndst->reg.indirect = ralloc(ninstr, nir_src);
+         __clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect);
+      }
+      ndst->reg.base_offset = dst->reg.base_offset;
+   }
+}
+
+static nir_deref *clone_deref(clone_state *state, const nir_deref *deref,
+                              nir_instr *ninstr, nir_deref *parent);
+
+static nir_deref_var *
+clone_deref_var(clone_state *state, const nir_deref_var *dvar,
+                nir_instr *ninstr)
+{
+   nir_variable *nvar = lookup_ptr(state, dvar->var);
+   nir_deref_var *ndvar = nir_deref_var_create(ninstr, nvar);
+
+   if (dvar->deref.child)
+      ndvar->deref.child = clone_deref(state, dvar->deref.child,
+                                       ninstr, &ndvar->deref);
+
+   return ndvar;
+}
+
+static nir_deref_array *
+clone_deref_array(clone_state *state, const nir_deref_array *darr,
+                  nir_instr *ninstr, nir_deref *parent)
+{
+   nir_deref_array *ndarr = nir_deref_array_create(parent);
+
+   ndarr->deref.type = darr->deref.type;
+   if (darr->deref.child)
+      ndarr->deref.child = clone_deref(state, darr->deref.child,
+                                       ninstr, &ndarr->deref);
+
+   ndarr->deref_array_type = darr->deref_array_type;
+   ndarr->base_offset = darr->base_offset;
+   if (ndarr->deref_array_type == nir_deref_array_type_indirect)
+      __clone_src(state, ninstr, &ndarr->indirect, &darr->indirect);
+
+   return ndarr;
+}
+
+static nir_deref_struct *
+clone_deref_struct(clone_state *state, const nir_deref_struct *dstr,
+                   nir_instr *ninstr, nir_deref *parent)
+{
+   nir_deref_struct *ndstr = nir_deref_struct_create(parent, dstr->index);
+
+   ndstr->deref.type = dstr->deref.type;
+   if (dstr->deref.child)
+      ndstr->deref.child = clone_deref(state, dstr->deref.child,
+                                       ninstr, &ndstr->deref);
+
+   return ndstr;
+}
+
+static nir_deref *
+clone_deref(clone_state *state, const nir_deref *dref,
+            nir_instr *ninstr, nir_deref *parent)
+{
+   switch (dref->deref_type) {
+   case nir_deref_type_array:
+      return &clone_deref_array(state, nir_deref_as_array(dref),
+                                ninstr, parent)->deref;
+   case nir_deref_type_struct:
+      return &clone_deref_struct(state, nir_deref_as_struct(dref),
+                                 ninstr, parent)->deref;
+   default:
+      unreachable("bad deref type");
+      return NULL;
+   }
+}
+
+static nir_alu_instr *
+clone_alu(clone_state *state, const nir_alu_instr *alu)
+{
+   nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op);
+
+   __clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest);
+   nalu->dest.saturate = alu->dest.saturate;
+   nalu->dest.write_mask = alu->dest.write_mask;
+
+   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
+      __clone_src(state, &nalu->instr, &nalu->src[i].src, &alu->src[i].src);
+      nalu->src[i].negate = alu->src[i].negate;
+      nalu->src[i].abs = alu->src[i].abs;
+      memcpy(nalu->src[i].swizzle, alu->src[i].swizzle,
+             sizeof(nalu->src[i].swizzle));
+   }
+
+   return nalu;
+}
+
+static nir_intrinsic_instr *
+clone_intrinsic(clone_state *state, const nir_intrinsic_instr *itr)
+{
+   nir_intrinsic_instr *nitr =
+      nir_intrinsic_instr_create(state->ns, itr->intrinsic);
+
+   unsigned num_variables = nir_intrinsic_infos[itr->intrinsic].num_variables;
+   unsigned num_srcs = nir_intrinsic_infos[itr->intrinsic].num_srcs;
+
+   if (nir_intrinsic_infos[itr->intrinsic].has_dest)
+      __clone_dst(state, &nitr->instr, &nitr->dest, &itr->dest);
+
+   nitr->num_components = itr->num_components;
+   memcpy(nitr->const_index, itr->const_index, sizeof(nitr->const_index));
+
+   for (unsigned i = 0; i < num_variables; i++) {
+      nitr->variables[i] = clone_deref_var(state, itr->variables[i],
+                                           &nitr->instr);
+   }
+
+   for (unsigned i = 0; i < num_srcs; i++)
+      __clone_src(state, &nitr->instr, &nitr->src[i], &itr->src[i]);
+
+   return nitr;
+}
+
+static nir_load_const_instr *
+clone_load_const(clone_state *state, const nir_load_const_instr *lc)
+{
+   nir_load_const_instr *nlc =
+      nir_load_const_instr_create(state->ns, lc->def.num_components);
+
+   memcpy(&nlc->value, &lc->value, sizeof(nlc->value));
+
+   store_ptr(state, &nlc->def, &lc->def);
+
+   return nlc;
+}
+
+static nir_ssa_undef_instr *
+clone_ssa_undef(clone_state *state, const nir_ssa_undef_instr *sa)
+{
+   nir_ssa_undef_instr *nsa =
+      nir_ssa_undef_instr_create(state->ns, sa->def.num_components);
+
+   store_ptr(state, &nsa->def, &sa->def);
+
+   return nsa;
+}
+
+static nir_tex_instr *
+clone_tex(clone_state *state, const nir_tex_instr *tex)
+{
+   nir_tex_instr *ntex = nir_tex_instr_create(state->ns, tex->num_srcs);
+
+   ntex->sampler_dim = tex->sampler_dim;
+   ntex->dest_type = tex->dest_type;
+   ntex->op = tex->op;
+   __clone_dst(state, &ntex->instr, &ntex->dest, &tex->dest);
+   for (unsigned i = 0; i < ntex->num_srcs; i++) {
+      ntex->src[i].src_type = tex->src[i].src_type;
+      __clone_src(state, &ntex->instr, &ntex->src[i].src, &tex->src[i].src);
+   }
+   ntex->coord_components = tex->coord_components;
+   ntex->is_array = tex->is_array;
+   ntex->is_shadow = tex->is_shadow;
+   ntex->is_new_style_shadow = tex->is_new_style_shadow;
+   memcpy(ntex->const_offset, tex->const_offset, sizeof(ntex->const_offset));
+   ntex->component = tex->component;
+   ntex->sampler_index = tex->sampler_index;
+   ntex->sampler_array_size = tex->sampler_array_size;
+   if (tex->sampler)
+      ntex->sampler = clone_deref_var(state, tex->sampler, &ntex->instr);
+
+   return ntex;
+}
+
+static nir_phi_instr *
+clone_phi(clone_state *state, const nir_phi_instr *phi, nir_block *nblk)
+{
+   nir_phi_instr *nphi = nir_phi_instr_create(state->ns);
+
+   __clone_dst(state, &nphi->instr, &nphi->dest, &phi->dest);
+
+   /* Cloning a phi node is a bit different from other instructions.  The
+    * sources of phi instructions are the only time where we can use an SSA
+    * def before it is defined.  In order to handle this, we just copy over
+    * the sources from the old phi instruction directly and then fix them up
+    * in a second pass once all the instructions in the function have been
+    * properly cloned.
+    *
+    * In order to ensure that the copied sources (which are the same as the
+    * old phi instruction's sources for now) don't get inserted into the old
+    * shader's use-def lists, we have to add the phi instruction *before* we
+    * set up its sources.
+    */
+   nir_instr_insert_after_block(nblk, &nphi->instr);
+
+   foreach_list_typed(nir_phi_src, src, node, &phi->srcs) {
+      nir_phi_src *nsrc = ralloc(nphi, nir_phi_src);
+
+      /* Just copy the old source for now. */
+      memcpy(nsrc, src, sizeof(*src));
+
+      /* Since we're not letting nir_instr_insert handle use/def stuff for us,
+       * we have to set the parent_instr manually.  It doesn't really matter
+       * when we do it, so we might as well do it here.
+       */
+      nsrc->src.parent_instr = &nphi->instr;
+
+      /* Stash it in the list of phi sources.  We'll walk this list and fix up
+       * sources at the very end of clone_function_impl.
+       */
+      list_add(&nsrc->src.use_link, &state->phi_srcs);
+
+      exec_list_push_tail(&nphi->srcs, &nsrc->node);
+   }
+
+   return nphi;
+}
+
+static nir_jump_instr *
+clone_jump(clone_state *state, const nir_jump_instr *jmp)
+{
+   nir_jump_instr *njmp = nir_jump_instr_create(state->ns, jmp->type);
+
+   return njmp;
+}
+
+static nir_call_instr *
+clone_call(clone_state *state, const nir_call_instr *call)
+{
+   nir_function_overload *ncallee = lookup_ptr(state, call->callee);
+   nir_call_instr *ncall = nir_call_instr_create(state->ns, ncallee);
+
+   for (unsigned i = 0; i < ncall->num_params; i++)
+      ncall->params[i] = clone_deref_var(state, call->params[i], &ncall->instr);
+
+   ncall->return_deref = clone_deref_var(state, call->return_deref,
+                                         &ncall->instr);
+
+   return ncall;
+}
+
+static nir_instr *
+clone_instr(clone_state *state, const nir_instr *instr)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu:
+      return &clone_alu(state, nir_instr_as_alu(instr))->instr;
+   case nir_instr_type_intrinsic:
+      return &clone_intrinsic(state, nir_instr_as_intrinsic(instr))->instr;
+   case nir_instr_type_load_const:
+      return &clone_load_const(state, nir_instr_as_load_const(instr))->instr;
+   case nir_instr_type_ssa_undef:
+      return &clone_ssa_undef(state, nir_instr_as_ssa_undef(instr))->instr;
+   case nir_instr_type_tex:
+      return &clone_tex(state, nir_instr_as_tex(instr))->instr;
+   case nir_instr_type_phi:
+      unreachable("Cannot clone phis with clone_instr");
+   case nir_instr_type_jump:
+      return &clone_jump(state, nir_instr_as_jump(instr))->instr;
+   case nir_instr_type_call:
+      return &clone_call(state, nir_instr_as_call(instr))->instr;
+   case nir_instr_type_parallel_copy:
+      unreachable("Cannot clone parallel copies");
+   default:
+      unreachable("bad instr type");
+      return NULL;
+   }
+}
+
+static nir_block *
+clone_block(clone_state *state, struct exec_list *cf_list, const nir_block *blk)
+{
+   /* Don't actually create a new block.  Just use the one from the tail of
+    * the list.  NIR guarantees that the tail of the list is a block and that
+    * no two blocks are side-by-side in the IR;  It should be empty.
+    */
+   nir_block *nblk =
+      exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
+   assert(nblk->cf_node.type == nir_cf_node_block);
+   assert(exec_list_is_empty(&nblk->instr_list));
+
+   /* We need this for phi sources */
+   store_ptr(state, nblk, blk);
+
+   nir_foreach_instr(blk, instr) {
+      if (instr->type == nir_instr_type_phi) {
+         /* Phi instructions are a bit of a special case when cloning because
+          * we don't want inserting the instruction to automatically handle
+          * use/defs for us.  Instead, we need to wait until all the
+          * blocks/instructions are in so that we can set their sources up.
+          */
+         clone_phi(state, nir_instr_as_phi(instr), nblk);
+      } else {
+         nir_instr *ninstr = clone_instr(state, instr);
+         nir_instr_insert_after_block(nblk, ninstr);
+      }
+   }
+
+   return nblk;
+}
+
+static void
+clone_cf_list(clone_state *state, struct exec_list *dst,
+              const struct exec_list *list);
+
+static nir_if *
+clone_if(clone_state *state, struct exec_list *cf_list, const nir_if *i)
+{
+   nir_if *ni = nir_if_create(state->ns);
+
+   __clone_src(state, ni, &ni->condition, &i->condition);
+
+   nir_cf_node_insert_end(cf_list, &ni->cf_node);
+
+   clone_cf_list(state, &ni->then_list, &i->then_list);
+   clone_cf_list(state, &ni->else_list, &i->else_list);
+
+   return ni;
+}
+
+static nir_loop *
+clone_loop(clone_state *state, struct exec_list *cf_list, const nir_loop *loop)
+{
+   nir_loop *nloop = nir_loop_create(state->ns);
+
+   nir_cf_node_insert_end(cf_list, &nloop->cf_node);
+
+   clone_cf_list(state, &nloop->body, &loop->body);
+
+   return nloop;
+}
+
+/* clone list of nir_cf_node: */
+static void
+clone_cf_list(clone_state *state, struct exec_list *dst,
+              const struct exec_list *list)
+{
+   foreach_list_typed(nir_cf_node, cf, node, list) {
+      switch (cf->type) {
+      case nir_cf_node_block:
+         clone_block(state, dst, nir_cf_node_as_block(cf));
+         break;
+      case nir_cf_node_if:
+         clone_if(state, dst, nir_cf_node_as_if(cf));
+         break;
+      case nir_cf_node_loop:
+         clone_loop(state, dst, nir_cf_node_as_loop(cf));
+         break;
+      default:
+         unreachable("bad cf type");
+      }
+   }
+}
+
+static nir_function_impl *
+clone_function_impl(clone_state *state, const nir_function_impl *fi,
+                    nir_function_overload *nfo)
+{
+   nir_function_impl *nfi = nir_function_impl_create(nfo);
+
+   clone_var_list(state, &nfi->locals, &fi->locals);
+   clone_reg_list(state, &nfi->registers, &fi->registers);
+   nfi->reg_alloc = fi->reg_alloc;
+
+   nfi->num_params = fi->num_params;
+   nfi->params = ralloc_array(state->ns, nir_variable *, fi->num_params);
+   for (unsigned i = 0; i < fi->num_params; i++) {
+      nfi->params[i] = lookup_ptr(state, fi->params[i]);
+   }
+   nfi->return_var = lookup_ptr(state, fi->return_var);
+
+   assert(list_empty(&state->phi_srcs));
+
+   clone_cf_list(state, &nfi->body, &fi->body);
+
+   /* After we've cloned almost everything, we have to walk the list of phi
+    * sources and fix them up.  Thanks to loops, the block and SSA value for a
+    * phi source may not be defined when we first encounter it.  Instead, we
+    * add it to the phi_srcs list and we fix it up here.
+    */
+   list_for_each_entry_safe(nir_phi_src, src, &state->phi_srcs, src.use_link) {
+      src->pred = lookup_ptr(state, src->pred);
+      assert(src->src.is_ssa);
+      src->src.ssa = lookup_ptr(state, src->src.ssa);
+
+      /* Remove from this list and place in the uses of the SSA def */
+      list_del(&src->src.use_link);
+      list_addtail(&src->src.use_link, &src->src.ssa->uses);
+   }
+   assert(list_empty(&state->phi_srcs));
+
+   /* All metadata is invalidated in the cloning process */
+   nfi->valid_metadata = 0;
+
+   return nfi;
+}
+
+static nir_function_overload *
+clone_function_overload(clone_state *state, const nir_function_overload *fo,
+                        nir_function *nfxn)
+{
+   nir_function_overload *nfo = nir_function_overload_create(nfxn);
+
+   /* Needed for call instructions */
+   store_ptr(state, nfo, fo);
+
+   nfo->num_params = fo->num_params;
+   nfo->params = ralloc_array(state->ns, nir_parameter, fo->num_params);
+   memcpy(nfo->params, fo->params, sizeof(nir_parameter) * fo->num_params);
+
+   nfo->return_type = fo->return_type;
+
+   /* At first glance, it looks like we should clone the function_impl here.
+    * However, call instructions need to be able to reference at least the
+    * overload and those will get processed as we clone the function_impl's.
+    * We stop here and do function_impls as a second pass.
+    */
+
+   return nfo;
+}
+
+static nir_function *
+clone_function(clone_state *state, const nir_function *fxn, nir_shader *ns)
+{
+   assert(ns == state->ns);
+   nir_function *nfxn = nir_function_create(ns, fxn->name);
+
+   foreach_list_typed(nir_function_overload, fo, node, &fxn->overload_list)
+      clone_function_overload(state, fo, nfxn);
+
+   return nfxn;
+}
+
+/* Deep-copy shader s into a new shader from nir_shader_create(mem_ctx, ...) */
+nir_shader *
+nir_shader_clone(void *mem_ctx, const nir_shader *s)
+{
+   clone_state state;
+   init_clone_state(&state);
+
+   nir_shader *ns = nir_shader_create(mem_ctx, s->stage, s->options);
+   state.ns = ns;
+
+   clone_var_list(&state, &ns->uniforms, &s->uniforms);
+   clone_var_list(&state, &ns->inputs,   &s->inputs);
+   clone_var_list(&state, &ns->outputs,  &s->outputs);
+   clone_var_list(&state, &ns->globals,  &s->globals);
+   clone_var_list(&state, &ns->system_values, &s->system_values);
+
+   /* Global registers must be mapped before any instruction looks them up */
+   clone_reg_list(&state, &ns->registers, &s->registers);
+   ns->reg_alloc = s->reg_alloc;
+
+   /* Go through and clone functions and overloads */
+   foreach_list_typed(nir_function, fxn, node, &s->functions)
+      clone_function(&state, fxn, ns);
+
+   /* Only after all overloads are cloned can we clone the actual function
+    * implementations, because nir_call_instr's need to reference the overloads
+    * of other functions.  Overloads without an implementation are skipped.
+    */
+   nir_foreach_overload(s, fo) {
+      if (fo->impl)
+         clone_function_impl(&state, fo->impl, lookup_ptr(&state, fo));
+   }
+
+   ns->info = s->info;
+   ns->info.name = ralloc_strdup(ns, ns->info.name);
+   if (ns->info.label)
+      ns->info.label = ralloc_strdup(ns, ns->info.label);
+
+   ns->num_inputs = s->num_inputs;
+   ns->num_uniforms = s->num_uniforms;
+   ns->num_outputs = s->num_outputs;
+
+   free_clone_state(&state);
+   return ns;
+}

From 0bee3acc2a303b4cbbac0f6f54ffc8be79bc7470 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Mon, 16 Nov 2015 11:48:05 -0800
Subject: [PATCH 091/335] i965/nir: Add hooks for testing nir_shader_clone

This commit adds code for testing nir_shader_clone by running it after each
and every optimization pass and throwing away the old shader.  Testing
nir_shader_clone is hidden behind a new NIR_TEST_CLONE environment
variable.

Reviewed-by: Rob Clark <robclark@freedesktop.org>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_nir.c | 35 +++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 7826729db85..7896f29803b 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -171,11 +171,26 @@ brw_nir_lower_outputs(nir_shader *nir, bool is_scalar)
    }
 }
 
-#define _OPT(do_pass) (({     \
-   bool this_progress = true; \
-   do_pass                    \
-   nir_validate_shader(nir);  \
-   this_progress;             \
+/* True if NIR_TEST_CLONE asks for clone testing; queried once and cached. */
+static bool
+should_clone_nir(void)
+{
+   static int should_clone = -1; /* -1: environment not queried yet */
+   if (should_clone < 0)
+      should_clone = brw_env_var_as_boolean("NIR_TEST_CLONE", false);
+   return should_clone;
+}
+
+#define _OPT(do_pass) (({                                            \
+   bool this_progress = true;                                        \
+   do_pass                                                           \
+   nir_validate_shader(nir);                                         \
+   if (should_clone_nir()) {                                         \
+      nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \
+      ralloc_free(nir);                                              \
+      nir = clone;                                                   \
+   }                                                                 \
+   this_progress;                                                    \
 }))
 
 #define OPT(pass, ...) _OPT(                   \
@@ -191,7 +206,7 @@ brw_nir_lower_outputs(nir_shader *nir, bool is_scalar)
    pass(nir, ##__VA_ARGS__);   \
 )
 
-static void
+static nir_shader *
 nir_optimize(nir_shader *nir, bool is_scalar)
 {
    bool progress;
@@ -219,6 +234,8 @@ nir_optimize(nir_shader *nir, bool is_scalar)
       OPT(nir_opt_remove_phis);
       OPT(nir_opt_undef);
    } while (progress);
+
+   return nir;
 }
 
 nir_shader *
@@ -260,13 +277,13 @@ brw_create_nir(struct brw_context *brw,
 
    OPT(nir_split_var_copies);
 
-   nir_optimize(nir, is_scalar);
+   nir = nir_optimize(nir, is_scalar);
 
    /* Lower a bunch of stuff */
    OPT_V(nir_lower_var_copies);
 
    /* Get rid of split copies */
-   nir_optimize(nir, is_scalar);
+   nir = nir_optimize(nir, is_scalar);
 
    OPT_V(brw_nir_lower_inputs, devinfo, is_scalar);
    OPT_V(brw_nir_lower_outputs, is_scalar);
@@ -287,7 +304,7 @@ brw_create_nir(struct brw_context *brw,
       OPT_V(nir_lower_atomics, shader_prog);
    }
 
-   nir_optimize(nir, is_scalar);
+   nir = nir_optimize(nir, is_scalar);
 
    if (brw->gen >= 6) {
       /* Try and fuse multiply-adds */

From 56a1c10bb8cc957caa9ebc3723b4692a522a02f5 Mon Sep 17 00:00:00 2001
From: Jimmy Berry <jimmy@boombatower.com>
Date: Mon, 9 Nov 2015 23:20:37 -0600
Subject: [PATCH 092/335] gallium/hud: control visibility at startup and
 runtime.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- env GALLIUM_HUD_VISIBLE: control default visibility
- env GALLIUM_HUD_TOGGLE_SIGNAL: toggle visibility via signal

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
---
 docs/envvars.html                       |  6 +++++
 src/gallium/auxiliary/hud/hud_context.c | 29 +++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/docs/envvars.html b/docs/envvars.html
index bdfe9991a6c..530bbb78696 100644
--- a/docs/envvars.html
+++ b/docs/envvars.html
@@ -179,6 +179,12 @@ Mesa EGL supports different sets of environment variables.  See the
 <li>GALLIUM_HUD - draws various information on the screen, like framerate,
     cpu load, driver statistics, performance counters, etc.
     Set GALLIUM_HUD=help and run e.g. glxgears for more info.
+<li>GALLIUM_HUD_VISIBLE - control default visibility, defaults to true.
+<li>GALLIUM_HUD_TOGGLE_SIGNAL - toggle visibility via user specified signal.
+    Especially useful to toggle hud at specific points of application and
+    disable for unencumbered viewing the rest of the time. For example, set
+    GALLIUM_HUD_VISIBLE to false and GALLIUM_HUD_SIGNAL_TOGGLE to 10 (SIGUSR1).
+    Use kill -10 <pid> to toggle the hud as desired.
 <li>GALLIUM_LOG_FILE - specifies a file for logging all errors, warnings, etc.
     rather than stderr.
 <li>GALLIUM_PRINT_OPTIONS - if non-zero, print all the Gallium environment
diff --git a/src/gallium/auxiliary/hud/hud_context.c b/src/gallium/auxiliary/hud/hud_context.c
index ffe30b8fa79..a055480646d 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -33,6 +33,7 @@
  * Set GALLIUM_HUD=help for more info.
  */
 
+#include <signal.h>
 #include <stdio.h>
 
 #include "hud/hud_context.h"
@@ -51,6 +52,8 @@
 #include "tgsi/tgsi_text.h"
 #include "tgsi/tgsi_dump.h"
 
+/* Control the visibility of all HUD contexts */
+static boolean huds_visible = TRUE;
 
 struct hud_context {
    struct pipe_context *pipe;
@@ -95,6 +98,11 @@ struct hud_context {
    } text, bg, whitelines;
 };
 
+static void
+signal_visible_handler(int sig, siginfo_t *siginfo, void *context)
+{
+   huds_visible = !huds_visible;
+}
 
 static void
 hud_draw_colored_prims(struct hud_context *hud, unsigned prim,
@@ -441,6 +449,9 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
    struct hud_pane *pane;
    struct hud_graph *gr;
 
+   if (!huds_visible)
+      return;
+
    hud->fb_width = tex->width0;
    hud->fb_height = tex->height0;
    hud->constants.two_div_fb_width = 2.0f / hud->fb_width;
@@ -1125,6 +1136,10 @@ hud_create(struct pipe_context *pipe, struct cso_context *cso)
    struct pipe_sampler_view view_templ;
    unsigned i;
    const char *env = debug_get_option("GALLIUM_HUD", NULL);
+   unsigned signo = debug_get_num_option("GALLIUM_HUD_TOGGLE_SIGNAL", 0);
+   static boolean sig_handled = FALSE;
+   struct sigaction action = {};
+   huds_visible = debug_get_bool_option("GALLIUM_HUD_VISIBLE", TRUE);
 
    if (!env || !*env)
       return NULL;
@@ -1267,6 +1282,20 @@ hud_create(struct pipe_context *pipe, struct cso_context *cso)
 
    LIST_INITHEAD(&hud->pane_list);
 
+   /* setup sig handler once for all hud contexts */
+   if (!sig_handled && signo != 0) {
+      action.sa_sigaction = &signal_visible_handler;
+      action.sa_flags = SA_SIGINFO;
+
+      if (signo >= NSIG)
+         fprintf(stderr, "gallium_hud: invalid signal %u\n", signo);
+      else if (sigaction(signo, &action, NULL) < 0)
+         fprintf(stderr, "gallium_hud: unable to set handler for signal %u\n", signo);
+      fflush(stderr);
+
+      sig_handled = TRUE;
+   }
+
    hud_parse_env_var(hud, env);
    return hud;
 }

From 09d610796c03ae5a238b999ace5ccc2d3effd700 Mon Sep 17 00:00:00 2001
From: Jimmy Berry <jimmy@boombatower.com>
Date: Tue, 3 Nov 2015 23:24:47 -0600
Subject: [PATCH 093/335] gallium/hud: document GALLIUM_HUD_PERIOD in
 envvars.html.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
---
 docs/envvars.html | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/envvars.html b/docs/envvars.html
index 530bbb78696..1b2c03ef377 100644
--- a/docs/envvars.html
+++ b/docs/envvars.html
@@ -179,6 +179,8 @@ Mesa EGL supports different sets of environment variables.  See the
 <li>GALLIUM_HUD - draws various information on the screen, like framerate,
     cpu load, driver statistics, performance counters, etc.
     Set GALLIUM_HUD=help and run e.g. glxgears for more info.
+<li>GALLIUM_HUD_PERIOD - sets the hud update rate in seconds (float). Use zero
+    to update every frame. The default period is 1/2 second.
 <li>GALLIUM_HUD_VISIBLE - control default visibility, defaults to true.
 <li>GALLIUM_HUD_TOGGLE_SIGNAL - toggle visibility via user specified signal.
     Especially useful to toggle hud at specific points of application and

From fc19a0d2e422ea8e45bc5440a91f858f5f345884 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Sat, 7 Nov 2015 18:58:59 -0800
Subject: [PATCH 094/335] i965: Allow indirect GS input indexing in the scalar
 backend.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This allows arbitrary non-constant indices on GS input arrays,
both for the vertex index, and any array offsets beyond that.

All indirects are handled via the pull model.  We could potentially
handle indirect addressing of pushed data as well, but it would add
additional code complexity, and we usually have to pull inputs anyway
due to the sheer volume of input data.  Plus, marking pushed inputs
as live due to indirect addressing could exacerbate register pressure
problems pretty badly.  We'd need to be careful.

v2: Use updated MOV_INDIRECT opcode.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp     |  17 ---
 src/mesa/drivers/dri/i965/brw_fs.h       |   3 +-
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 128 ++++++++++++++++++-----
 src/mesa/drivers/dri/i965/brw_shader.cpp |   3 +
 4 files changed, 106 insertions(+), 45 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 995ab229544..72a21587a4f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1689,24 +1689,7 @@ fs_visitor::assign_gs_urb_setup()
    first_non_payload_grf +=
       8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in;
 
-   const unsigned first_icp_handle = payload.num_regs -
-      (vue_prog_data->include_vue_handles ? nir->info.gs.vertices_in : 0);
-
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
-      /* Lower URB_READ_SIMD8 opcodes into real messages. */
-      if (inst->opcode == SHADER_OPCODE_URB_READ_SIMD8) {
-         assert(inst->src[0].file == IMM);
-         inst->src[0] = retype(brw_vec8_grf(first_icp_handle +
-                                            inst->src[0].ud,
-                                            0), BRW_REGISTER_TYPE_UD);
-         /* for now, assume constant - we can do per-slot offsets later */
-         assert(inst->src[1].file == IMM);
-         inst->offset = inst->src[1].ud;
-         inst->src[1] = fs_reg();
-         inst->mlen = 1;
-         inst->base_mrf = -1;
-      }
-
       /* Rewrite all ATTR file references to GRFs. */
       convert_attr_sources_to_hw_regs(inst);
    }
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index cbfc07f68bc..f52093ba3ce 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -303,7 +303,8 @@ public:
                        unsigned stream_id);
    void emit_gs_thread_end();
    void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src,
-                           unsigned offset, unsigned num_components);
+                           const fs_reg &indirect_offset, unsigned imm_offset,
+                           unsigned num_components);
    void emit_cs_terminate();
    fs_reg *emit_cs_local_invocation_id_setup();
    fs_reg *emit_cs_work_group_id_setup();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index c282f835cae..ebdcb3a4246 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1551,41 +1551,112 @@ fs_visitor::emit_gs_vertex(const nir_src &vertex_count_nir_src,
 void
 fs_visitor::emit_gs_input_load(const fs_reg &dst,
                                const nir_src &vertex_src,
-                               unsigned input_offset,
+                               const fs_reg &indirect_offset,
+                               unsigned imm_offset,
                                unsigned num_components)
 {
-   const brw_vue_prog_data *vue_prog_data = (const brw_vue_prog_data *) prog_data;
-   const unsigned vertex = nir_src_as_const_value(vertex_src)->u[0];
+   struct brw_gs_prog_data *gs_prog_data = (struct brw_gs_prog_data *) prog_data;
 
-   const unsigned array_stride = vue_prog_data->urb_read_length * 8;
+   /* Offset 0 is the VUE header, which contains VARYING_SLOT_LAYER [.y],
+    * VARYING_SLOT_VIEWPORT [.z], and VARYING_SLOT_PSIZ [.w].  Only
+    * gl_PointSize is available as a GS input, however, so it must be that.
+    */
+   const bool is_point_size =
+      indirect_offset.file == BAD_FILE && imm_offset == 0;
 
-   const bool pushed = 4 * input_offset < array_stride;
+   nir_const_value *vertex_const = nir_src_as_const_value(vertex_src);
+   const unsigned push_reg_count = gs_prog_data->base.urb_read_length * 8;
 
-   if (input_offset == 0) {
-      /* This is the VUE header, containing VARYING_SLOT_LAYER [.y],
-       * VARYING_SLOT_VIEWPORT [.z], and VARYING_SLOT_PSIZ [.w].
-       * Only gl_PointSize is available as a GS input, so they must
-       * be asking for that input.
-       */
-      if (pushed) {
-         bld.MOV(dst, fs_reg(ATTR, array_stride * vertex + 3, dst.type));
+   if (indirect_offset.file == BAD_FILE && vertex_const != NULL &&
+       4 * imm_offset < push_reg_count) {
+      imm_offset = 4 * imm_offset + vertex_const->u[0] * push_reg_count;
+      /* This input was pushed into registers. */
+      if (is_point_size) {
+         /* gl_PointSize comes in .w */
+         bld.MOV(dst, fs_reg(ATTR, imm_offset + 3, dst.type));
       } else {
-         fs_reg tmp = bld.vgrf(dst.type, 4);
-         fs_inst *inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp,
-                                  fs_reg(vertex), fs_reg(0));
-         inst->regs_written = 4;
-         bld.MOV(dst, offset(tmp, bld, 3));
+         for (unsigned i = 0; i < num_components; i++) {
+            bld.MOV(offset(dst, bld, i),
+                    fs_reg(ATTR, imm_offset + i, dst.type));
+         }
       }
    } else {
-      if (pushed) {
-         int index = vertex * array_stride + 4 * input_offset;
-         for (unsigned i = 0; i < num_components; i++) {
-            bld.MOV(offset(dst, bld, i), fs_reg(ATTR, index + i, dst.type));
-         }
+      /* Resort to the pull model.  Ensure the VUE handles are provided. */
+      gs_prog_data->base.include_vue_handles = true;
+
+      unsigned first_icp_handle = gs_prog_data->include_primitive_id ? 3 : 2;
+      fs_reg icp_handle;
+
+      if (vertex_const) {
+         /* The vertex index is constant; just select the proper URB handle. */
+         icp_handle =
+            retype(brw_vec8_grf(first_icp_handle + vertex_const->i[0], 0),
+                   BRW_REGISTER_TYPE_UD);
       } else {
-         fs_inst *inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst,
-                                  fs_reg(vertex), fs_reg(input_offset));
+         /* The vertex index is non-constant.  We need to use indirect
+          * addressing to fetch the proper URB handle.
+          *
+          * First, we start with the sequence <7, 6, 5, 4, 3, 2, 1, 0>
+          * indicating that channel <n> should read the handle from
+          * DWord <n>.  We convert that to bytes by multiplying by 4.
+          *
+          * Next, we convert the vertex index to bytes by multiplying
+          * by 32 (shifting by 5), and add the two together.  This is
+          * the final indirect byte offset.
+          */
+         fs_reg sequence = bld.vgrf(BRW_REGISTER_TYPE_W, 1);
+         fs_reg channel_offsets = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+         fs_reg vertex_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+         fs_reg icp_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+         icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+
+         /* sequence = <7, 6, 5, 4, 3, 2, 1, 0> */
+         bld.MOV(sequence, fs_reg(brw_imm_v(0x76543210)));
+         /* channel_offsets = 4 * sequence = <28, 24, 20, 16, 12, 8, 4, 0> */
+         bld.SHL(channel_offsets, sequence, fs_reg(2u));
+         /* Convert vertex_index to bytes (multiply by 32) */
+         bld.SHL(vertex_offset_bytes,
+                 retype(get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD),
+                 brw_imm_ud(5u));
+         bld.ADD(icp_offset_bytes, vertex_offset_bytes, channel_offsets);
+
+         /* Use first_icp_handle as the base offset.  There is one register
+          * of URB handles per vertex, so inform the register allocator that
+          * we might read up to nir->info.gs.vertices_in registers.
+          */
+         bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle,
+                  fs_reg(brw_vec8_grf(first_icp_handle, 0)),
+                  fs_reg(icp_offset_bytes),
+                  fs_reg(nir->info.gs.vertices_in * REG_SIZE));
+      }
+
+      fs_inst *inst;
+      if (indirect_offset.file == BAD_FILE) {
+         /* Constant indexing - use global offset. */
+         inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, icp_handle);
+         inst->offset = imm_offset;
+         inst->base_mrf = -1;
+         inst->mlen = 1;
          inst->regs_written = num_components;
+      } else {
+         /* Indirect indexing - use per-slot offsets as well. */
+         const fs_reg srcs[] = { icp_handle, indirect_offset };
+         fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
+         bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
+
+         inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, payload);
+         inst->offset = imm_offset;
+         inst->base_mrf = -1;
+         inst->mlen = 2;
+         inst->regs_written = num_components;
+      }
+
+      if (is_point_size) {
+         /* Read the whole VUE header (because of alignment) and read .w. */
+         fs_reg tmp = bld.vgrf(dst.type, 4);
+         inst->dst = tmp;
+         inst->regs_written = 4;
+         bld.MOV(dst, offset(tmp, bld, 3));
       }
    }
 }
@@ -1626,6 +1697,7 @@ fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld,
                                   nir_intrinsic_instr *instr)
 {
    assert(stage == MESA_SHADER_GEOMETRY);
+   fs_reg indirect_offset;
 
    fs_reg dest;
    if (nir_intrinsic_infos[instr->intrinsic].has_dest)
@@ -1644,9 +1716,11 @@ fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld,
       unreachable("load_input intrinsics are invalid for the GS stage");
 
    case nir_intrinsic_load_per_vertex_input_indirect:
-      assert(!"Not allowed");
+      indirect_offset = retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_D);
+      /* fallthrough */
    case nir_intrinsic_load_per_vertex_input:
-      emit_gs_input_load(dest, instr->src[0], instr->const_index[0],
+      emit_gs_input_load(dest, instr->src[0],
+                         indirect_offset, instr->const_index[0],
                          instr->num_components);
       break;
 
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index c4a567f4cc9..d22164874c3 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -137,6 +137,9 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
       compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true;
    }
 
+   if (compiler->scalar_stage[MESA_SHADER_GEOMETRY])
+      compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false;
+
    return compiler;
 }
 

From c82498c4daf1cfdee065011a10ee4345ae67ef3b Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 17 Nov 2015 16:25:06 -0800
Subject: [PATCH 095/335] glsl: Silence ignored qualifier warning

I think the intention was to mark the "this" parameter as const, but
const goes on the other end to do that.

In file included from glsl_symbol_table.cpp:26:0:
ast.h:339:35: warning: type qualifiers ignored on function return type [-Wignored-qualifiers]
    const bool is_single_dimension()
                                   ^

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
---
 src/glsl/ast.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index 1b75234d578..ae763424f71 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -336,7 +336,7 @@ public:
       array_dimensions.push_tail(&dim->link);
    }
 
-   const bool is_single_dimension()
+   bool is_single_dimension() const
    {
       return this->array_dimensions.tail_pred->prev != NULL &&
              this->array_dimensions.tail_pred->prev->is_head_sentinel();

From 37c2cfa6bc6743e2b9d30cf119353d5e2e4bb063 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 17 Nov 2015 16:58:02 -0800
Subject: [PATCH 096/335] glsl: Silence unused parameter warnings

builtin_functions.cpp:5289:52: warning: unused parameter 'num_arguments' [-Wunused-parameter]
                                           unsigned num_arguments,
                                                    ^
builtin_functions.cpp:5290:52: warning: unused parameter 'flags' [-Wunused-parameter]
                                           unsigned flags)
                                                    ^

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/glsl/builtin_functions.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index 13494446b59..eb438d9fd45 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -5243,8 +5243,8 @@ builtin_builder::_image_size_prototype(const glsl_type *image_type,
 
 ir_function_signature *
 builtin_builder::_image_samples_prototype(const glsl_type *image_type,
-                                          unsigned num_arguments,
-                                          unsigned flags)
+                                          unsigned /* num_arguments */,
+                                          unsigned /* flags */)
 {
    ir_variable *image = in_var(image_type, "image");
    ir_function_signature *sig =

From 0aded03046a5dd73bedece767dea8559463a7a57 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 17 Nov 2015 15:27:59 -0800
Subject: [PATCH 097/335] mesa: Don't expose GL_EXT_shader_integer_mix in GLES
 1.x

There are no shaders, so it doesn't even make sense to expose the
extension.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Cc: Nanley Chery <nanley.g.chery@intel.com>
---
 src/mesa/main/extensions_table.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index d12fd9f1c8d..8685a891951 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -176,7 +176,7 @@ EXT(EXT_rescale_normal                      , dummy_true
 EXT(EXT_secondary_color                     , dummy_true                             , GLL,  x ,  x ,  x , 1999)
 EXT(EXT_separate_shader_objects             , dummy_true                             ,  x ,  x ,  x , ES2, 2013)
 EXT(EXT_separate_specular_color             , dummy_true                             , GLL,  x ,  x ,  x , 1997)
-EXT(EXT_shader_integer_mix                  , EXT_shader_integer_mix                 , GLL, GLC, ES1,  30, 2013)
+EXT(EXT_shader_integer_mix                  , EXT_shader_integer_mix                 , GLL, GLC,  x ,  30, 2013)
 EXT(EXT_shadow_funcs                        , ARB_shadow                             , GLL,  x ,  x ,  x , 2002)
 EXT(EXT_stencil_two_side                    , EXT_stencil_two_side                   , GLL,  x ,  x ,  x , 2001)
 EXT(EXT_stencil_wrap                        , dummy_true                             , GLL,  x ,  x ,  x , 2002)

From 2f554761536bbfd0d8ec22e807c18bd6df0f22b8 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 17 Nov 2015 18:35:00 -0800
Subject: [PATCH 098/335] glsl: Fix off-by-one error in array size check
 assertion

Apparently, this has been a bug since 2010 (c30f6e5d).

Also use ARRAY_SIZE instead of open coding it.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Cc: mesa-stable@lists.freedesktop.org
---
 src/glsl/ir.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 8b5ba71fbba..80cbdbf613f 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -1425,8 +1425,7 @@ static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf
 
 const char *ir_texture::opcode_string()
 {
-   assert((unsigned int) op <=
-	  sizeof(tex_opcode_strs) / sizeof(tex_opcode_strs[0]));
+   assert((unsigned int) op < ARRAY_SIZE(tex_opcode_strs));
    return tex_opcode_strs[op];
 }
 

From 1cfffb95ebf49a8342d4799e68ecc0009300cb2f Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Wed, 18 Nov 2015 17:08:39 -0700
Subject: [PATCH 099/335] hud: fix Windows build break

Protect signal-related code with PIPE_OS_UNIX test.

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
---
 src/gallium/auxiliary/hud/hud_context.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/gallium/auxiliary/hud/hud_context.c b/src/gallium/auxiliary/hud/hud_context.c
index a055480646d..24a68dd2574 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -98,11 +98,13 @@ struct hud_context {
    } text, bg, whitelines;
 };
 
+#ifdef PIPE_OS_UNIX
 static void
 signal_visible_handler(int sig, siginfo_t *siginfo, void *context)
 {
    huds_visible = !huds_visible;
 }
+#endif
 
 static void
 hud_draw_colored_prims(struct hud_context *hud, unsigned prim,
@@ -1137,8 +1139,10 @@ hud_create(struct pipe_context *pipe, struct cso_context *cso)
    unsigned i;
    const char *env = debug_get_option("GALLIUM_HUD", NULL);
    unsigned signo = debug_get_num_option("GALLIUM_HUD_TOGGLE_SIGNAL", 0);
+#ifdef PIPE_OS_UNIX
    static boolean sig_handled = FALSE;
    struct sigaction action = {};
+#endif
    huds_visible = debug_get_bool_option("GALLIUM_HUD_VISIBLE", TRUE);
 
    if (!env || !*env)
@@ -1283,6 +1287,7 @@ hud_create(struct pipe_context *pipe, struct cso_context *cso)
    LIST_INITHEAD(&hud->pane_list);
 
    /* setup sig handler once for all hud contexts */
+#ifdef PIPE_OS_UNIX
    if (!sig_handled && signo != 0) {
       action.sa_sigaction = &signal_visible_handler;
       action.sa_flags = SA_SIGINFO;
@@ -1295,6 +1300,7 @@ hud_create(struct pipe_context *pipe, struct cso_context *cso)
 
       sig_handled = TRUE;
    }
+#endif
 
    hud_parse_env_var(hud, env);
    return hud;

From 7820b2c071ec974d824c9b6dc3a0dd0ad1b77444 Mon Sep 17 00:00:00 2001
From: Connor Abbott <connor.w.abbott@intel.com>
Date: Tue, 4 Aug 2015 14:04:34 -0700
Subject: [PATCH 100/335] nir: fix constant folding of bfi

Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
---
 src/glsl/nir/nir_opcodes.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
index 3c0f1da94af..729f695cf9c 100644
--- a/src/glsl/nir/nir_opcodes.py
+++ b/src/glsl/nir/nir_opcodes.py
@@ -563,7 +563,7 @@ opcode("bcsel", 0, tunsigned, [0, 0, 0],
       [tbool, tunsigned, tunsigned], "", "src0 ? src1 : src2")
 
 triop("bfi", tunsigned, """
-unsigned mask = src0, insert = src1 & mask, base = src2;
+unsigned mask = src0, insert = src1, base = src2;
 if (mask == 0) {
    dst = base;
 } else {
@@ -572,7 +572,7 @@ if (mask == 0) {
       tmp >>= 1;
       insert <<= 1;
    }
-   dst = (base & ~mask) | insert;
+   dst = (base & ~mask) | (insert & mask);
 }
 """)
 

From 84ed3819a42042757a32cae4260c26c4fb3b673c Mon Sep 17 00:00:00 2001
From: Connor Abbott <connor.w.abbott@intel.com>
Date: Fri, 14 Aug 2015 11:58:07 -0700
Subject: [PATCH 101/335] glsl: fix isinf() for doubles

Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
---
 src/glsl/builtin_functions.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index eb438d9fd45..1824b831f28 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -3573,7 +3573,16 @@ builtin_builder::_isinf(builtin_available_predicate avail, const glsl_type *type
 
    ir_constant_data infinities;
    for (int i = 0; i < type->vector_elements; i++) {
-      infinities.f[i] = INFINITY;
+      switch (type->base_type) {
+      case GLSL_TYPE_FLOAT:
+         infinities.f[i] = INFINITY;
+         break;
+      case GLSL_TYPE_DOUBLE:
+         infinities.d[i] = INFINITY;
+         break;
+      default:
+         unreachable("unknown type");
+      }
    }
 
    body.emit(ret(equal(abs(x), imm(type, infinities))));

From f1ba0a5ea0faf331be9fa28fd6d43a3ff061b735 Mon Sep 17 00:00:00 2001
From: Connor Abbott <connor.w.abbott@intel.com>
Date: Fri, 14 Aug 2015 11:58:45 -0700
Subject: [PATCH 102/335] glsl: fix ir_constant::equals() for doubles

Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
---
 src/glsl/ir_equals.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/glsl/ir_equals.cpp b/src/glsl/ir_equals.cpp
index 5f0785e0ece..aafcd1f0dae 100644
--- a/src/glsl/ir_equals.cpp
+++ b/src/glsl/ir_equals.cpp
@@ -58,8 +58,13 @@ ir_constant::equals(const ir_instruction *ir, enum ir_node_type) const
       return false;
 
    for (unsigned i = 0; i < type->components(); i++) {
-      if (value.u[i] != other->value.u[i])
-         return false;
+      if (type->base_type == GLSL_TYPE_DOUBLE) {
+         if (value.d[i] != other->value.d[i])
+            return false;
+      } else {
+         if (value.u[i] != other->value.u[i])
+            return false;
+      }
    }
 
    return true;

From 15f8dc7b234fa79c9dca69b0b49afead463293e3 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Wed, 18 Nov 2015 09:25:48 -0700
Subject: [PATCH 103/335] os: check for GALLIUM_PROCESS_NAME to override
 os_get_process_name()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Useful for debugging and for glretrace.

Reviewed-by: Charmaine Lee <charmainel@vmware.com>
Reviewed-by: José Fonseca <jfonseca@vmware.com>
---
 src/gallium/auxiliary/os/os_process.c | 47 +++++++++++++++++----------
 1 file changed, 29 insertions(+), 18 deletions(-)

diff --git a/src/gallium/auxiliary/os/os_process.c b/src/gallium/auxiliary/os/os_process.c
index a6262283d87..d2dcd0d7fbc 100644
--- a/src/gallium/auxiliary/os/os_process.c
+++ b/src/gallium/auxiliary/os/os_process.c
@@ -54,37 +54,48 @@ boolean
 os_get_process_name(char *procname, size_t size)
 {
    const char *name;
+
+   /* First, check if the GALLIUM_PROCESS_NAME env var is set to
+    * override the normal process name query.
+    */
+   name = os_get_option("GALLIUM_PROCESS_NAME");
+
+   if (!name) {
+      /* do normal query */
+
 #if defined(PIPE_SUBSYSTEM_WINDOWS_USER)
-   char szProcessPath[MAX_PATH];
-   char *lpProcessName;
-   char *lpProcessExt;
+      char szProcessPath[MAX_PATH];
+      char *lpProcessName;
+      char *lpProcessExt;
 
-   GetModuleFileNameA(NULL, szProcessPath, Elements(szProcessPath));
+      GetModuleFileNameA(NULL, szProcessPath, Elements(szProcessPath));
 
-   lpProcessName = strrchr(szProcessPath, '\\');
-   lpProcessName = lpProcessName ? lpProcessName + 1 : szProcessPath;
+      lpProcessName = strrchr(szProcessPath, '\\');
+      lpProcessName = lpProcessName ? lpProcessName + 1 : szProcessPath;
 
-   lpProcessExt = strrchr(lpProcessName, '.');
-   if (lpProcessExt) {
-      *lpProcessExt = '\0';
-   }
+      lpProcessExt = strrchr(lpProcessName, '.');
+      if (lpProcessExt) {
+         *lpProcessExt = '\0';
+      }
 
-   name = lpProcessName;
+      name = lpProcessName;
 
 #elif defined(__GLIBC__) || defined(__CYGWIN__)
-   name = program_invocation_short_name;
+      name = program_invocation_short_name;
 #elif defined(PIPE_OS_BSD) || defined(PIPE_OS_APPLE)
-   /* *BSD and OS X */
-   name = getprogname();
+      /* *BSD and OS X */
+      name = getprogname();
 #elif defined(PIPE_OS_HAIKU)
-   image_info info;
-   get_image_info(B_CURRENT_TEAM, &info);
-   name = info.name;
+      image_info info;
+      get_image_info(B_CURRENT_TEAM, &info);
+      name = info.name;
 #else
 #warning unexpected platform in os_process.c
-   return FALSE;
+      return FALSE;
 #endif
 
+   }
+
    assert(size > 0);
    assert(procname);
 

From 625414f78c4ece1c5b24a31afad2efa4ea504933 Mon Sep 17 00:00:00 2001
From: Ryan Houdek <Sonicadvance1@gmail.com>
Date: Thu, 5 Nov 2015 10:52:35 -0600
Subject: [PATCH 104/335] glapi: add EXT_blend_func_extended XML definitions

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/mapi/glapi/gen/EXT_gpu_shader4.xml  |  3 ++-
 src/mapi/glapi/gen/es_EXT.xml           | 26 +++++++++++++++++++++++++
 src/mesa/main/tests/dispatch_sanity.cpp |  8 ++++++++
 3 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/src/mapi/glapi/gen/EXT_gpu_shader4.xml b/src/mapi/glapi/gen/EXT_gpu_shader4.xml
index b1f7eae2610..b4120b9c192 100644
--- a/src/mapi/glapi/gen/EXT_gpu_shader4.xml
+++ b/src/mapi/glapi/gen/EXT_gpu_shader4.xml
@@ -232,7 +232,8 @@
         <param name="params" type="GLuint *"/>
     </function>
 
-    <function name="BindFragDataLocationEXT" alias="BindFragDataLocation">
+    <function name="BindFragDataLocationEXT" alias="BindFragDataLocation"
+	    es2="3.0">
         <param name="program" type="GLuint"/>
         <param name="colorNumber" type="GLuint"/>
         <param name="name" type="const GLchar *"/>
diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml
index 9a777a24c61..577d8254c43 100644
--- a/src/mapi/glapi/gen/es_EXT.xml
+++ b/src/mapi/glapi/gen/es_EXT.xml
@@ -914,4 +914,30 @@
     </function>
 </category>
 
+<category name="GL_EXT_blend_func_extended" number="247">
+
+    <function name="BindFragDataLocationIndexedEXT" alias="BindFragDataLocationIndexed"
+            es2="3.0">
+        <param name="program" type="GLuint"/>
+        <param name="colorNumber" type="GLuint"/>
+        <param name="index" type="GLuint"/>
+        <param name="name" type="const GLchar *"/>
+    </function>
+
+    <function name="GetFragDataIndexEXT" alias="GetFragDataIndex"
+            es2="3.0">
+        <param name="program" type="GLuint"/>
+        <param name="name" type="const GLchar *"/>
+        <return type="GLint"/>
+    </function>
+
+    <function name="GetProgramResourceLocationIndexEXT" alias="GetProgramResourceLocationIndex"
+           es2="3.1">
+        <param name="program" type="GLuint"/>
+        <param name="programInterface" type="GLenum"/>
+        <param name="name" type="const GLchar *"/>
+        <return type="GLint"/>
+    </function>
+
+</category>
 </OpenGLAPI>
diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp
index abe0f432572..97f81f932f6 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -2421,6 +2421,11 @@ const struct function gles3_functions_possible[] = {
    { "glProgramUniform4uiEXT", 30, -1 },
    { "glProgramUniform4uivEXT", 30, -1 },
 
+   /* GL_EXT_blend_func_extended */
+   { "glBindFragDataLocationIndexedEXT", 30, -1 },
+   { "glGetFragDataIndexEXT", 30, -1 },
+   { "glBindFragDataLocationEXT", 30, -1 },
+
    { NULL, 0, -1 }
 };
 
@@ -2509,5 +2514,8 @@ const struct function gles31_functions_possible[] = {
    /* GL_EXT_buffer_storage */
    { "glBufferStorageEXT", 31, -1 },
 
+   /* GL_EXT_blend_func_extended */
+   { "glGetProgramResourceLocationIndexEXT", 31, -1 },
+
    { NULL, 0, -1 },
  };

From ceecb0876f1479661e561520c7e3561fd051720a Mon Sep 17 00:00:00 2001
From: Ryan Houdek <Sonicadvance1@gmail.com>
Date: Thu, 5 Nov 2015 10:53:40 -0600
Subject: [PATCH 105/335] glsl: add EXT_blend_func_extended parser enables

This adds parser state for the maximum number of dual-source draw
buffers available, and the variables for determining whether the
extension has been enabled (or set to warn) in the shader.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/glsl/glsl_parser_extras.cpp | 3 +++
 src/glsl/glsl_parser_extras.h   | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 3ed11683062..8fb05fae4c1 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -102,6 +102,8 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
 
    this->Const.MaxDrawBuffers = ctx->Const.MaxDrawBuffers;
 
+   this->Const.MaxDualSourceDrawBuffers = ctx->Const.MaxDualSourceDrawBuffers;
+
    /* 1.50 constants */
    this->Const.MaxVertexOutputComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents;
    this->Const.MaxGeometryInputComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents;
@@ -644,6 +646,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
    EXT(AMD_shader_trinary_minmax,      true,  false,     dummy_true),
    EXT(AMD_vertex_shader_layer,        true,  false,     AMD_vertex_shader_layer),
    EXT(AMD_vertex_shader_viewport_index, true,  false,   AMD_vertex_shader_viewport_index),
+   EXT(EXT_blend_func_extended,        false,  true,     ARB_blend_func_extended),
    EXT(EXT_draw_buffers,               false,  true,     dummy_true),
    EXT(EXT_separate_shader_objects,    false, true,      dummy_true),
    EXT(EXT_shader_integer_mix,         true,  true,      EXT_shader_integer_mix),
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index 1d8c1b8799f..17f8490b8da 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -380,6 +380,9 @@ struct _mesa_glsl_parse_state {
       /* ARB_draw_buffers */
       unsigned MaxDrawBuffers;
 
+      /* ARB_blend_func_extended */
+      unsigned MaxDualSourceDrawBuffers;
+
       /* 3.00 ES */
       int MinProgramTexelOffset;
       int MaxProgramTexelOffset;
@@ -595,6 +598,8 @@ struct _mesa_glsl_parse_state {
    bool AMD_vertex_shader_layer_warn;
    bool AMD_vertex_shader_viewport_index_enable;
    bool AMD_vertex_shader_viewport_index_warn;
+   bool EXT_blend_func_extended_enable;
+   bool EXT_blend_func_extended_warn;
    bool EXT_draw_buffers_enable;
    bool EXT_draw_buffers_warn;
    bool EXT_separate_shader_objects_enable;

From 1d1d02f2ac69475da112a6aadfa7c161b13ff4da Mon Sep 17 00:00:00 2001
From: Ryan Houdek <Sonicadvance1@gmail.com>
Date: Thu, 5 Nov 2015 10:59:32 -0600
Subject: [PATCH 106/335] glsl: add support for EXT_blend_func_extended
 builtins

gl_MaxDualSourceDrawBuffersEXT - Maximum dual-source draw buffers supported

For ESSL 1.0, it provides two builtins since you can't have user-defined
color output variables:
  gl_SecondaryFragColorEXT
  gl_SecondaryFragDataEXT[MaxDSDrawBuffers]

Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/glsl/ast_to_hir.cpp        | 24 ++++++++++++
 src/glsl/builtin_variables.cpp | 68 ++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 41d05c07eee..a9b1c0ed34b 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -7194,6 +7194,8 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state,
 {
    bool gl_FragColor_assigned = false;
    bool gl_FragData_assigned = false;
+   bool gl_FragSecondaryColor_assigned = false;
+   bool gl_FragSecondaryData_assigned = false;
    bool user_defined_fs_output_assigned = false;
    ir_variable *user_defined_fs_output = NULL;
 
@@ -7211,6 +7213,10 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state,
          gl_FragColor_assigned = true;
       else if (strcmp(var->name, "gl_FragData") == 0)
          gl_FragData_assigned = true;
+	else if (strcmp(var->name, "gl_SecondaryFragColorEXT") == 0)
+         gl_FragSecondaryColor_assigned = true;
+	else if (strcmp(var->name, "gl_SecondaryFragDataEXT") == 0)
+         gl_FragSecondaryData_assigned = true;
       else if (!is_gl_identifier(var->name)) {
          if (state->stage == MESA_SHADER_FRAGMENT &&
              var->data.mode == ir_var_shader_out) {
@@ -7242,11 +7248,29 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state,
       _mesa_glsl_error(&loc, state, "fragment shader writes to both "
                        "`gl_FragColor' and `%s'",
                        user_defined_fs_output->name);
+   } else if (gl_FragSecondaryColor_assigned && gl_FragSecondaryData_assigned) {
+      _mesa_glsl_error(&loc, state, "fragment shader writes to both "
+                       "`gl_SecondaryFragColorEXT' and"
+                       " `gl_SecondaryFragDataEXT'");
+   } else if (gl_FragColor_assigned && gl_FragSecondaryData_assigned) {
+      _mesa_glsl_error(&loc, state, "fragment shader writes to both "
+                       "`gl_FragColor' and"
+                       " `gl_SecondaryFragDataEXT'");
+   } else if (gl_FragData_assigned && gl_FragSecondaryColor_assigned) {
+      _mesa_glsl_error(&loc, state, "fragment shader writes to both "
+                       "`gl_FragData' and"
+                       " `gl_SecondaryFragColorEXT'");
    } else if (gl_FragData_assigned && user_defined_fs_output_assigned) {
       _mesa_glsl_error(&loc, state, "fragment shader writes to both "
                        "`gl_FragData' and `%s'",
                        user_defined_fs_output->name);
    }
+
+   if ((gl_FragSecondaryColor_assigned || gl_FragSecondaryData_assigned) &&
+       !state->EXT_blend_func_extended_enable) {
+      _mesa_glsl_error(&loc, state,
+                       "Dual source blending requires EXT_blend_func_extended");
+   }
 }
 
 
diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
index fc7a3c3f64c..e8eab808a19 100644
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -377,6 +377,11 @@ private:
       return add_variable(name, type, ir_var_shader_out, slot);
    }
 
+   ir_variable *add_index_output(int slot, int index, const glsl_type *type, const char *name)
+   {
+      return add_index_variable(name, type, ir_var_shader_out, slot, index);
+   }
+
    ir_variable *add_system_value(int slot, const glsl_type *type,
                                  const char *name)
    {
@@ -385,6 +390,8 @@ private:
 
    ir_variable *add_variable(const char *name, const glsl_type *type,
                              enum ir_variable_mode mode, int slot);
+   ir_variable *add_index_variable(const char *name, const glsl_type *type,
+                             enum ir_variable_mode mode, int slot, int index);
    ir_variable *add_uniform(const glsl_type *type, const char *name);
    ir_variable *add_const(const char *name, int value);
    ir_variable *add_const_ivec3(const char *name, int x, int y, int z);
@@ -430,6 +437,46 @@ builtin_variable_generator::builtin_variable_generator(
 {
 }
 
+ir_variable *
+builtin_variable_generator::add_index_variable(const char *name,
+                                         const glsl_type *type,
+                                         enum ir_variable_mode mode, int slot, int index)
+{
+   ir_variable *var = new(symtab) ir_variable(type, name, mode);
+   var->data.how_declared = ir_var_declared_implicitly;
+
+   switch (var->data.mode) {
+   case ir_var_auto:
+   case ir_var_shader_in:
+   case ir_var_uniform:
+   case ir_var_system_value:
+      var->data.read_only = true;
+      break;
+   case ir_var_shader_out:
+   case ir_var_shader_storage:
+      break;
+   default:
+      /* The only variables that are added using this function should be
+       * uniforms, shader storage, shader inputs, and shader outputs, constants
+       * (which use ir_var_auto), and system values.
+       */
+      assert(0);
+      break;
+   }
+
+   var->data.location = slot;
+   var->data.explicit_location = (slot >= 0);
+   var->data.explicit_index = 1;
+   var->data.index = index;
+
+   /* Once the variable is created and initialized, add it to the symbol table
+    * and add the declaration to the IR stream.
+    */
+   instructions->push_tail(var);
+
+   symtab->add_variable(var);
+   return var;
+}
 
 ir_variable *
 builtin_variable_generator::add_variable(const char *name,
@@ -581,6 +628,14 @@ builtin_variable_generator::generate_constants()
          add_const("gl_MaxVaryingVectors",
                    state->ctx->Const.MaxVarying);
       }
+
+      /* EXT_blend_func_extended brings a built in constant
+       * for determining number of dual source draw buffers
+       */
+      if (state->EXT_blend_func_extended_enable) {
+         add_const("gl_MaxDualSourceDrawBuffersEXT",
+                   state->Const.MaxDualSourceDrawBuffers);
+      }
    } else {
       add_const("gl_MaxVertexUniformComponents",
                 state->Const.MaxVertexUniformComponents);
@@ -1017,6 +1072,19 @@ builtin_variable_generator::generate_fs_special_vars()
                  array(vec4_t, state->Const.MaxDrawBuffers), "gl_FragData");
    }
 
+   if (state->es_shader && state->language_version == 100 && state->EXT_blend_func_extended_enable) {
+      /* We make an assumption here that there will only ever be one dual-source draw buffer.
+       * In case this assumption is ever proven to be false, make sure to assert here
+       * since we don't handle this case.
+       * In practice, this issue will never arise since no hardware will support it.
+       */
+      assert(state->Const.MaxDualSourceDrawBuffers <= 1);
+      add_index_output(FRAG_RESULT_DATA0, 1, vec4_t, "gl_SecondaryFragColorEXT");
+      add_index_output(FRAG_RESULT_DATA0, 1,
+                       array(vec4_t, state->Const.MaxDualSourceDrawBuffers),
+                       "gl_SecondaryFragDataEXT");
+   }
+
    /* gl_FragDepth has always been in desktop GLSL, but did not appear in GLSL
     * ES 1.00.
     */

From ef9e6d1ec8389f800d22c0dd091efb30f189fa45 Mon Sep 17 00:00:00 2001
From: Ryan Houdek <Sonicadvance1@gmail.com>
Date: Thu, 5 Nov 2015 20:44:03 -0600
Subject: [PATCH 107/335] glsl: add GL_EXT_blend_func_extended preprocessor
 define

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/glsl/glcpp/glcpp-parse.y | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
index 6aa7abec00e..ab5ec8450b9 100644
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -2384,6 +2384,8 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
                  add_builtin_define(parser, "GL_OES_standard_derivatives", 1);
               if (extensions->ARB_texture_multisample)
                  add_builtin_define(parser, "GL_OES_texture_storage_multisample_2d_array", 1);
+              if (extensions->ARB_blend_func_extended)
+                 add_builtin_define(parser, "GL_EXT_blend_func_extended", 1);
 	   }
 	} else {
 	   add_builtin_define(parser, "GL_ARB_draw_buffers", 1);

From 33ddc8e865e67f3dabcd1b598f4e978be6c7be6f Mon Sep 17 00:00:00 2001
From: Ryan Houdek <Sonicadvance1@gmail.com>
Date: Thu, 5 Nov 2015 11:03:44 -0600
Subject: [PATCH 108/335] glsl: add a parse check to check for the index layout
 qualifier

In ES shaders, the index layout qualifier can only be used if
EXT_blend_func_extended is enabled.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/glsl/glsl_parser.yy | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index adf6a05acce..403cbd1564a 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -1476,6 +1476,11 @@ layout_qualifier_id:
       }
 
       if (match_layout_qualifier("index", $1, state) == 0) {
+         if (state->es_shader && !state->EXT_blend_func_extended_enable) {
+            _mesa_glsl_error(& @3, state, "index layout qualifier requires EXT_blend_func_extended");
+            YYERROR;
+         }
+
          $$.flags.q.explicit_index = 1;
 
          if ($3 >= 0) {

From 4b549f0d8ce425e96f57f4014edcfe7bac36920c Mon Sep 17 00:00:00 2001
From: Ryan Houdek <Sonicadvance1@gmail.com>
Date: Thu, 5 Nov 2015 11:05:17 -0600
Subject: [PATCH 109/335] mesa: enable usage of blend_func_extended blend
 factors in GLES2

Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/mesa/main/blend.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c
index ddf7f497f1e..f07552b4778 100644
--- a/src/mesa/main/blend.c
+++ b/src/mesa/main/blend.c
@@ -67,7 +67,7 @@ legal_src_factor(const struct gl_context *ctx, GLenum factor)
    case GL_SRC1_ALPHA:
    case GL_ONE_MINUS_SRC1_COLOR:
    case GL_ONE_MINUS_SRC1_ALPHA:
-      return _mesa_is_desktop_gl(ctx)
+      return ctx->API != API_OPENGLES
          && ctx->Extensions.ARB_blend_func_extended;
    default:
       return GL_FALSE;
@@ -100,14 +100,14 @@ legal_dst_factor(const struct gl_context *ctx, GLenum factor)
    case GL_ONE_MINUS_CONSTANT_ALPHA:
       return _mesa_is_desktop_gl(ctx) || ctx->API == API_OPENGLES2;
    case GL_SRC_ALPHA_SATURATE:
-      return (_mesa_is_desktop_gl(ctx)
+      return (ctx->API != API_OPENGLES
               && ctx->Extensions.ARB_blend_func_extended)
          || _mesa_is_gles3(ctx);
    case GL_SRC1_COLOR:
    case GL_SRC1_ALPHA:
    case GL_ONE_MINUS_SRC1_COLOR:
    case GL_ONE_MINUS_SRC1_ALPHA:
-      return _mesa_is_desktop_gl(ctx)
+      return ctx->API != API_OPENGLES
          && ctx->Extensions.ARB_blend_func_extended;
    default:
       return GL_FALSE;

From f7c23f225f37f96bf711ccc6b2a6315b64582b81 Mon Sep 17 00:00:00 2001
From: Ryan Houdek <Sonicadvance1@gmail.com>
Date: Thu, 5 Nov 2015 11:05:58 -0600
Subject: [PATCH 110/335] mesa: allow MAX_DUAL_SOURCE_DRAW_BUFFERS to be
 available to ES

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/mesa/main/get_hash_params.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index fbc7b8f8602..9b22b91ac1b 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -330,6 +330,9 @@ descriptor=[
 
 # GL_KHR_context_flush_control
   [ "CONTEXT_RELEASE_BEHAVIOR", "CONTEXT_ENUM(Const.ContextReleaseBehavior), NO_EXTRA" ],
+
+# blend_func_extended
+  [ "MAX_DUAL_SOURCE_DRAW_BUFFERS", "CONTEXT_INT(Const.MaxDualSourceDrawBuffers), extra_ARB_blend_func_extended" ],
 ]},
 
 # GLES3 is not a typo.
@@ -801,7 +804,6 @@ descriptor=[
 # GL_ARB_robustness
   [ "RESET_NOTIFICATION_STRATEGY_ARB", "CONTEXT_ENUM(Const.ResetStrategy), NO_EXTRA" ],
 
-  [ "MAX_DUAL_SOURCE_DRAW_BUFFERS", "CONTEXT_INT(Const.MaxDualSourceDrawBuffers), extra_ARB_blend_func_extended" ],
 
 # GL_ARB_uniform_buffer_object
   [ "MAX_GEOMETRY_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks), extra_ARB_uniform_buffer_object_and_geometry_shader" ],

From 0ec218d167a2e5cef993ba7863608c7b40f4d649 Mon Sep 17 00:00:00 2001
From: Ryan Houdek <Sonicadvance1@gmail.com>
Date: Thu, 5 Nov 2015 11:07:08 -0600
Subject: [PATCH 111/335] mesa: enable EXT_blend_func_extended if the driver
 supports the ARB version

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 docs/relnotes/11.1.0.html        | 1 +
 src/mesa/main/extensions_table.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index f4563eab526..c0c9a6aceb5 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -58,6 +58,7 @@ Note: some of the new features are only available with certain drivers.
 <li>GL_ARB_texture_query_lod on softpipe</li>
 <li>GL_ARB_texture_view on radeonsi and r600 (for evergeen and newer)</li>
 <li>GL_ARB_vertex_type_2_10_10_10_rev on freedreno (a3xx)</li>
+<li>GL_EXT_blend_func_extended on all drivers that support the ARB version</li>
 <li>GL_EXT_buffer_storage implemented for when ES 3.1 support is gained</li>
 <li>GL_EXT_draw_elements_base_vertex on all drivers</li>
 <li>GL_EXT_texture_compression_rgtc / latc on freedreno (a3xx & a4xx)</li>
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index 8685a891951..7b5cc7b114e 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -141,6 +141,7 @@ EXT(EXT_abgr                                , dummy_true
 EXT(EXT_bgra                                , dummy_true                             , GLL,  x ,  x ,  x , 1995)
 EXT(EXT_blend_color                         , EXT_blend_color                        , GLL,  x ,  x ,  x , 1995)
 EXT(EXT_blend_equation_separate             , EXT_blend_equation_separate            , GLL, GLC,  x ,  x , 2003)
+EXT(EXT_blend_func_extended                 , ARB_blend_func_extended                ,  x ,  x ,  x , ES2, 2015)
 EXT(EXT_blend_func_separate                 , EXT_blend_func_separate                , GLL,  x ,  x ,  x , 1999)
 EXT(EXT_buffer_storage                      , ARB_buffer_storage                     ,  x ,  x ,  x ,  31, 2015)
 EXT(EXT_discard_framebuffer                 , dummy_true                             ,  x ,  x , ES1, ES2, 2009)

From bcda79676a7a51e433f0f044f43b61405ebcc9e5 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 19 Nov 2015 12:25:53 -0500
Subject: [PATCH 112/335] docs: GL3.1 for a3xx and a4xx

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 docs/relnotes/11.1.0.html | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index c0c9a6aceb5..2e20bb3444e 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -44,6 +44,7 @@ Note: some of the new features are only available with certain drivers.
 </p>
 
 <ul>
+<li>OpenGL 3.1 support on freedreno (a3xx, a4xx)</li>
 <li>GL_ARB_arrays_of_arrays on i965</li>
 <li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>
 <li>GL_ARB_clear_texture on nv50, nvc0</li>

From f030227f4643ae7e832545dfa3c3db472e00ab65 Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Wed, 18 Nov 2015 15:01:43 -0800
Subject: [PATCH 113/335] mesa/extensions: Sort the extension table
 alphabetically

Make it easier to determine where to add new extensions.
Performed with the vim sort command.

v2: Insert newline after last #define (Matt)

Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/main/extensions_table.h | 174 +++++++++++++++++--------------
 1 file changed, 93 insertions(+), 81 deletions(-)

diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index 7b5cc7b114e..20751688977 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -1,8 +1,31 @@
+/* The extension table is alphabetically sorted by the extension name string column. */
+
 #define GLL 0
 #define GLC 0
 #define ES1 0
 #define ES2 0
 #define  x ~0
+
+EXT(3DFX_texture_compression_FXT1           , TDFX_texture_compression_FXT1          , GLL, GLC,  x ,  x , 1999)
+
+EXT(AMD_conservative_depth                  , ARB_conservative_depth                 , GLL, GLC,  x ,  x , 2009)
+EXT(AMD_draw_buffers_blend                  , ARB_draw_buffers_blend                 , GLL, GLC,  x ,  x , 2009)
+EXT(AMD_performance_monitor                 , AMD_performance_monitor                , GLL, GLC,  x ,  x , 2007)
+EXT(AMD_pinned_memory                       , AMD_pinned_memory                      , GLL, GLC,  x ,  x , 2013)
+EXT(AMD_seamless_cubemap_per_texture        , AMD_seamless_cubemap_per_texture       , GLL, GLC,  x ,  x , 2009)
+EXT(AMD_shader_stencil_export               , ARB_shader_stencil_export              , GLL, GLC,  x ,  x , 2009)
+EXT(AMD_shader_trinary_minmax               , dummy_true                             , GLL, GLC,  x ,  x , 2012)
+EXT(AMD_vertex_shader_layer                 , AMD_vertex_shader_layer                ,  x , GLC,  x ,  x , 2012)
+EXT(AMD_vertex_shader_viewport_index        , AMD_vertex_shader_viewport_index       ,  x , GLC,  x ,  x , 2012)
+
+EXT(ANGLE_texture_compression_dxt3          , ANGLE_texture_compression_dxt          , GLL, GLC, ES1, ES2, 2011)
+EXT(ANGLE_texture_compression_dxt5          , ANGLE_texture_compression_dxt          , GLL, GLC, ES1, ES2, 2011)
+
+EXT(APPLE_object_purgeable                  , APPLE_object_purgeable                 , GLL, GLC,  x ,  x , 2006)
+EXT(APPLE_packed_pixels                     , dummy_true                             , GLL,  x ,  x ,  x , 2002)
+EXT(APPLE_texture_max_level                 , dummy_true                             ,  x ,  x , ES1, ES2, 2009)
+EXT(APPLE_vertex_array_object               , dummy_true                             , GLL,  x ,  x ,  x , 2002)
+
 EXT(ARB_ES2_compatibility                   , ARB_ES2_compatibility                  , GLL, GLC,  x ,  x , 2009)
 EXT(ARB_ES3_compatibility                   , ARB_ES3_compatibility                  , GLL, GLC,  x ,  x , 2012)
 EXT(ARB_arrays_of_arrays                    , ARB_arrays_of_arrays                   , GLL, GLC,  x ,  x , 2012)
@@ -16,9 +39,9 @@ EXT(ARB_color_buffer_float                  , ARB_color_buffer_float
 EXT(ARB_compressed_texture_pixel_storage    , dummy_true                             , GLL, GLC,  x ,  x , 2011)
 EXT(ARB_compute_shader                      , ARB_compute_shader                     , GLL, GLC,  x ,  x , 2012)
 EXT(ARB_conditional_render_inverted         , ARB_conditional_render_inverted        , GLL, GLC,  x ,  x , 2014)
+EXT(ARB_conservative_depth                  , ARB_conservative_depth                 , GLL, GLC,  x ,  x , 2011)
 EXT(ARB_copy_buffer                         , dummy_true                             , GLL, GLC,  x ,  x , 2008)
 EXT(ARB_copy_image                          , ARB_copy_image                         , GLL, GLC,  x ,  x , 2012)
-EXT(ARB_conservative_depth                  , ARB_conservative_depth                 , GLL, GLC,  x ,  x , 2011)
 EXT(ARB_debug_output                        , dummy_true                             , GLL, GLC,  x ,  x , 2009)
 EXT(ARB_depth_buffer_float                  , ARB_depth_buffer_float                 , GLL, GLC,  x ,  x , 2008)
 EXT(ARB_depth_clamp                         , ARB_depth_clamp                        , GLL, GLC,  x ,  x , 2003)
@@ -56,8 +79,8 @@ EXT(ARB_multi_bind                          , dummy_true
 EXT(ARB_multi_draw_indirect                 , ARB_draw_indirect                      ,  x , GLC,  x ,  x , 2012)
 EXT(ARB_multisample                         , dummy_true                             , GLL,  x ,  x ,  x , 1994)
 EXT(ARB_multitexture                        , dummy_true                             , GLL,  x ,  x ,  x , 1998)
-EXT(ARB_occlusion_query2                    , ARB_occlusion_query2                   , GLL, GLC,  x ,  x , 2003)
 EXT(ARB_occlusion_query                     , ARB_occlusion_query                    , GLL,  x ,  x ,  x , 2001)
+EXT(ARB_occlusion_query2                    , ARB_occlusion_query2                   , GLL, GLC,  x ,  x , 2003)
 EXT(ARB_pipeline_statistics_query           , ARB_pipeline_statistics_query          , GLL, GLC,  x ,  x , 2014)
 EXT(ARB_pixel_buffer_object                 , EXT_pixel_buffer_object                , GLL, GLC,  x ,  x , 2004)
 EXT(ARB_point_parameters                    , EXT_point_parameters                   , GLL,  x ,  x ,  x , 1997)
@@ -83,13 +106,13 @@ EXT(ARB_shader_subroutine                   , ARB_shader_subroutine
 EXT(ARB_shader_texture_image_samples        , ARB_shader_texture_image_samples       , GLL, GLC,  x ,  x , 2014)
 EXT(ARB_shader_texture_lod                  , ARB_shader_texture_lod                 , GLL, GLC,  x ,  x , 2009)
 EXT(ARB_shading_language_100                , dummy_true                             , GLL,  x ,  x ,  x , 2003)
-EXT(ARB_shading_language_packing            , ARB_shading_language_packing           , GLL, GLC,  x ,  x , 2011)
 EXT(ARB_shading_language_420pack            , ARB_shading_language_420pack           , GLL, GLC,  x ,  x , 2011)
+EXT(ARB_shading_language_packing            , ARB_shading_language_packing           , GLL, GLC,  x ,  x , 2011)
 EXT(ARB_shadow                              , ARB_shadow                             , GLL,  x ,  x ,  x , 2001)
 EXT(ARB_stencil_texturing                   , ARB_stencil_texturing                  , GLL, GLC,  x ,  x , 2012)
 EXT(ARB_sync                                , ARB_sync                               , GLL, GLC,  x ,  x , 2003)
-EXT(ARB_texture_barrier                     , NV_texture_barrier                     , GLL, GLC,  x ,  x , 2014)
 EXT(ARB_tessellation_shader                 , ARB_tessellation_shader                ,  x , GLC,  x ,  x , 2009)
+EXT(ARB_texture_barrier                     , NV_texture_barrier                     , GLL, GLC,  x ,  x , 2014)
 EXT(ARB_texture_border_clamp                , ARB_texture_border_clamp               , GLL,  x ,  x ,  x , 2000)
 EXT(ARB_texture_buffer_object               , ARB_texture_buffer_object              ,  x , GLC,  x ,  x , 2008)
 EXT(ARB_texture_buffer_object_rgb32         , ARB_texture_buffer_object_rgb32        ,  x , GLC,  x ,  x , 2009)
@@ -105,20 +128,20 @@ EXT(ARB_texture_env_crossbar                , ARB_texture_env_crossbar
 EXT(ARB_texture_env_dot3                    , ARB_texture_env_dot3                   , GLL,  x ,  x ,  x , 2001)
 EXT(ARB_texture_float                       , ARB_texture_float                      , GLL, GLC,  x ,  x , 2004)
 EXT(ARB_texture_gather                      , ARB_texture_gather                     , GLL, GLC,  x ,  x , 2009)
-EXT(ARB_texture_mirrored_repeat             , dummy_true                             , GLL,  x ,  x ,  x , 2001)
 EXT(ARB_texture_mirror_clamp_to_edge        , ARB_texture_mirror_clamp_to_edge       , GLL, GLC,  x ,  x , 2013)
+EXT(ARB_texture_mirrored_repeat             , dummy_true                             , GLL,  x ,  x ,  x , 2001)
 EXT(ARB_texture_multisample                 , ARB_texture_multisample                , GLL, GLC,  x ,  x , 2009)
 EXT(ARB_texture_non_power_of_two            , ARB_texture_non_power_of_two           , GLL, GLC,  x ,  x , 2003)
 EXT(ARB_texture_query_levels                , ARB_texture_query_levels               , GLL, GLC,  x ,  x , 2012)
 EXT(ARB_texture_query_lod                   , ARB_texture_query_lod                  , GLL, GLC,  x ,  x , 2009)
 EXT(ARB_texture_rectangle                   , NV_texture_rectangle                   , GLL, GLC,  x ,  x , 2004)
-EXT(ARB_texture_rgb10_a2ui                  , ARB_texture_rgb10_a2ui                 , GLL, GLC,  x ,  x , 2009)
 EXT(ARB_texture_rg                          , ARB_texture_rg                         , GLL, GLC,  x ,  x , 2008)
+EXT(ARB_texture_rgb10_a2ui                  , ARB_texture_rgb10_a2ui                 , GLL, GLC,  x ,  x , 2009)
 EXT(ARB_texture_stencil8                    , ARB_texture_stencil8                   , GLL, GLC,  x ,  x , 2013)
 EXT(ARB_texture_storage                     , dummy_true                             , GLL, GLC,  x ,  x , 2011)
 EXT(ARB_texture_storage_multisample         , ARB_texture_multisample                , GLL, GLC,  x ,  x , 2012)
-EXT(ARB_texture_view                        , ARB_texture_view                       , GLL, GLC,  x ,  x , 2012)
 EXT(ARB_texture_swizzle                     , EXT_texture_swizzle                    , GLL, GLC,  x ,  x , 2008)
+EXT(ARB_texture_view                        , ARB_texture_view                       , GLL, GLC,  x ,  x , 2012)
 EXT(ARB_timer_query                         , ARB_timer_query                        , GLL, GLC,  x ,  x , 2010)
 EXT(ARB_transform_feedback2                 , ARB_transform_feedback2                , GLL, GLC,  x ,  x , 2010)
 EXT(ARB_transform_feedback3                 , ARB_transform_feedback3                , GLL, GLC,  x ,  x , 2010)
@@ -127,29 +150,39 @@ EXT(ARB_transpose_matrix                    , dummy_true
 EXT(ARB_uniform_buffer_object               , ARB_uniform_buffer_object              , GLL, GLC,  x ,  x , 2009)
 EXT(ARB_vertex_array_bgra                   , EXT_vertex_array_bgra                  , GLL, GLC,  x ,  x , 2008)
 EXT(ARB_vertex_array_object                 , dummy_true                             , GLL, GLC,  x ,  x , 2006)
+EXT(ARB_vertex_attrib_64bit                 , ARB_vertex_attrib_64bit                ,  x , GLC,  x ,  x , 2010)
 EXT(ARB_vertex_attrib_binding               , dummy_true                             , GLL, GLC,  x ,  x , 2012)
 EXT(ARB_vertex_buffer_object                , dummy_true                             , GLL,  x ,  x ,  x , 2003)
 EXT(ARB_vertex_program                      , ARB_vertex_program                     , GLL,  x ,  x ,  x , 2002)
 EXT(ARB_vertex_shader                       , ARB_vertex_shader                      , GLL, GLC,  x ,  x , 2002)
-EXT(ARB_vertex_attrib_64bit                 , ARB_vertex_attrib_64bit                ,  x , GLC,  x ,  x , 2010)
 EXT(ARB_vertex_type_10f_11f_11f_rev         , ARB_vertex_type_10f_11f_11f_rev        , GLL, GLC,  x ,  x , 2013)
 EXT(ARB_vertex_type_2_10_10_10_rev          , ARB_vertex_type_2_10_10_10_rev         , GLL, GLC,  x ,  x , 2009)
 EXT(ARB_viewport_array                      , ARB_viewport_array                     ,  x , GLC,  x ,  x , 2010)
 EXT(ARB_window_pos                          , dummy_true                             , GLL,  x ,  x ,  x , 2001)
 
+EXT(ATI_blend_equation_separate             , EXT_blend_equation_separate            , GLL, GLC,  x ,  x , 2003)
+EXT(ATI_draw_buffers                        , dummy_true                             , GLL,  x ,  x ,  x , 2002)
+EXT(ATI_fragment_shader                     , ATI_fragment_shader                    , GLL,  x ,  x ,  x , 2001)
+EXT(ATI_separate_stencil                    , ATI_separate_stencil                   , GLL,  x ,  x ,  x , 2006)
+EXT(ATI_texture_compression_3dc             , ATI_texture_compression_3dc            , GLL,  x ,  x ,  x , 2004)
+EXT(ATI_texture_env_combine3                , ATI_texture_env_combine3               , GLL,  x ,  x ,  x , 2002)
+EXT(ATI_texture_float                       , ARB_texture_float                      , GLL, GLC,  x ,  x , 2002)
+EXT(ATI_texture_mirror_once                 , ATI_texture_mirror_once                , GLL, GLC,  x ,  x , 2006)
+
 EXT(EXT_abgr                                , dummy_true                             , GLL, GLC,  x ,  x , 1995)
 EXT(EXT_bgra                                , dummy_true                             , GLL,  x ,  x ,  x , 1995)
 EXT(EXT_blend_color                         , EXT_blend_color                        , GLL,  x ,  x ,  x , 1995)
 EXT(EXT_blend_equation_separate             , EXT_blend_equation_separate            , GLL, GLC,  x ,  x , 2003)
 EXT(EXT_blend_func_extended                 , ARB_blend_func_extended                ,  x ,  x ,  x , ES2, 2015)
 EXT(EXT_blend_func_separate                 , EXT_blend_func_separate                , GLL,  x ,  x ,  x , 1999)
-EXT(EXT_buffer_storage                      , ARB_buffer_storage                     ,  x ,  x ,  x ,  31, 2015)
-EXT(EXT_discard_framebuffer                 , dummy_true                             ,  x ,  x , ES1, ES2, 2009)
 EXT(EXT_blend_minmax                        , EXT_blend_minmax                       , GLL,  x , ES1, ES2, 1995)
 EXT(EXT_blend_subtract                      , dummy_true                             , GLL,  x ,  x ,  x , 1995)
+EXT(EXT_buffer_storage                      , ARB_buffer_storage                     ,  x ,  x ,  x ,  31, 2015)
+EXT(EXT_color_buffer_float                  , dummy_true                             ,  x ,  x , ES1,  30, 2013)
 EXT(EXT_compiled_vertex_array               , dummy_true                             , GLL,  x ,  x ,  x , 1996)
 EXT(EXT_copy_texture                        , dummy_true                             , GLL,  x ,  x ,  x , 1995)
 EXT(EXT_depth_bounds_test                   , EXT_depth_bounds_test                  , GLL, GLC,  x ,  x , 2002)
+EXT(EXT_discard_framebuffer                 , dummy_true                             ,  x ,  x , ES1, ES2, 2009)
 EXT(EXT_draw_buffers                        , dummy_true                             ,  x ,  x ,  x , ES2, 2012)
 EXT(EXT_draw_buffers2                       , EXT_draw_buffers2                      , GLL, GLC,  x ,  x , 2006)
 EXT(EXT_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          ,  x ,  x ,  x , ES2, 2014)
@@ -173,6 +206,7 @@ EXT(EXT_point_parameters                    , EXT_point_parameters
 EXT(EXT_polygon_offset                      , dummy_true                             , GLL,  x ,  x ,  x , 1995)
 EXT(EXT_polygon_offset_clamp                , EXT_polygon_offset_clamp               , GLL, GLC,  x ,  x , 2014)
 EXT(EXT_provoking_vertex                    , EXT_provoking_vertex                   , GLL, GLC,  x ,  x , 2009)
+EXT(EXT_read_format_bgra                    , dummy_true                             ,  x ,  x , ES1, ES2, 2009)
 EXT(EXT_rescale_normal                      , dummy_true                             , GLL,  x ,  x ,  x , 1997)
 EXT(EXT_secondary_color                     , dummy_true                             , GLL,  x ,  x ,  x , 1999)
 EXT(EXT_separate_shader_objects             , dummy_true                             ,  x ,  x ,  x , ES2, 2013)
@@ -182,11 +216,10 @@ EXT(EXT_shadow_funcs                        , ARB_shadow
 EXT(EXT_stencil_two_side                    , EXT_stencil_two_side                   , GLL,  x ,  x ,  x , 2001)
 EXT(EXT_stencil_wrap                        , dummy_true                             , GLL,  x ,  x ,  x , 2002)
 EXT(EXT_subtexture                          , dummy_true                             , GLL,  x ,  x ,  x , 1995)
+EXT(EXT_texture                             , dummy_true                             , GLL,  x ,  x ,  x , 1996)
 EXT(EXT_texture3D                           , dummy_true                             , GLL,  x ,  x ,  x , 1996)
 EXT(EXT_texture_array                       , EXT_texture_array                      , GLL, GLC,  x ,  x , 2006)
 EXT(EXT_texture_compression_dxt1            , ANGLE_texture_compression_dxt          , GLL, GLC, ES1, ES2, 2004)
-EXT(ANGLE_texture_compression_dxt3          , ANGLE_texture_compression_dxt          , GLL, GLC, ES1, ES2, 2011)
-EXT(ANGLE_texture_compression_dxt5          , ANGLE_texture_compression_dxt          , GLL, GLC, ES1, ES2, 2011)
 EXT(EXT_texture_compression_latc            , EXT_texture_compression_latc           , GLL,  x ,  x ,  x , 2006)
 EXT(EXT_texture_compression_rgtc            , ARB_texture_compression_rgtc           , GLL, GLC,  x ,  x , 2004)
 EXT(EXT_texture_compression_s3tc            , EXT_texture_compression_s3tc           , GLL, GLC,  x ,  x , 2000)
@@ -197,28 +230,66 @@ EXT(EXT_texture_env_combine                 , dummy_true
 EXT(EXT_texture_env_dot3                    , EXT_texture_env_dot3                   , GLL,  x ,  x ,  x , 2000)
 EXT(EXT_texture_filter_anisotropic          , EXT_texture_filter_anisotropic         , GLL, GLC, ES1, ES2, 1999)
 EXT(EXT_texture_format_BGRA8888             , dummy_true                             ,  x ,  x , ES1, ES2, 2005)
-EXT(EXT_texture_rg                          , ARB_texture_rg                         ,  x ,  x ,  x , ES2, 2011)
-EXT(EXT_read_format_bgra                    , dummy_true                             ,  x ,  x , ES1, ES2, 2009)
 EXT(EXT_texture_integer                     , EXT_texture_integer                    , GLL, GLC,  x ,  x , 2006)
 EXT(EXT_texture_lod_bias                    , dummy_true                             , GLL,  x , ES1,  x , 1999)
 EXT(EXT_texture_mirror_clamp                , EXT_texture_mirror_clamp               , GLL, GLC,  x ,  x , 2004)
 EXT(EXT_texture_object                      , dummy_true                             , GLL,  x ,  x ,  x , 1995)
-EXT(EXT_texture                             , dummy_true                             , GLL,  x ,  x ,  x , 1996)
 EXT(EXT_texture_rectangle                   , NV_texture_rectangle                   , GLL,  x ,  x ,  x , 2004)
-EXT(EXT_texture_shared_exponent             , EXT_texture_shared_exponent            , GLL, GLC,  x ,  x , 2004)
-EXT(EXT_texture_snorm                       , EXT_texture_snorm                      , GLL, GLC,  x ,  x , 2009)
+EXT(EXT_texture_rg                          , ARB_texture_rg                         ,  x ,  x ,  x , ES2, 2011)
 EXT(EXT_texture_sRGB                        , EXT_texture_sRGB                       , GLL, GLC,  x ,  x , 2004)
 EXT(EXT_texture_sRGB_decode                 , EXT_texture_sRGB_decode                , GLL, GLC,  x ,  x , 2006)
+EXT(EXT_texture_shared_exponent             , EXT_texture_shared_exponent            , GLL, GLC,  x ,  x , 2004)
+EXT(EXT_texture_snorm                       , EXT_texture_snorm                      , GLL, GLC,  x ,  x , 2009)
 EXT(EXT_texture_swizzle                     , EXT_texture_swizzle                    , GLL, GLC,  x ,  x , 2008)
 EXT(EXT_texture_type_2_10_10_10_REV         , dummy_true                             ,  x ,  x ,  x , ES2, 2008)
 EXT(EXT_timer_query                         , EXT_timer_query                        , GLL, GLC,  x ,  x , 2006)
 EXT(EXT_transform_feedback                  , EXT_transform_feedback                 , GLL, GLC,  x ,  x , 2011)
 EXT(EXT_unpack_subimage                     , dummy_true                             ,  x ,  x ,  x , ES2, 2011)
-EXT(EXT_vertex_array_bgra                   , EXT_vertex_array_bgra                  , GLL, GLC,  x ,  x , 2008)
 EXT(EXT_vertex_array                        , dummy_true                             , GLL,  x ,  x ,  x , 1995)
-EXT(EXT_color_buffer_float                  , dummy_true                             ,  x ,  x , ES1,  30, 2013)
+EXT(EXT_vertex_array_bgra                   , EXT_vertex_array_bgra                  , GLL, GLC,  x ,  x , 2008)
 
+EXT(IBM_multimode_draw_arrays               , dummy_true                             , GLL, GLC,  x ,  x , 1998)
+EXT(IBM_rasterpos_clip                      , dummy_true                             , GLL,  x ,  x ,  x , 1996)
+EXT(IBM_texture_mirrored_repeat             , dummy_true                             , GLL,  x ,  x ,  x , 1998)
 
+EXT(INGR_blend_func_separate                , EXT_blend_func_separate                , GLL,  x ,  x ,  x , 1999)
+
+EXT(INTEL_performance_query                 , INTEL_performance_query                , GLL, GLC,  x , ES2, 2013)
+
+EXT(KHR_context_flush_control               , dummy_true                             , GLL, GLC,  x , ES2, 2014)
+EXT(KHR_debug                               , dummy_true                             , GLL, GLC,  x ,  x , 2012)
+EXT(KHR_texture_compression_astc_hdr        , KHR_texture_compression_astc_hdr       , GLL, GLC,  x , ES2, 2012)
+EXT(KHR_texture_compression_astc_ldr        , KHR_texture_compression_astc_ldr       , GLL, GLC,  x , ES2, 2012)
+
+EXT(MESA_pack_invert                        , MESA_pack_invert                       , GLL, GLC,  x ,  x , 2002)
+EXT(MESA_texture_signed_rgba                , EXT_texture_snorm                      , GLL, GLC,  x ,  x , 2009)
+EXT(MESA_window_pos                         , dummy_true                             , GLL,  x ,  x ,  x , 2000)
+EXT(MESA_ycbcr_texture                      , MESA_ycbcr_texture                     , GLL, GLC,  x ,  x , 2002)
+
+EXT(NV_blend_square                         , dummy_true                             , GLL,  x ,  x ,  x , 1999)
+EXT(NV_conditional_render                   , NV_conditional_render                  , GLL, GLC,  x ,  x , 2008)
+EXT(NV_depth_clamp                          , ARB_depth_clamp                        , GLL, GLC,  x ,  x , 2001)
+EXT(NV_draw_buffers                         , dummy_true                             ,  x ,  x ,  x , ES2, 2011)
+EXT(NV_fbo_color_attachments                , dummy_true                             ,  x ,  x ,  x , ES2, 2010)
+EXT(NV_fog_distance                         , NV_fog_distance                        , GLL,  x ,  x ,  x , 2001)
+EXT(NV_fragment_program_option              , NV_fragment_program_option             , GLL,  x ,  x ,  x , 2005)
+EXT(NV_light_max_exponent                   , dummy_true                             , GLL,  x ,  x ,  x , 1999)
+EXT(NV_packed_depth_stencil                 , dummy_true                             , GLL, GLC,  x ,  x , 2000)
+EXT(NV_point_sprite                         , NV_point_sprite                        , GLL, GLC,  x ,  x , 2001)
+EXT(NV_primitive_restart                    , NV_primitive_restart                   , GLL,  x ,  x ,  x , 2002)
+EXT(NV_read_buffer                          , dummy_true                             ,  x ,  x ,  x , ES2, 2011)
+EXT(NV_read_depth                           , dummy_true                             ,  x ,  x ,  x , ES2, 2011)
+EXT(NV_read_depth_stencil                   , dummy_true                             ,  x ,  x ,  x , ES2, 2011)
+EXT(NV_read_stencil                         , dummy_true                             ,  x ,  x ,  x , ES2, 2011)
+EXT(NV_texgen_reflection                    , dummy_true                             , GLL,  x ,  x ,  x , 1999)
+EXT(NV_texture_barrier                      , NV_texture_barrier                     , GLL, GLC,  x ,  x , 2009)
+EXT(NV_texture_env_combine4                 , NV_texture_env_combine4                , GLL,  x ,  x ,  x , 1999)
+EXT(NV_texture_rectangle                    , NV_texture_rectangle                   , GLL,  x ,  x ,  x , 2000)
+EXT(NV_vdpau_interop                        , NV_vdpau_interop                       , GLL, GLC,  x ,  x , 2010)
+
+EXT(OES_EGL_image                           , OES_EGL_image                          , GLL, GLC, ES1, ES2, 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
+EXT(OES_EGL_image_external                  , OES_EGL_image_external                 ,  x ,  x , ES1, ES2, 2010)
+EXT(OES_EGL_sync                            , dummy_true                             ,  x ,  x , ES1, ES2, 2010)
 EXT(OES_blend_equation_separate             , EXT_blend_equation_separate            ,  x ,  x , ES1,  x , 2009)
 EXT(OES_blend_func_separate                 , EXT_blend_func_separate                ,  x ,  x , ES1,  x , 2009)
 EXT(OES_blend_subtract                      , dummy_true                             ,  x ,  x , ES1,  x , 2009)
@@ -231,9 +302,6 @@ EXT(OES_depth_texture                       , ARB_depth_texture
 EXT(OES_depth_texture_cube_map              , OES_depth_texture_cube_map             ,  x ,  x ,  x , ES2, 2012)
 EXT(OES_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          ,  x ,  x ,  x , ES2, 2014)
 EXT(OES_draw_texture                        , OES_draw_texture                       ,  x ,  x , ES1,  x , 2004)
-EXT(OES_EGL_sync                            , dummy_true                             ,  x ,  x , ES1, ES2, 2010)
-EXT(OES_EGL_image                           , OES_EGL_image                          , GLL, GLC, ES1, ES2, 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
-EXT(OES_EGL_image_external                  , OES_EGL_image_external                 ,  x ,  x , ES1, ES2, 2010)
 EXT(OES_element_index_uint                  , dummy_true                             ,  x ,  x , ES1, ES2, 2005)
 EXT(OES_fbo_render_mipmap                   , dummy_true                             ,  x ,  x , ES1, ES2, 2005)
 EXT(OES_fixed_point                         , dummy_true                             ,  x ,  x , ES1,  x , 2002)
@@ -261,73 +329,17 @@ EXT(OES_texture_float_linear                , OES_texture_float_linear
 EXT(OES_texture_half_float                  , OES_texture_half_float                 ,  x ,  x ,  x , ES2, 2005)
 EXT(OES_texture_half_float_linear           , OES_texture_half_float_linear          ,  x ,  x ,  x , ES2, 2005)
 EXT(OES_texture_mirrored_repeat             , dummy_true                             ,  x ,  x , ES1,  x , 2005)
-EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample                ,  x ,  x , ES1,  31, 2014)
 EXT(OES_texture_npot                        , ARB_texture_non_power_of_two           ,  x ,  x , ES1, ES2, 2005)
+EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample                ,  x ,  x , ES1,  31, 2014)
 EXT(OES_vertex_array_object                 , dummy_true                             ,  x ,  x , ES1, ES2, 2010)
 
-
-EXT(KHR_debug                               , dummy_true                             , GLL, GLC,  x ,  x , 2012)
-EXT(KHR_context_flush_control               , dummy_true                             , GLL, GLC,  x , ES2, 2014)
-EXT(KHR_texture_compression_astc_hdr        , KHR_texture_compression_astc_hdr       , GLL, GLC,  x , ES2, 2012)
-EXT(KHR_texture_compression_astc_ldr        , KHR_texture_compression_astc_ldr       , GLL, GLC,  x , ES2, 2012)
-
-
-EXT(3DFX_texture_compression_FXT1           , TDFX_texture_compression_FXT1          , GLL, GLC,  x ,  x , 1999)
-EXT(AMD_conservative_depth                  , ARB_conservative_depth                 , GLL, GLC,  x ,  x , 2009)
-EXT(AMD_draw_buffers_blend                  , ARB_draw_buffers_blend                 , GLL, GLC,  x ,  x , 2009)
-EXT(AMD_performance_monitor                 , AMD_performance_monitor                , GLL, GLC,  x ,  x , 2007)
-EXT(AMD_pinned_memory                       , AMD_pinned_memory                      , GLL, GLC,  x ,  x , 2013)
-EXT(AMD_seamless_cubemap_per_texture        , AMD_seamless_cubemap_per_texture       , GLL, GLC,  x ,  x , 2009)
-EXT(AMD_shader_stencil_export               , ARB_shader_stencil_export              , GLL, GLC,  x ,  x , 2009)
-EXT(AMD_shader_trinary_minmax               , dummy_true                             , GLL, GLC,  x ,  x , 2012)
-EXT(AMD_vertex_shader_layer                 , AMD_vertex_shader_layer                ,  x , GLC,  x ,  x , 2012)
-EXT(AMD_vertex_shader_viewport_index        , AMD_vertex_shader_viewport_index       ,  x , GLC,  x ,  x , 2012)
-EXT(APPLE_object_purgeable                  , APPLE_object_purgeable                 , GLL, GLC,  x ,  x , 2006)
-EXT(APPLE_packed_pixels                     , dummy_true                             , GLL,  x ,  x ,  x , 2002)
-EXT(APPLE_texture_max_level                 , dummy_true                             ,  x ,  x , ES1, ES2, 2009)
-EXT(APPLE_vertex_array_object               , dummy_true                             , GLL,  x ,  x ,  x , 2002)
-EXT(ATI_blend_equation_separate             , EXT_blend_equation_separate            , GLL, GLC,  x ,  x , 2003)
-EXT(ATI_draw_buffers                        , dummy_true                             , GLL,  x ,  x ,  x , 2002)
-EXT(ATI_fragment_shader                     , ATI_fragment_shader                    , GLL,  x ,  x ,  x , 2001)
-EXT(ATI_separate_stencil                    , ATI_separate_stencil                   , GLL,  x ,  x ,  x , 2006)
-EXT(ATI_texture_compression_3dc             , ATI_texture_compression_3dc            , GLL,  x ,  x ,  x , 2004)
-EXT(ATI_texture_env_combine3                , ATI_texture_env_combine3               , GLL,  x ,  x ,  x , 2002)
-EXT(ATI_texture_float                       , ARB_texture_float                      , GLL, GLC,  x ,  x , 2002)
-EXT(ATI_texture_mirror_once                 , ATI_texture_mirror_once                , GLL, GLC,  x ,  x , 2006)
-EXT(IBM_multimode_draw_arrays               , dummy_true                             , GLL, GLC,  x ,  x , 1998)
-EXT(IBM_rasterpos_clip                      , dummy_true                             , GLL,  x ,  x ,  x , 1996)
-EXT(IBM_texture_mirrored_repeat             , dummy_true                             , GLL,  x ,  x ,  x , 1998)
-EXT(INGR_blend_func_separate                , EXT_blend_func_separate                , GLL,  x ,  x ,  x , 1999)
-EXT(INTEL_performance_query                 , INTEL_performance_query                , GLL, GLC,  x , ES2, 2013)
-EXT(MESA_pack_invert                        , MESA_pack_invert                       , GLL, GLC,  x ,  x , 2002)
-EXT(MESA_texture_signed_rgba                , EXT_texture_snorm                      , GLL, GLC,  x ,  x , 2009)
-EXT(MESA_window_pos                         , dummy_true                             , GLL,  x ,  x ,  x , 2000)
-EXT(MESA_ycbcr_texture                      , MESA_ycbcr_texture                     , GLL, GLC,  x ,  x , 2002)
-EXT(NV_blend_square                         , dummy_true                             , GLL,  x ,  x ,  x , 1999)
-EXT(NV_conditional_render                   , NV_conditional_render                  , GLL, GLC,  x ,  x , 2008)
-EXT(NV_depth_clamp                          , ARB_depth_clamp                        , GLL, GLC,  x ,  x , 2001)
-EXT(NV_draw_buffers                         , dummy_true                             ,  x ,  x ,  x , ES2, 2011)
-EXT(NV_fbo_color_attachments                , dummy_true                             ,  x ,  x ,  x , ES2, 2010)
-EXT(NV_fog_distance                         , NV_fog_distance                        , GLL,  x ,  x ,  x , 2001)
-EXT(NV_fragment_program_option              , NV_fragment_program_option             , GLL,  x ,  x ,  x , 2005)
-EXT(NV_light_max_exponent                   , dummy_true                             , GLL,  x ,  x ,  x , 1999)
-EXT(NV_packed_depth_stencil                 , dummy_true                             , GLL, GLC,  x ,  x , 2000)
-EXT(NV_point_sprite                         , NV_point_sprite                        , GLL, GLC,  x ,  x , 2001)
-EXT(NV_primitive_restart                    , NV_primitive_restart                   , GLL,  x ,  x ,  x , 2002)
-EXT(NV_read_buffer                          , dummy_true                             ,  x ,  x ,  x , ES2, 2011)
-EXT(NV_read_depth                           , dummy_true                             ,  x ,  x ,  x , ES2, 2011)
-EXT(NV_read_depth_stencil                   , dummy_true                             ,  x ,  x ,  x , ES2, 2011)
-EXT(NV_read_stencil                         , dummy_true                             ,  x ,  x ,  x , ES2, 2011)
-EXT(NV_texgen_reflection                    , dummy_true                             , GLL,  x ,  x ,  x , 1999)
-EXT(NV_texture_barrier                      , NV_texture_barrier                     , GLL, GLC,  x ,  x , 2009)
-EXT(NV_texture_env_combine4                 , NV_texture_env_combine4                , GLL,  x ,  x ,  x , 1999)
-EXT(NV_texture_rectangle                    , NV_texture_rectangle                   , GLL,  x ,  x ,  x , 2000)
-EXT(NV_vdpau_interop                        , NV_vdpau_interop                       , GLL, GLC,  x ,  x , 2010)
 EXT(S3_s3tc                                 , ANGLE_texture_compression_dxt          , GLL, GLC,  x ,  x , 1999)
+
 EXT(SGIS_generate_mipmap                    , dummy_true                             , GLL,  x ,  x ,  x , 1997)
 EXT(SGIS_texture_border_clamp               , ARB_texture_border_clamp               , GLL,  x ,  x ,  x , 1997)
 EXT(SGIS_texture_edge_clamp                 , dummy_true                             , GLL,  x ,  x ,  x , 1997)
 EXT(SGIS_texture_lod                        , dummy_true                             , GLL,  x ,  x ,  x , 1997)
+
 EXT(SUN_multi_draw_arrays                   , dummy_true                             , GLL,  x ,  x ,  x , 1999)
 #undef GLL
 #undef GLC

From e8c5ef3ecaafae0ad6c300019c489401a9af714c Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Wed, 18 Nov 2015 15:01:44 -0800
Subject: [PATCH 114/335] mesa: Add test for sorted extension table

Enable developers to know if the table's alphabetical sorting
is maintained or lost.

v2: Move "*" next to pointer name (Matt)
    Include extensions_table.h instead of extensions.h (Ian)
    Remove extra " *" in comment (Ian)

Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/main/tests/Makefile.am         |  1 +
 src/mesa/main/tests/mesa_extensions.cpp | 51 +++++++++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 src/mesa/main/tests/mesa_extensions.cpp

diff --git a/src/mesa/main/tests/Makefile.am b/src/mesa/main/tests/Makefile.am
index bd7ab7365c0..d6977e20e85 100644
--- a/src/mesa/main/tests/Makefile.am
+++ b/src/mesa/main/tests/Makefile.am
@@ -27,6 +27,7 @@ AM_CPPFLAGS += -DHAVE_SHARED_GLAPI
 main_test_SOURCES +=			\
 	dispatch_sanity.cpp		\
 	mesa_formats.cpp			\
+	mesa_extensions.cpp			\
 	program_state_string.cpp
 
 main_test_LDADD += \
diff --git a/src/mesa/main/tests/mesa_extensions.cpp b/src/mesa/main/tests/mesa_extensions.cpp
new file mode 100644
index 00000000000..0c7addd4282
--- /dev/null
+++ b/src/mesa/main/tests/mesa_extensions.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file mesa_extensions.cpp
+ *
+ * Verify that the extensions table is sorted.
+ */
+
+#include <string.h>
+#include <gtest/gtest.h>
+#include "util/macros.h"
+
+/**
+ * Debug/test: verify the extension table is alphabetically sorted.
+ */
+TEST(MesaExtensionsTest, AlphabeticallySorted)
+{
+   const char *ext_names[] = {
+   #define EXT(name_str, ...) #name_str,
+   #include "main/extensions_table.h"
+   #undef EXT
+   };
+
+   /* Each name must compare strictly less than its successor under strcmp()
+    * (plain byte order), so a duplicated name fails the test as well. */
+   for (unsigned i = 0; i + 1 < ARRAY_SIZE(ext_names); ++i) {
+      ASSERT_LT(strcmp(ext_names[i], ext_names[i + 1]), 0)
+         << ext_names[i] << " must sort before " << ext_names[i + 1];
+   }
+}

From c15a407eb49d3b26bdbf039816636adb184c276a Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 2 Nov 2015 10:29:45 -0800
Subject: [PATCH 115/335] i965: Make brw_imm_vf4() take 8-bit restricted
 floats.

This partially reverts commit bbf8239f92ecd79431dfa41402e1c85318e7267f.

I didn't like that commit to begin with -- computing things at compile
time is fine -- but for purposes of verifying that the resulting values
are correct, looking up 0x00 and 0x30 in a table is a lot better than
evaluating a recursive function.

Anyway, by making brw_imm_vf4() take the actual 8-bit restricted floats
directly (instead of only integral values that would be converted to
restricted float), we can use this function as a replacement for the
vector float src_reg/fs_reg constructors.

brw_float_to_vf() is not currently an inline function, so it will not be
evaluated at compile time. I'll address that in a follow-up patch.

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_clip_util.c |  5 ++-
 src/mesa/drivers/dri/i965/brw_reg.h       | 38 +++++------------------
 2 files changed, 11 insertions(+), 32 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c
index 40ad14402a7..73ba85e2a61 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_util.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_util.c
@@ -224,7 +224,10 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
               vec1(t_nopersp),
               brw_imm_f(0));
       brw_IF(p, BRW_EXECUTE_1);
-      brw_MOV(p, t_nopersp, brw_imm_vf4(1, 0, 0, 0));
+      brw_MOV(p, t_nopersp, brw_imm_vf4(brw_float_to_vf(1.0),
+                                        brw_float_to_vf(0.0),
+                                        brw_float_to_vf(0.0),
+                                        brw_float_to_vf(0.0)));
       brw_ENDIF(p);
 
       /* Now compute t_nopersp = t_nopersp.y/t_nopersp.x and broadcast it. */
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index 3da83b43b5d..e34e7ea0a52 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -43,7 +43,6 @@
 #define BRW_REG_H
 
 #include <stdbool.h>
-#include "main/imports.h"
 #include "main/compiler.h"
 #include "main/macros.h"
 #include "program/prog_instruction.h"
@@ -638,38 +637,15 @@ brw_imm_vf(unsigned v)
    return imm;
 }
 
-/**
- * Convert an integer into a "restricted" 8-bit float, used in vector
- * immediates.  The 8-bit floating point format has a sign bit, an
- * excess-3 3-bit exponent, and a 4-bit mantissa.  All integer values
- * from -31 to 31 can be represented exactly.
- */
-static inline uint8_t
-int_to_float8(int x)
-{
-   if (x == 0) {
-      return 0;
-   } else if (x < 0) {
-      return 1 << 7 | int_to_float8(-x);
-   } else {
-      const unsigned exponent = _mesa_logbase2(x);
-      const unsigned mantissa = (x - (1 << exponent)) << (4 - exponent);
-      assert(exponent <= 4);
-      return (exponent + 3) << 4 | mantissa;
-   }
-}
-
-/**
- * Construct a floating-point packed vector immediate from its integer
- * values. \sa int_to_float8()
- */
 static inline struct brw_reg
-brw_imm_vf4(int v0, int v1, int v2, int v3)
+brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) /* pre-encoded VF values, e.g. from brw_float_to_vf() */
 {
-   return brw_imm_vf((int_to_float8(v0) << 0) |
-                     (int_to_float8(v1) << 8) |
-                     (int_to_float8(v2) << 16) |
-                     (int_to_float8(v3) << 24));
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+   imm.vstride = BRW_VERTICAL_STRIDE_0;
+   imm.width = BRW_WIDTH_4;
+   imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+   imm.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24)); /* v0 at bit 0 ... v3 at bit 24; arguments are not masked */
+   return imm;
 }
 
 

From 3ccc41ecfc5e9345a1c291748d8840984f7413ae Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 2 Nov 2015 11:26:16 -0800
Subject: [PATCH 116/335] i965/fs: Replace fs_reg(imm) constructors with
 brw_imm_*().

Cuts 10k of .text, of which only 776 bytes are the fs_reg constructor
implementations themselves.

   text     data      bss      dec      hex  filename
5204535   214112    27784  5446431   531b1f  i965_dri.so before
5193977   214112    27784  5435873   52f1e1  i965_dri.so after

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 .../drivers/dri/i965/brw_blorp_blit_eu.cpp    |   2 +-
 src/mesa/drivers/dri/i965/brw_fs.cpp          | 100 +++---------
 src/mesa/drivers/dri/i965/brw_fs_builder.h    |   4 +-
 .../dri/i965/brw_fs_combine_constants.cpp     |   2 +-
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp      | 150 +++++++++---------
 .../dri/i965/brw_fs_surface_builder.cpp       |  49 +++---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp  |  42 ++---
 src/mesa/drivers/dri/i965/brw_ir_fs.h         |   5 -
 .../dri/i965/test_fs_cmod_propagation.cpp     |  30 ++--
 9 files changed, 167 insertions(+), 217 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
index 5308d175416..e684bdbb72c 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
@@ -85,7 +85,7 @@ brw_blorp_eu_emitter::emit_texture_lookup(const struct brw_reg &dst,
                                           unsigned msg_length)
 {
    fs_inst *inst = new (mem_ctx) fs_inst(op, 16, dst, brw_message_reg(base_mrf),
-                                         fs_reg(0u));
+                                         brw_imm_ud(0u));
 
    inst->base_mrf = base_mrf;
    inst->mlen = msg_length;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 72a21587a4f..e9c990d4308 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -186,7 +186,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
     * the redundant ones.
     */
    fs_reg vec4_offset = vgrf(glsl_type::int_type);
-   bld.ADD(vec4_offset, varying_offset, fs_reg(const_offset & ~3));
+   bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~3));
 
    int scale = 1;
    if (devinfo->gen == 4 && bld.dispatch_width() == 8) {
@@ -374,54 +374,6 @@ fs_reg::fs_reg()
    this->file = BAD_FILE;
 }
 
-/** Immediate value constructor. */
-fs_reg::fs_reg(float f)
-{
-   init();
-   this->file = IMM;
-   this->type = BRW_REGISTER_TYPE_F;
-   this->stride = 0;
-   this->f = f;
-}
-
-/** Immediate value constructor. */
-fs_reg::fs_reg(int32_t i)
-{
-   init();
-   this->file = IMM;
-   this->type = BRW_REGISTER_TYPE_D;
-   this->stride = 0;
-   this->d = i;
-}
-
-/** Immediate value constructor. */
-fs_reg::fs_reg(uint32_t u)
-{
-   init();
-   this->file = IMM;
-   this->type = BRW_REGISTER_TYPE_UD;
-   this->stride = 0;
-   this->ud = u;
-}
-
-/** Vector float immediate value constructor. */
-fs_reg::fs_reg(uint8_t vf[4])
-{
-   init();
-   this->file = IMM;
-   this->type = BRW_REGISTER_TYPE_VF;
-   memcpy(&this->ud, vf, sizeof(unsigned));
-}
-
-/** Vector float immediate value constructor. */
-fs_reg::fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
-{
-   init();
-   this->file = IMM;
-   this->type = BRW_REGISTER_TYPE_VF;
-   this->ud = (vf0 <<  0) | (vf1 <<  8) | (vf2 << 16) | (vf3 << 24);
-}
-
 fs_reg::fs_reg(struct brw_reg reg) :
    backend_reg(reg)
 {
@@ -590,7 +542,7 @@ fs_visitor::emit_shader_time_end()
    fs_reg reset = shader_end_time;
    reset.set_smear(2);
    set_condmod(BRW_CONDITIONAL_Z,
-               ibld.AND(ibld.null_reg_ud(), reset, fs_reg(1u)));
+               ibld.AND(ibld.null_reg_ud(), reset, brw_imm_ud(1u)));
    ibld.IF(BRW_PREDICATE_NORMAL);
 
    fs_reg start = shader_start_time;
@@ -605,11 +557,11 @@ fs_visitor::emit_shader_time_end()
     * is 2 cycles.  Remove that overhead, so I can forget about that when
     * trying to determine the time taken for single instructions.
     */
-   cbld.ADD(diff, diff, fs_reg(-2u));
+   cbld.ADD(diff, diff, brw_imm_ud(-2u));
    SHADER_TIME_ADD(cbld, 0, diff);
-   SHADER_TIME_ADD(cbld, 1, fs_reg(1u));
+   SHADER_TIME_ADD(cbld, 1, brw_imm_ud(1u));
    ibld.emit(BRW_OPCODE_ELSE);
-   SHADER_TIME_ADD(cbld, 2, fs_reg(1u));
+   SHADER_TIME_ADD(cbld, 2, brw_imm_ud(1u));
    ibld.emit(BRW_OPCODE_ENDIF);
 }
 
@@ -619,7 +571,7 @@ fs_visitor::SHADER_TIME_ADD(const fs_builder &bld,
                             fs_reg value)
 {
    int index = shader_time_index * 3 + shader_time_subindex;
-   fs_reg offset = fs_reg(index * SHADER_TIME_STRIDE);
+   struct brw_reg offset = brw_imm_d(index * SHADER_TIME_STRIDE);
 
    fs_reg payload;
    if (dispatch_width == 8)
@@ -1032,7 +984,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
    if (pixel_center_integer) {
       bld.MOV(wpos, this->pixel_x);
    } else {
-      bld.ADD(wpos, this->pixel_x, fs_reg(0.5f));
+      bld.ADD(wpos, this->pixel_x, brw_imm_f(0.5f));
    }
    wpos = offset(wpos, bld, 1);
 
@@ -1048,7 +1000,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
 	 offset += key->drawable_height - 1.0f;
       }
 
-      bld.ADD(wpos, pixel_y, fs_reg(offset));
+      bld.ADD(wpos, pixel_y, brw_imm_f(offset));
    }
    wpos = offset(wpos, bld, 1);
 
@@ -1225,7 +1177,7 @@ fs_visitor::emit_frontfacing_interpolation()
       fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W));
       g0.negate = true;
 
-      bld.ASR(*reg, g0, fs_reg(15));
+      bld.ASR(*reg, g0, brw_imm_d(15));
    } else {
       /* Bit 31 of g1.6 is 0 if the polygon is front facing. We want to create
        * a boolean result from this (1/true or 0/false).
@@ -1240,7 +1192,7 @@ fs_visitor::emit_frontfacing_interpolation()
       fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D));
       g1_6.negate = true;
 
-      bld.ASR(*reg, g1_6, fs_reg(31));
+      bld.ASR(*reg, g1_6, brw_imm_d(31));
    }
 
    return reg;
@@ -1257,7 +1209,7 @@ fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos)
       /* Convert int_sample_pos to floating point */
       bld.MOV(dst, int_sample_pos);
       /* Scale to the range [0, 1] */
-      bld.MUL(dst, dst, fs_reg(1 / 16.0f));
+      bld.MUL(dst, dst, brw_imm_f(1 / 16.0f));
    }
    else {
       /* From ARB_sample_shading specification:
@@ -1265,7 +1217,7 @@ fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos)
        *  rasterization is disabled, gl_SamplePosition will always be
        *  (0.5, 0.5).
        */
-      bld.MOV(dst, fs_reg(0.5f));
+      bld.MOV(dst, brw_imm_f(0.5f));
    }
 }
 
@@ -1360,8 +1312,8 @@ fs_visitor::emit_sampleid_setup()
 
       abld.exec_all().group(1, 0)
           .AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
-               fs_reg(sspi_mask));
-      abld.exec_all().group(1, 0).SHR(t1, t1, fs_reg(5));
+               brw_imm_ud(sspi_mask));
+      abld.exec_all().group(1, 0).SHR(t1, t1, brw_imm_d(5));
 
       /* This works for both SIMD8 and SIMD16 */
       abld.exec_all().group(4, 0)
@@ -1376,7 +1328,7 @@ fs_visitor::emit_sampleid_setup()
        * "When rendering to a non-multisample buffer, or if multisample
        *  rasterization is disabled, gl_SampleID will always be zero."
        */
-      abld.MOV(*reg, fs_reg(0));
+      abld.MOV(*reg, brw_imm_d(0));
    }
 
    return reg;
@@ -2047,16 +1999,16 @@ fs_visitor::demote_pull_constants()
          /* Generate a pull load into dst. */
          if (inst->src[i].reladdr) {
             VARYING_PULL_CONSTANT_LOAD(ibld, dst,
-                                       fs_reg(index),
+                                       brw_imm_ud(index),
                                        *inst->src[i].reladdr,
                                        pull_index);
             inst->src[i].reladdr = NULL;
             inst->src[i].stride = 1;
          } else {
             const fs_builder ubld = ibld.exec_all().group(8, 0);
-            fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
+            struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
             ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
-                      dst, fs_reg(index), offset);
+                      dst, brw_imm_ud(index), offset);
             inst->src[i].set_smear(pull_index & 3);
          }
          brw_mark_surface_used(prog_data, index);
@@ -2748,7 +2700,7 @@ fs_visitor::eliminate_find_live_channel()
       case SHADER_OPCODE_FIND_LIVE_CHANNEL:
          if (depth == 0) {
             inst->opcode = BRW_OPCODE_MOV;
-            inst->src[0] = fs_reg(0u);
+            inst->src[0] = brw_imm_ud(0u);
             inst->sources = 1;
             inst->force_writemask_all = true;
             progress = true;
@@ -3660,7 +3612,7 @@ lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op,
        (has_lod || shadow_c.file != BAD_FILE ||
         (op == SHADER_OPCODE_TEX && bld.dispatch_width() == 8))) {
       for (unsigned i = coord_components; i < 3; i++)
-         bld.MOV(offset(msg_end, bld, i), fs_reg(0.0f));
+         bld.MOV(offset(msg_end, bld, i), brw_imm_f(0.0f));
 
       msg_end = offset(msg_end, bld, 3 - coord_components);
    }
@@ -3717,7 +3669,7 @@ lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op,
          /* There's no plain shadow compare message, so we use shadow
           * compare with a bias of 0.0.
           */
-         bld.MOV(msg_end, fs_reg(0.0f));
+         bld.MOV(msg_end, brw_imm_f(0.0f));
          msg_end = offset(msg_end, bld, 1);
       }
 
@@ -3811,7 +3763,7 @@ lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op,
    case SHADER_OPCODE_TXF_CMS:
       msg_lod = offset(msg_coords, bld, 3);
       /* lod */
-      bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u));
+      bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
       /* sample index */
       bld.MOV(retype(offset(msg_lod, bld, 1), BRW_REGISTER_TYPE_UD), sample_index);
       msg_end = offset(msg_lod, bld, 2);
@@ -3891,7 +3843,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
    if (bld.shader->stage != MESA_SHADER_FRAGMENT &&
        op == SHADER_OPCODE_TEX) {
       op = SHADER_OPCODE_TXL;
-      lod = fs_reg(0.0f);
+      lod = brw_imm_f(0.0f);
    }
 
    /* Set up the LOD info */
@@ -4102,7 +4054,7 @@ emit_surface_header(const fs_builder &bld, const fs_reg &sample_mask)
 {
    fs_builder ubld = bld.exec_all().group(8, 0);
    const fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD);
-   ubld.MOV(dst, fs_reg(0));
+   ubld.MOV(dst, brw_imm_d(0));
    ubld.MOV(component(dst, 7), sample_mask);
    return dst;
 }
@@ -4244,7 +4196,7 @@ fs_visitor::lower_logical_sends()
       case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
          lower_surface_logical_send(ibld, inst,
                                     SHADER_OPCODE_TYPED_SURFACE_READ,
-                                    fs_reg(0xffff));
+                                    brw_imm_d(0xffff));
          break;
 
       case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
@@ -5233,7 +5185,7 @@ fs_visitor::run_gs()
        */
       if (gs_compile->control_data_header_size_bits <= 32) {
          const fs_builder abld = bld.annotate("initialize control data bits");
-         abld.MOV(this->control_data_bits, fs_reg(0u));
+         abld.MOV(this->control_data_bits, brw_imm_ud(0u));
       }
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h
index 22b2f22073f..dd3c383a17d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -225,7 +225,7 @@ namespace brw {
       sample_mask_reg() const
       {
          if (shader->stage != MESA_SHADER_FRAGMENT) {
-            return src_reg(0xffff);
+            return brw_imm_d(0xffff);
          } else if (((brw_wm_prog_data *)shader->stage_prog_data)->uses_kill) {
             return brw_flag_reg(0, 1);
          } else {
@@ -548,7 +548,7 @@ namespace brw {
             const dst_reg x_times_one_minus_a = vgrf(dst.type);
 
             MUL(y_times_a, y, a);
-            ADD(one_minus_a, negate(a), src_reg(1.0f));
+            ADD(one_minus_a, negate(a), brw_imm_f(1.0f));
             MUL(x_times_one_minus_a, x, src_reg(one_minus_a));
             return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a));
          }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
index 0c115f50748..c3ad7ad4771 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
@@ -279,7 +279,7 @@ fs_visitor::opt_combine_constants()
                       imm->block->last_non_control_flow_inst()->next);
       const fs_builder ibld = bld.at(imm->block, n).exec_all().group(1, 0);
 
-      ibld.MOV(reg, fs_reg(imm->val));
+      ibld.MOV(reg, brw_imm_f(imm->val));
       imm->nr = reg.nr;
       imm->subreg_offset = reg.subreg_offset;
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index ebdcb3a4246..8364bbfc0f4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -208,7 +208,7 @@ emit_system_values_block(nir_block *block, void *void_visitor)
             const fs_builder abld = v->bld.annotate("gl_InvocationID", NULL);
             fs_reg g1(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
             fs_reg iid = abld.vgrf(BRW_REGISTER_TYPE_UD, 1);
-            abld.SHR(iid, g1, fs_reg(27u));
+            abld.SHR(iid, g1, brw_imm_ud(27u));
             *reg = iid;
          }
          break;
@@ -454,7 +454,7 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
       tmp.subreg_offset = 2;
       tmp.stride = 2;
 
-      fs_inst *or_inst = bld.OR(tmp, g0, fs_reg(0x3f80));
+      fs_inst *or_inst = bld.OR(tmp, g0, brw_imm_d(0x3f80));
       or_inst->src[1].type = BRW_REGISTER_TYPE_UW;
 
       tmp.type = BRW_REGISTER_TYPE_D;
@@ -479,9 +479,9 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
          g1_6.negate = true;
       }
 
-      bld.OR(tmp, g1_6, fs_reg(0x3f800000));
+      bld.OR(tmp, g1_6, brw_imm_d(0x3f800000));
    }
-   bld.AND(retype(result, BRW_REGISTER_TYPE_D), tmp, fs_reg(0xbf800000));
+   bld.AND(retype(result, BRW_REGISTER_TYPE_D), tmp, brw_imm_d(0xbf800000));
 
    return true;
 }
@@ -594,14 +594,14 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
          * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
          * zero.
          */
-      bld.CMP(bld.null_reg_f(), op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ);
+      bld.CMP(bld.null_reg_f(), op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ);
 
       fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD);
       op[0].type = BRW_REGISTER_TYPE_UD;
       result.type = BRW_REGISTER_TYPE_UD;
-      bld.AND(result_int, op[0], fs_reg(0x80000000u));
+      bld.AND(result_int, op[0], brw_imm_ud(0x80000000u));
 
-      inst = bld.OR(result_int, result_int, fs_reg(0x3f800000u));
+      inst = bld.OR(result_int, result_int, brw_imm_ud(0x3f800000u));
       inst->predicate = BRW_PREDICATE_NORMAL;
       if (instr->dest.saturate) {
          inst = bld.MOV(result, result);
@@ -615,9 +615,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
        *               -> non-negative val generates 0x00000000.
        *  Predicated OR sets 1 if val is positive.
        */
-      bld.CMP(bld.null_reg_d(), op[0], fs_reg(0), BRW_CONDITIONAL_G);
-      bld.ASR(result, op[0], fs_reg(31));
-      inst = bld.OR(result, result, fs_reg(1));
+      bld.CMP(bld.null_reg_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_G);
+      bld.ASR(result, op[0], brw_imm_d(31));
+      inst = bld.OR(result, result, brw_imm_d(1));
       inst->predicate = BRW_PREDICATE_NORMAL;
       break;
 
@@ -665,21 +665,21 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
    case nir_op_fddy:
       if (fs_key->high_quality_derivatives) {
          inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0],
-                         fs_reg(fs_key->render_to_fbo));
+                         brw_imm_d(fs_key->render_to_fbo));
       } else {
          inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0],
-                         fs_reg(fs_key->render_to_fbo));
+                         brw_imm_d(fs_key->render_to_fbo));
       }
       inst->saturate = instr->dest.saturate;
       break;
    case nir_op_fddy_fine:
       inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0],
-                      fs_reg(fs_key->render_to_fbo));
+                      brw_imm_d(fs_key->render_to_fbo));
       inst->saturate = instr->dest.saturate;
       break;
    case nir_op_fddy_coarse:
       inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0],
-                      fs_reg(fs_key->render_to_fbo));
+                      brw_imm_d(fs_key->render_to_fbo));
       inst->saturate = instr->dest.saturate;
       break;
 
@@ -828,10 +828,10 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
       break;
 
    case nir_op_f2b:
-      bld.CMP(result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ);
+      bld.CMP(result, op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ);
       break;
    case nir_op_i2b:
-      bld.CMP(result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ);
+      bld.CMP(result, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ);
       break;
 
    case nir_op_ftrunc:
@@ -931,9 +931,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
        * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
        * subtract the result from 31 to convert the MSB count into an LSB count.
        */
-      bld.CMP(bld.null_reg_d(), result, fs_reg(-1), BRW_CONDITIONAL_NZ);
+      bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
 
-      inst = bld.ADD(result, result, fs_reg(31));
+      inst = bld.ADD(result, result, brw_imm_d(31));
       inst->predicate = BRW_PREDICATE_NORMAL;
       inst->src[0].negate = true;
       break;
@@ -986,7 +986,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
       if (optimize_frontfacing_ternary(instr, result))
          return;
 
-      bld.CMP(bld.null_reg_d(), op[0], fs_reg(0), BRW_CONDITIONAL_NZ);
+      bld.CMP(bld.null_reg_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ);
       inst = bld.SEL(result, op[1], op[2]);
       inst->predicate = BRW_PREDICATE_NORMAL;
       break;
@@ -1001,7 +1001,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
    if (devinfo->gen <= 5 &&
        (instr->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) == BRW_NIR_BOOLEAN_NEEDS_RESOLVE) {
       fs_reg masked = vgrf(glsl_type::int_type);
-      bld.AND(masked, result, fs_reg(1));
+      bld.AND(masked, result, brw_imm_d(1));
       masked.negate = true;
       bld.MOV(retype(result, BRW_REGISTER_TYPE_D), masked);
    }
@@ -1014,7 +1014,7 @@ fs_visitor::nir_emit_load_const(const fs_builder &bld,
    fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, instr->def.num_components);
 
    for (unsigned i = 0; i < instr->def.num_components; i++)
-      bld.MOV(offset(reg, bld, i), fs_reg(instr->value.i[i]));
+      bld.MOV(offset(reg, bld, i), brw_imm_d(instr->value.i[i]));
 
    nir_ssa_values[instr->def.index] = reg;
 }
@@ -1042,7 +1042,7 @@ fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg,
 
       reg.reladdr = new(v->mem_ctx) fs_reg(v->vgrf(glsl_type::int_type));
       v->bld.MUL(*reg.reladdr, v->get_nir_src(*indirect),
-                 fs_reg(multiplier));
+                 brw_imm_d(multiplier));
    }
 
    return reg;
@@ -1108,12 +1108,12 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
              */
             bld.emit_minmax(tmp, retype(get_nir_src(deref_array->indirect),
                                         BRW_REGISTER_TYPE_UD),
-                            fs_reg(size - base - 1), BRW_CONDITIONAL_L);
+                            brw_imm_ud(size - base - 1), BRW_CONDITIONAL_L);
          } else {
             bld.MOV(tmp, get_nir_src(deref_array->indirect));
          }
 
-         bld.MUL(tmp, tmp, fs_reg(element_size));
+         bld.MUL(tmp, tmp, brw_imm_ud(element_size));
          if (image.reladdr)
             bld.ADD(*image.reladdr, *image.reladdr, tmp);
          else
@@ -1232,7 +1232,7 @@ intexp2(const fs_builder &bld, const fs_reg &x)
    fs_reg result = bld.vgrf(x.type, 1);
    fs_reg one = bld.vgrf(x.type, 1);
 
-   bld.MOV(one, retype(fs_reg(1), one.type));
+   bld.MOV(one, retype(brw_imm_d(1), one.type));
    bld.SHL(result, one, x);
    return result;
 }
@@ -1285,7 +1285,7 @@ fs_visitor::emit_gs_end_primitive(const nir_src &vertex_count_nir_src)
 
    /* control_data_bits |= 1 << ((vertex_count - 1) % 32) */
    fs_reg prev_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
-   abld.ADD(prev_count, vertex_count, fs_reg(0xffffffffu));
+   abld.ADD(prev_count, vertex_count, brw_imm_ud(0xffffffffu));
    fs_reg mask = intexp2(abld, prev_count);
    /* Note: we're relying on the fact that the GEN SHL instruction only pays
     * attention to the lower 5 bits of its second source argument, so on this
@@ -1356,26 +1356,26 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count)
    if (opcode != SHADER_OPCODE_URB_WRITE_SIMD8) {
       fs_reg dword_index = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
       fs_reg prev_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
-      abld.ADD(prev_count, vertex_count, fs_reg(0xffffffffu));
+      abld.ADD(prev_count, vertex_count, brw_imm_ud(0xffffffffu));
       unsigned log2_bits_per_vertex =
          _mesa_fls(gs_compile->control_data_bits_per_vertex);
-      abld.SHR(dword_index, prev_count, fs_reg(6u - log2_bits_per_vertex));
+      abld.SHR(dword_index, prev_count, brw_imm_ud(6u - log2_bits_per_vertex));
 
       if (per_slot_offset.file != BAD_FILE) {
          /* Set the per-slot offset to dword_index / 4, so that we'll write to
           * the appropriate OWord within the control data header.
           */
-         abld.SHR(per_slot_offset, dword_index, fs_reg(2u));
+         abld.SHR(per_slot_offset, dword_index, brw_imm_ud(2u));
       }
 
       /* Set the channel masks to 1 << (dword_index % 4), so that we'll
        * write to the appropriate DWORD within the OWORD.
        */
       fs_reg channel = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
-      fwa_bld.AND(channel, dword_index, fs_reg(3u));
+      fwa_bld.AND(channel, dword_index, brw_imm_ud(3u));
       channel_mask = intexp2(fwa_bld, channel);
       /* Then the channel masks need to be in bits 23:16. */
-      fwa_bld.SHL(channel_mask, channel_mask, fs_reg(16u));
+      fwa_bld.SHL(channel_mask, channel_mask, brw_imm_ud(16u));
    }
 
    /* Store the control data bits in the message payload and send it. */
@@ -1435,11 +1435,11 @@ fs_visitor::set_gs_stream_control_data_bits(const fs_reg &vertex_count,
 
    /* reg::sid = stream_id */
    fs_reg sid = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
-   abld.MOV(sid, fs_reg(stream_id));
+   abld.MOV(sid, brw_imm_ud(stream_id));
 
    /* reg:shift_count = 2 * (vertex_count - 1) */
    fs_reg shift_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
-   abld.SHL(shift_count, vertex_count, fs_reg(1u));
+   abld.SHL(shift_count, vertex_count, brw_imm_ud(1u));
 
    /* Note: we're relying on the fact that the GEN SHL instruction only pays
     * attention to the lower 5 bits of its second source argument, so on this
@@ -1510,14 +1510,14 @@ fs_visitor::emit_gs_vertex(const nir_src &vertex_count_nir_src,
        */
       fs_inst *inst =
          abld.AND(bld.null_reg_d(), vertex_count,
-                  fs_reg(32u / gs_compile->control_data_bits_per_vertex - 1u));
+                  brw_imm_ud(32u / gs_compile->control_data_bits_per_vertex - 1u));
       inst->conditional_mod = BRW_CONDITIONAL_Z;
 
       abld.IF(BRW_PREDICATE_NORMAL);
       /* If vertex_count is 0, then no control data bits have been
        * accumulated yet, so we can skip emitting them.
        */
-      abld.CMP(bld.null_reg_d(), vertex_count, fs_reg(0u),
+      abld.CMP(bld.null_reg_d(), vertex_count, brw_imm_ud(0u),
                BRW_CONDITIONAL_NEQ);
       abld.IF(BRW_PREDICATE_NORMAL);
       emit_gs_control_data_bits(vertex_count);
@@ -1530,7 +1530,7 @@ fs_visitor::emit_gs_vertex(const nir_src &vertex_count_nir_src,
        * effect of any call to EndPrimitive() that the shader may have
        * made before outputting its first vertex.
        */
-      inst = abld.MOV(this->control_data_bits, fs_reg(0u));
+      inst = abld.MOV(this->control_data_bits, brw_imm_ud(0u));
       inst->force_writemask_all = true;
       abld.emit(BRW_OPCODE_ENDIF);
    }
@@ -1613,7 +1613,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
          /* sequence = <7, 6, 5, 4, 3, 2, 1, 0> */
          bld.MOV(sequence, fs_reg(brw_imm_v(0x76543210)));
          /* channel_offsets = 4 * sequence = <28, 24, 20, 16, 12, 8, 4, 0> */
-         bld.SHL(channel_offsets, sequence, fs_reg(2u));
+         bld.SHL(channel_offsets, sequence, brw_imm_ud(2u));
          /* Convert vertex_index to bytes (multiply by 32) */
          bld.SHL(vertex_offset_bytes,
                  retype(get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD),
@@ -1627,7 +1627,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
          bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle,
                   fs_reg(brw_vec8_grf(first_icp_handle, 0)),
                   fs_reg(icp_offset_bytes),
-                  fs_reg(nir->info.gs.vertices_in * REG_SIZE));
+                  brw_imm_ud(nir->info.gs.vertices_in * REG_SIZE));
       }
 
       fs_inst *inst;
@@ -1797,7 +1797,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
       fs_inst *cmp;
       if (instr->intrinsic == nir_intrinsic_discard_if) {
          cmp = bld.CMP(bld.null_reg_f(), get_nir_src(instr->src[0]),
-                       fs_reg(0), BRW_CONDITIONAL_Z);
+                       brw_imm_d(0), BRW_CONDITIONAL_Z);
       } else {
          fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
                                        BRW_REGISTER_TYPE_UW));
@@ -1845,7 +1845,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
                                       FS_OPCODE_INTERPOLATE_AT_CENTROID,
                                       dst_xy,
                                       fs_reg(), /* src */
-                                      fs_reg(0u),
+                                      brw_imm_ud(0u),
                                       interpolation);
          break;
 
@@ -1859,7 +1859,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
                                          FS_OPCODE_INTERPOLATE_AT_SAMPLE,
                                          dst_xy,
                                          fs_reg(), /* src */
-                                         fs_reg(msg_data),
+                                         brw_imm_ud(msg_data),
                                          interpolation);
          } else {
             const fs_reg sample_src = retype(get_nir_src(instr->src[0]),
@@ -1868,7 +1868,8 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
             if (nir_src_is_dynamically_uniform(instr->src[0])) {
                const fs_reg sample_id = bld.emit_uniformize(sample_src);
                const fs_reg msg_data = vgrf(glsl_type::uint_type);
-               bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
+               bld.exec_all().group(1, 0)
+                  .SHL(msg_data, sample_id, brw_imm_ud(4u));
                emit_pixel_interpolater_send(bld,
                                             FS_OPCODE_INTERPOLATE_AT_SAMPLE,
                                             dst_xy,
@@ -1894,7 +1895,8 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
                        sample_src, sample_id,
                        BRW_CONDITIONAL_EQ);
                const fs_reg msg_data = vgrf(glsl_type::uint_type);
-               bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
+               bld.exec_all().group(1, 0)
+                  .SHL(msg_data, sample_id, brw_imm_ud(4u));
                fs_inst *inst =
                   emit_pixel_interpolater_send(bld,
                                                FS_OPCODE_INTERPOLATE_AT_SAMPLE,
@@ -1925,7 +1927,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
                                          FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
                                          dst_xy,
                                          fs_reg(), /* src */
-                                         fs_reg(off_x | (off_y << 4)),
+                                         brw_imm_ud(off_x | (off_y << 4)),
                                          interpolation);
          } else {
             fs_reg src = vgrf(glsl_type::ivec2_type);
@@ -1933,7 +1935,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
                                        BRW_REGISTER_TYPE_F);
             for (int i = 0; i < 2; i++) {
                fs_reg temp = vgrf(glsl_type::float_type);
-               bld.MUL(temp, offset(offset_src, bld, i), fs_reg(16.0f));
+               bld.MUL(temp, offset(offset_src, bld, i), brw_imm_f(16.0f));
                fs_reg itemp = vgrf(glsl_type::int_type);
                bld.MOV(itemp, temp);  /* float to int */
 
@@ -1953,7 +1955,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
                 * FRAGMENT_INTERPOLATION_OFFSET_BITS"
                 */
                set_condmod(BRW_CONDITIONAL_L,
-                           bld.SEL(offset(src, bld, i), itemp, fs_reg(7)));
+                           bld.SEL(offset(src, bld, i), itemp, brw_imm_d(7)));
             }
 
             const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET;
@@ -1961,7 +1963,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
                                          opcode,
                                          dst_xy,
                                          src,
-                                         fs_reg(0u),
+                                         brw_imm_ud(0u),
                                          interpolation);
          }
          break;
@@ -2021,14 +2023,14 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
 
       cs_prog_data->uses_num_work_groups = true;
 
-      fs_reg surf_index = fs_reg(surface);
+      fs_reg surf_index = brw_imm_ud(surface);
       brw_mark_surface_used(prog_data, surface);
 
       /* Read the 3 GLuint components of gl_NumWorkGroups */
       for (unsigned i = 0; i < 3; i++) {
          fs_reg read_result =
             emit_untyped_read(bld, surf_index,
-                              fs_reg(i << 2),
+                              brw_imm_ud(i << 2),
                               1 /* dims */, 1 /* size */,
                               BRW_PREDICATE_NONE);
          read_result.type = dest.type;
@@ -2068,16 +2070,16 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       /* Emit a surface read or atomic op. */
       switch (instr->intrinsic) {
       case nir_intrinsic_atomic_counter_read:
-         tmp = emit_untyped_read(bld, fs_reg(surface), offset, 1, 1);
+         tmp = emit_untyped_read(bld, brw_imm_ud(surface), offset, 1, 1);
          break;
 
       case nir_intrinsic_atomic_counter_inc:
-         tmp = emit_untyped_atomic(bld, fs_reg(surface), offset, fs_reg(),
+         tmp = emit_untyped_atomic(bld, brw_imm_ud(surface), offset, fs_reg(),
                                    fs_reg(), 1, 1, BRW_AOP_INC);
          break;
 
       case nir_intrinsic_atomic_counter_dec:
-         tmp = emit_untyped_atomic(bld, fs_reg(surface), offset, fs_reg(),
+         tmp = emit_untyped_atomic(bld, brw_imm_ud(surface), offset, fs_reg(),
                                    fs_reg(), 1, 1, BRW_AOP_PREDEC);
          break;
 
@@ -2219,14 +2221,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       for (unsigned c = 0; c < info->dest_components; ++c) {
          if ((int)c >= type->coordinate_components()) {
              bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c),
-                     fs_reg(1));
+                     brw_imm_d(1));
          } else if (c == 1 && is_1d_array_image) {
             bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c),
                     offset(size, bld, 2));
          } else if (c == 2 && is_cube_array_image) {
             bld.emit(SHADER_OPCODE_INT_QUOTIENT,
                      offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c),
-                     offset(size, bld, c), fs_reg(6));
+                     offset(size, bld, c), brw_imm_d(6));
          } else {
             bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c),
                     offset(size, bld, c));
@@ -2238,7 +2240,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
    case nir_intrinsic_image_samples:
       /* The driver does not support multi-sampled images. */
-      bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), fs_reg(1));
+      bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(1));
       break;
 
    case nir_intrinsic_load_uniform_indirect:
@@ -2269,7 +2271,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       if (const_index) {
          const unsigned index = stage_prog_data->binding_table.ubo_start +
                                 const_index->u[0];
-         surf_index = fs_reg(index);
+         surf_index = brw_imm_ud(index);
          brw_mark_surface_used(prog_data, index);
       } else {
          /* The block index is not a constant. Evaluate the index expression
@@ -2278,7 +2280,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
           */
          surf_index = vgrf(glsl_type::uint_type);
          bld.ADD(surf_index, get_nir_src(instr->src[0]),
-                 fs_reg(stage_prog_data->binding_table.ubo_start));
+                 brw_imm_ud(stage_prog_data->binding_table.ubo_start));
          surf_index = bld.emit_uniformize(surf_index);
 
          /* Assume this may touch any UBO. It would be nice to provide
@@ -2294,7 +2296,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
          fs_reg base_offset = vgrf(glsl_type::int_type);
          bld.SHR(base_offset, retype(get_nir_src(instr->src[1]),
                                      BRW_REGISTER_TYPE_D),
-                 fs_reg(2));
+                 brw_imm_d(2));
 
          unsigned vec4_offset = instr->const_index[0] / 4;
          for (int i = 0; i < instr->num_components; i++)
@@ -2304,7 +2306,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
          fs_reg packed_consts = vgrf(glsl_type::float_type);
          packed_consts.type = dest.type;
 
-         fs_reg const_offset_reg((unsigned) instr->const_index[0] & ~15);
+         struct brw_reg const_offset_reg = brw_imm_ud(instr->const_index[0] & ~15);
          bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
                   surf_index, const_offset_reg);
 
@@ -2336,12 +2338,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       if (const_uniform_block) {
          unsigned index = stage_prog_data->binding_table.ssbo_start +
                           const_uniform_block->u[0];
-         surf_index = fs_reg(index);
+         surf_index = brw_imm_ud(index);
          brw_mark_surface_used(prog_data, index);
       } else {
          surf_index = vgrf(glsl_type::uint_type);
          bld.ADD(surf_index, get_nir_src(instr->src[0]),
-                 fs_reg(stage_prog_data->binding_table.ssbo_start));
+                 brw_imm_ud(stage_prog_data->binding_table.ssbo_start));
 
          /* Assume this may touch any UBO. It would be nice to provide
           * a tighter bound, but the array information is already lowered away.
@@ -2356,7 +2358,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       if (has_indirect) {
          offset_reg = get_nir_src(instr->src[1]);
       } else {
-         offset_reg = fs_reg(instr->const_index[0]);
+         offset_reg = brw_imm_ud(instr->const_index[0]);
       }
 
       /* Read the vector */
@@ -2407,12 +2409,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       if (const_uniform_block) {
          unsigned index = stage_prog_data->binding_table.ssbo_start +
                           const_uniform_block->u[0];
-         surf_index = fs_reg(index);
+         surf_index = brw_imm_ud(index);
          brw_mark_surface_used(prog_data, index);
       } else {
          surf_index = vgrf(glsl_type::uint_type);
          bld.ADD(surf_index, get_nir_src(instr->src[1]),
-                  fs_reg(stage_prog_data->binding_table.ssbo_start));
+                  brw_imm_ud(stage_prog_data->binding_table.ssbo_start));
 
          brw_mark_surface_used(prog_data,
                                stage_prog_data->binding_table.ssbo_start +
@@ -2436,12 +2438,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
          fs_reg offset_reg;
 
          if (!has_indirect) {
-            offset_reg = fs_reg(instr->const_index[0] + 4 * first_component);
+            offset_reg = brw_imm_ud(instr->const_index[0] + 4 * first_component);
          } else {
             offset_reg = vgrf(glsl_type::uint_type);
             bld.ADD(offset_reg,
                     retype(get_nir_src(instr->src[2]), BRW_REGISTER_TYPE_UD),
-                    fs_reg(4 * first_component));
+                    brw_imm_ud(4 * first_component));
          }
 
          emit_untyped_write(bld, surf_index, offset_reg,
@@ -2512,7 +2514,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       int reg_width = dispatch_width / 8;
 
       /* Set LOD = 0 */
-      fs_reg source = fs_reg(0);
+      fs_reg source = brw_imm_d(0);
 
       int mlen = 1 * reg_width;
 
@@ -2531,7 +2533,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
                                   BRW_REGISTER_TYPE_UD);
       const unsigned index = prog_data->binding_table.ssbo_start + ssbo_index;
       fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, buffer_size,
-                               src_payload, fs_reg(index));
+                               src_payload, brw_imm_ud(index));
       inst->header_size = 0;
       inst->mlen = mlen;
       inst->regs_written = regs_written;
@@ -2560,12 +2562,12 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
    if (const_surface) {
       unsigned surf_index = stage_prog_data->binding_table.ssbo_start +
                             const_surface->u[0];
-      surface = fs_reg(surf_index);
+      surface = brw_imm_ud(surf_index);
       brw_mark_surface_used(prog_data, surf_index);
    } else {
       surface = vgrf(glsl_type::uint_type);
       bld.ADD(surface, get_nir_src(instr->src[0]),
-              fs_reg(stage_prog_data->binding_table.ssbo_start));
+              brw_imm_ud(stage_prog_data->binding_table.ssbo_start));
 
       /* Assume this may touch any SSBO. This is the same we do for other
        * UBO/SSBO accesses with non-constant surface.
@@ -2597,7 +2599,7 @@ void
 fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
 {
    unsigned sampler = instr->sampler_index;
-   fs_reg sampler_reg(sampler);
+   fs_reg sampler_reg(brw_imm_ud(sampler));
 
    int gather_component = instr->component;
 
@@ -2676,7 +2678,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
 
          /* Emit code to evaluate the actual indexing expression */
          sampler_reg = vgrf(glsl_type::uint_type);
-         bld.ADD(sampler_reg, src, fs_reg(sampler));
+         bld.ADD(sampler_reg, src, brw_imm_ud(sampler));
          sampler_reg = bld.emit_uniformize(sampler_reg);
          break;
       }
@@ -2691,14 +2693,14 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
           key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
          mcs = emit_mcs_fetch(coordinate, instr->coord_components, sampler_reg);
       } else {
-         mcs = fs_reg(0u);
+         mcs = brw_imm_ud(0u);
       }
    }
 
    for (unsigned i = 0; i < 3; i++) {
       if (instr->const_offset[i] != 0) {
          assert(offset_components == 0);
-         tex_offset = fs_reg(brw_texture_offset(instr->const_offset, 3));
+         tex_offset = brw_imm_ud(brw_texture_offset(instr->const_offset, 3));
          break;
       }
    }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
index 534d8490cdf..45694ec0894 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
@@ -44,7 +44,7 @@ namespace brw {
              */
             const fs_reg usurface = bld.emit_uniformize(surface);
             const fs_reg srcs[] = {
-               addr, src, usurface, fs_reg(dims), fs_reg(arg)
+               addr, src, usurface, brw_imm_ud(dims), brw_imm_ud(arg)
             };
             const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, rsize);
             fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
@@ -330,7 +330,7 @@ namespace {
              * messages causes a hang on IVB and VLV.
              */
             set_predicate(pred,
-                          bld.CMP(bld.null_reg_ud(), stride, fs_reg(4),
+                          bld.CMP(bld.null_reg_ud(), stride, brw_imm_d(4),
                                   BRW_CONDITIONAL_G));
 
             return BRW_PREDICATE_NORMAL;
@@ -361,7 +361,7 @@ namespace {
              */
             bld.CMP(bld.null_reg_ud(),
                     retype(size, BRW_REGISTER_TYPE_UD),
-                    fs_reg(0), BRW_CONDITIONAL_NZ);
+                    brw_imm_d(0), BRW_CONDITIONAL_NZ);
 
             return BRW_PREDICATE_NORMAL;
          } else {
@@ -438,7 +438,7 @@ namespace {
              * FINISHME: Factor out this frequently recurring pattern into a
              * helper function.
              */
-            const fs_reg srcs[] = { addr, fs_reg(0), offset(addr, bld, 1) };
+            const fs_reg srcs[] = { addr, brw_imm_d(0), offset(addr, bld, 1) };
             const fs_reg dst = bld.vgrf(addr.type, dims);
             bld.LOAD_PAYLOAD(dst, srcs, dims, 0);
             return dst;
@@ -488,7 +488,7 @@ namespace {
             bld.ADD(offset(addr, bld, c), offset(off, bld, c),
                     (c < dims ?
                      offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, c) :
-                     fs_reg(0)));
+                     fs_reg(brw_imm_d(0))));
 
          /* The layout of 3-D textures in memory is sort-of like a tiling
           * format.  At each miplevel, the slices are arranged in rows of
@@ -515,7 +515,7 @@ namespace {
             /* Decompose z into a major (tmp.y) and a minor (tmp.x)
              * index.
              */
-            bld.BFE(offset(tmp, bld, 0), offset(tile, bld, 2), fs_reg(0),
+            bld.BFE(offset(tmp, bld, 0), offset(tile, bld, 2), brw_imm_d(0),
                     offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, 2));
             bld.SHR(offset(tmp, bld, 1),
                     offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, 2),
@@ -549,7 +549,7 @@ namespace {
             for (unsigned c = 0; c < 2; ++c) {
                /* Calculate the minor x and y indices. */
                bld.BFE(offset(minor, bld, c), offset(tile, bld, c),
-                       fs_reg(0), offset(addr, bld, c));
+                       brw_imm_d(0), offset(addr, bld, c));
 
                /* Calculate the major x and y indices. */
                bld.SHR(offset(major, bld, c),
@@ -595,7 +595,7 @@ namespace {
 
                /* XOR tmp.x and tmp.y with bit 6 of the memory address. */
                bld.XOR(tmp, tmp, offset(tmp, bld, 1));
-               bld.AND(tmp, tmp, fs_reg(1 << 6));
+               bld.AND(tmp, tmp, brw_imm_d(1 << 6));
                bld.XOR(dst, dst, tmp);
             }
 
@@ -647,7 +647,7 @@ namespace {
                const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD);
 
                /* Shift each component left to the correct bitfield position. */
-               bld.SHL(tmp, offset(src, bld, c), fs_reg(shifts[c] % 32));
+               bld.SHL(tmp, offset(src, bld, c), brw_imm_ud(shifts[c] % 32));
 
                /* Add everything up. */
                if (seen[shifts[c] / 32]) {
@@ -679,13 +679,13 @@ namespace {
                /* Shift left to discard the most significant bits. */
                bld.SHL(offset(dst, bld, c),
                        offset(src, bld, shifts[c] / 32),
-                       fs_reg(32 - shifts[c] % 32 - widths[c]));
+                       brw_imm_ud(32 - shifts[c] % 32 - widths[c]));
 
                /* Shift back to the least significant bits using an arithmetic
                 * shift to get sign extension on signed types.
                 */
                bld.ASR(offset(dst, bld, c),
-                       offset(dst, bld, c), fs_reg(32 - widths[c]));
+                       offset(dst, bld, c), brw_imm_ud(32 - widths[c]));
             }
          }
 
@@ -709,13 +709,13 @@ namespace {
             if (widths[c]) {
                /* Clamp to the maximum value. */
                bld.emit_minmax(offset(dst, bld, c), offset(src, bld, c),
-                               fs_reg((int)scale(widths[c] - s)),
+                               brw_imm_d((int)scale(widths[c] - s)),
                                BRW_CONDITIONAL_L);
 
                /* Clamp to the minimum value. */
                if (is_signed)
                   bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c),
-                                  fs_reg(-(int)scale(widths[c] - s) - 1),
+                                  brw_imm_d(-(int)scale(widths[c] - s) - 1),
                                   BRW_CONDITIONAL_GE);
             }
          }
@@ -741,12 +741,12 @@ namespace {
 
                /* Divide by the normalization constants. */
                bld.MUL(offset(dst, bld, c), offset(dst, bld, c),
-                       fs_reg(1.0f / scale(widths[c] - s)));
+                       brw_imm_f(1.0f / scale(widths[c] - s)));
 
                /* Clamp to the minimum value. */
                if (is_signed)
                   bld.emit_minmax(offset(dst, bld, c),
-                                  offset(dst, bld, c), fs_reg(-1.0f),
+                                  offset(dst, bld, c), brw_imm_f(-1.0f),
                                   BRW_CONDITIONAL_GE);
             }
          }
@@ -771,10 +771,10 @@ namespace {
                /* Clamp the normalized floating-point argument. */
                if (is_signed) {
                   bld.emit_minmax(offset(fdst, bld, c), offset(src, bld, c),
-                                  fs_reg(-1.0f), BRW_CONDITIONAL_GE);
+                                  brw_imm_f(-1.0f), BRW_CONDITIONAL_GE);
 
                   bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
-                                  fs_reg(1.0f), BRW_CONDITIONAL_L);
+                                  brw_imm_f(1.0f), BRW_CONDITIONAL_L);
                } else {
                   set_saturate(true, bld.MOV(offset(fdst, bld, c),
                                              offset(src, bld, c)));
@@ -782,7 +782,7 @@ namespace {
 
                /* Multiply by the normalization constants. */
                bld.MUL(offset(fdst, bld, c), offset(fdst, bld, c),
-                       fs_reg((float)scale(widths[c] - s)));
+                       brw_imm_f((float)scale(widths[c] - s)));
 
                /* Convert to integer. */
                bld.RNDE(offset(fdst, bld, c), offset(fdst, bld, c));
@@ -814,7 +814,7 @@ namespace {
                 */
                if (widths[c] < 16)
                   bld.SHL(offset(dst, bld, c),
-                          offset(dst, bld, c), fs_reg(15 - widths[c]));
+                          offset(dst, bld, c), brw_imm_ud(15 - widths[c]));
 
                /* Convert to 32-bit floating point. */
                bld.F16TO32(offset(fdst, bld, c), offset(dst, bld, c));
@@ -842,7 +842,7 @@ namespace {
                /* Clamp to the minimum value. */
                if (widths[c] < 16)
                   bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
-                                  fs_reg(0.0f), BRW_CONDITIONAL_GE);
+                                  brw_imm_f(0.0f), BRW_CONDITIONAL_GE);
 
                /* Convert to 16-bit floating-point. */
                bld.F32TO16(offset(dst, bld, c), offset(fdst, bld, c));
@@ -855,7 +855,7 @@ namespace {
                 */
                if (widths[c] < 16)
                   bld.SHR(offset(dst, bld, c), offset(dst, bld, c),
-                          fs_reg(15 - widths[c]));
+                          brw_imm_ud(15 - widths[c]));
             }
          }
 
@@ -874,7 +874,8 @@ namespace {
 
          for (unsigned c = 0; c < 4; ++c)
             bld.MOV(offset(dst, bld, c),
-                    widths[c] ? offset(src, bld, c) : fs_reg(pad[c]));
+                    widths[c] ? offset(src, bld, c)
+                              : fs_reg(brw_imm_ud(pad[c])));
 
          return dst;
       }
@@ -939,7 +940,7 @@ namespace brw {
             /* An out of bounds surface access should give zero as result. */
             for (unsigned c = 0; c < size; ++c)
                set_predicate(pred, bld.SEL(offset(tmp, bld, c),
-                                           offset(tmp, bld, c), fs_reg(0)));
+                                           offset(tmp, bld, c), brw_imm_d(0)));
          }
 
          /* Set the register type to D instead of UD if the data type is
@@ -1122,7 +1123,7 @@ namespace brw {
 
          /* An unbound surface access should give zero as result. */
          if (rsize)
-            set_predicate(pred, bld.SEL(tmp, tmp, fs_reg(0)));
+            set_predicate(pred, bld.SEL(tmp, tmp, brw_imm_d(0)));
 
          return tmp;
       }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index a7bd9cea7af..d97fcf33b62 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -165,7 +165,7 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
 	    chan = offset(chan, bld, i);
 
             set_condmod(BRW_CONDITIONAL_GE,
-                        bld.emit(BRW_OPCODE_SEL, chan, chan, fs_reg(0.0f)));
+                        bld.emit(BRW_OPCODE_SEL, chan, chan, brw_imm_f(0.0f)));
 
 	    /* Our parameter comes in as 1.0/width or 1.0/height,
 	     * because that's what people normally want for doing
@@ -203,7 +203,7 @@ fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
    const fs_reg dest = vgrf(glsl_type::uvec4_type);
    const fs_reg srcs[] = {
       coordinate, fs_reg(), fs_reg(), fs_reg(), fs_reg(), fs_reg(),
-      sampler, fs_reg(), fs_reg(components), fs_reg(0)
+      sampler, fs_reg(), brw_imm_ud(components), brw_imm_d(0)
    };
    fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs,
                             ARRAY_SIZE(srcs));
@@ -244,7 +244,7 @@ fs_visitor::emit_texture(ir_texture_opcode op,
          this->result = res;
 
          for (int i=0; i<4; i++) {
-            bld.MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f));
+            bld.MOV(res, brw_imm_f(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f));
             res = offset(res, bld, 1);
          }
          return;
@@ -256,7 +256,7 @@ fs_visitor::emit_texture(ir_texture_opcode op,
        * pass a valid LOD argument.
        */
       assert(lod.file == BAD_FILE);
-      lod = fs_reg(0u);
+      lod = brw_imm_ud(0u);
    }
 
    if (coordinate.file != BAD_FILE) {
@@ -274,7 +274,7 @@ fs_visitor::emit_texture(ir_texture_opcode op,
    const fs_reg srcs[] = {
       coordinate, shadow_c, lod, lod2,
       sample_index, mcs, sampler_reg, offset_value,
-      fs_reg(coord_components), fs_reg(grad_components)
+      brw_imm_d(coord_components), brw_imm_d(grad_components)
    };
    enum opcode opcode;
 
@@ -336,7 +336,7 @@ fs_visitor::emit_texture(ir_texture_opcode op,
    if (op == ir_txs && is_cube_array) {
       fs_reg depth = offset(dst, bld, 2);
       fs_reg fixed_depth = vgrf(glsl_type::int_type);
-      bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6));
+      bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, brw_imm_d(6));
 
       fs_reg *fixed_payload = ralloc_array(mem_ctx, fs_reg, inst->regs_written);
       int components = inst->regs_written / (inst->exec_size / 8);
@@ -367,7 +367,7 @@ fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst)
    for (int i = 0; i < 4; i++) {
       fs_reg dst_f = retype(dst, BRW_REGISTER_TYPE_F);
       /* Convert from UNORM to UINT */
-      bld.MUL(dst_f, dst_f, fs_reg((float)((1 << width) - 1)));
+      bld.MUL(dst_f, dst_f, brw_imm_f((1 << width) - 1));
       bld.MOV(dst, dst_f);
 
       if (wa & WA_SIGN) {
@@ -375,8 +375,8 @@ fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst)
           * shifting the sign bit into place, then shifting back
           * preserving sign.
           */
-         bld.SHL(dst, dst, fs_reg(32 - width));
-         bld.ASR(dst, dst, fs_reg(32 - width));
+         bld.SHL(dst, dst, brw_imm_d(32 - width));
+         bld.ASR(dst, dst, brw_imm_d(32 - width));
       }
 
       dst = offset(dst, bld, 1);
@@ -440,9 +440,9 @@ fs_visitor::swizzle_result(ir_texture_opcode op, int dest_components,
 	 l = offset(l, bld, i);
 
 	 if (swiz == SWIZZLE_ZERO) {
-            bld.MOV(l, fs_reg(0.0f));
+            bld.MOV(l, brw_imm_f(0.0f));
 	 } else if (swiz == SWIZZLE_ONE) {
-            bld.MOV(l, fs_reg(1.0f));
+            bld.MOV(l, brw_imm_f(1.0f));
 	 } else {
             bld.MOV(l, offset(orig_val, bld,
                                   GET_SWZ(key_tex->swizzles[sampler], i)));
@@ -462,7 +462,7 @@ fs_visitor::emit_dummy_fs()
    const float color[4] = { 1.0, 0.0, 1.0, 0.0 };
    for (int i = 0; i < 4; i++) {
       bld.MOV(fs_reg(MRF, 2 + i * reg_width, BRW_REGISTER_TYPE_F),
-              fs_reg(color[i]));
+              brw_imm_f(color[i]));
    }
 
    fs_inst *write;
@@ -681,7 +681,7 @@ fs_visitor::emit_alpha_test()
       fs_reg color = offset(outputs[0], bld, 3);
 
       /* f0.1 &= func(color, ref) */
-      cmp = abld.CMP(bld.null_reg_f(), color, fs_reg(key->alpha_test_ref),
+      cmp = abld.CMP(bld.null_reg_f(), color, brw_imm_f(key->alpha_test_ref),
                      cond_for_alpha_func(key->alpha_test_func));
    }
    cmp->predicate = BRW_PREDICATE_NORMAL;
@@ -714,7 +714,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
 
    const fs_reg sources[] = {
       color0, color1, src0_alpha, src_depth, dst_depth, src_stencil,
-      sample_mask, fs_reg(components)
+      sample_mask, brw_imm_ud(components)
    };
    assert(ARRAY_SIZE(sources) - 1 == FB_WRITE_LOGICAL_SRC_COMPONENTS);
    fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(),
@@ -948,12 +948,12 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
 
       fs_reg offset;
       if (gs_vertex_count.file == IMM) {
-         per_slot_offsets = fs_reg(output_vertex_size_owords *
-                                   gs_vertex_count.ud);
+         per_slot_offsets = brw_imm_ud(output_vertex_size_owords *
+                                       gs_vertex_count.ud);
       } else {
          per_slot_offsets = vgrf(glsl_type::int_type);
          bld.MUL(per_slot_offsets, gs_vertex_count,
-                 fs_reg(output_vertex_size_owords));
+                 brw_imm_ud(output_vertex_size_owords));
       }
    }
 
@@ -976,7 +976,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
          }
 
          fs_reg zero(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
-         bld.MOV(zero, fs_reg(0u));
+         bld.MOV(zero, brw_imm_ud(0u));
 
          sources[length++] = zero;
          if (vue_map->slots_valid & VARYING_BIT_LAYER)
@@ -1036,7 +1036,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
             for (unsigned i = 0; i < output_components[varying]; i++)
                sources[length++] = offset(this->outputs[varying], bld, i);
             for (unsigned i = output_components[varying]; i < 4; i++)
-               sources[length++] = fs_reg(0);
+               sources[length++] = brw_imm_d(0);
          }
          break;
       }
@@ -1113,11 +1113,11 @@ fs_visitor::emit_barrier()
    const fs_builder pbld = bld.exec_all().group(8, 0);
 
    /* Clear the message payload */
-   pbld.MOV(payload, fs_reg(0u));
+   pbld.MOV(payload, brw_imm_ud(0u));
 
    /* Copy bits 27:24 of r0.2 (barrier id) to the message payload reg.2 */
    fs_reg r0_2 = fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD));
-   pbld.AND(component(payload, 2), r0_2, fs_reg(0x0f000000u));
+   pbld.AND(component(payload, 2), r0_2, brw_imm_ud(0x0f000000u));
 
    /* Emit a gateway "barrier" message using the payload we set up, followed
     * by a wait instruction.
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index 7e977e9e727..0410053ce27 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -36,11 +36,6 @@ public:
    void init();
 
    fs_reg();
-   explicit fs_reg(float f);
-   explicit fs_reg(int32_t i);
-   explicit fs_reg(uint32_t u);
-   explicit fs_reg(uint8_t vf[4]);
-   explicit fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3);
    fs_reg(struct brw_reg reg);
    fs_reg(enum brw_reg_file file, int nr);
    fs_reg(enum brw_reg_file file, int nr, enum brw_reg_type type);
diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
index 62d39f70ec4..034d8a507fe 100644
--- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
@@ -107,7 +107,7 @@ TEST_F(cmod_propagation_test, basic)
    fs_reg dest = v->vgrf(glsl_type::float_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
-   fs_reg zero(0.0f);
+   fs_reg zero(brw_imm_f(0.0f));
    bld.ADD(dest, src0, src1);
    bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE);
 
@@ -139,7 +139,7 @@ TEST_F(cmod_propagation_test, cmp_nonzero)
    fs_reg dest = v->vgrf(glsl_type::float_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
-   fs_reg nonzero(1.0f);
+   fs_reg nonzero(brw_imm_f(1.0f));
    bld.ADD(dest, src0, src1);
    bld.CMP(bld.null_reg_f(), dest, nonzero, BRW_CONDITIONAL_GE);
 
@@ -171,7 +171,7 @@ TEST_F(cmod_propagation_test, non_cmod_instruction)
    const fs_builder &bld = v->bld;
    fs_reg dest = v->vgrf(glsl_type::uint_type);
    fs_reg src0 = v->vgrf(glsl_type::uint_type);
-   fs_reg zero(0u);
+   fs_reg zero(brw_imm_ud(0u));
    bld.FBL(dest, src0);
    bld.CMP(bld.null_reg_ud(), dest, zero, BRW_CONDITIONAL_GE);
 
@@ -205,7 +205,7 @@ TEST_F(cmod_propagation_test, intervening_flag_write)
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
    fs_reg src2 = v->vgrf(glsl_type::float_type);
-   fs_reg zero(0.0f);
+   fs_reg zero(brw_imm_f(0.0f));
    bld.ADD(dest, src0, src1);
    bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE);
    bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE);
@@ -244,7 +244,7 @@ TEST_F(cmod_propagation_test, intervening_flag_read)
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
    fs_reg src2 = v->vgrf(glsl_type::float_type);
-   fs_reg zero(0.0f);
+   fs_reg zero(brw_imm_f(0.0f));
    bld.ADD(dest0, src0, src1);
    set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
    bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE);
@@ -282,7 +282,7 @@ TEST_F(cmod_propagation_test, intervening_dest_write)
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
    fs_reg src2 = v->vgrf(glsl_type::vec2_type);
-   fs_reg zero(0.0f);
+   fs_reg zero(brw_imm_f(0.0f));
    bld.ADD(offset(dest, bld, 2), src0, src1);
    bld.emit(SHADER_OPCODE_TEX, dest, src2)
       ->regs_written = 4;
@@ -323,7 +323,7 @@ TEST_F(cmod_propagation_test, intervening_flag_read_same_value)
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
    fs_reg src2 = v->vgrf(glsl_type::float_type);
-   fs_reg zero(0.0f);
+   fs_reg zero(brw_imm_f(0.0f));
    set_condmod(BRW_CONDITIONAL_GE, bld.ADD(dest0, src0, src1));
    set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
    bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE);
@@ -360,7 +360,7 @@ TEST_F(cmod_propagation_test, negate)
    fs_reg dest = v->vgrf(glsl_type::float_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
-   fs_reg zero(0.0f);
+   fs_reg zero(brw_imm_f(0.0f));
    bld.ADD(dest, src0, src1);
    dest.negate = true;
    bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE);
@@ -425,7 +425,7 @@ TEST_F(cmod_propagation_test, different_types_cmod_with_zero)
    fs_reg dest = v->vgrf(glsl_type::int_type);
    fs_reg src0 = v->vgrf(glsl_type::int_type);
    fs_reg src1 = v->vgrf(glsl_type::int_type);
-   fs_reg zero(0.0f);
+   fs_reg zero(brw_imm_f(0.0f));
    bld.ADD(dest, src0, src1);
    bld.CMP(bld.null_reg_f(), retype(dest, BRW_REGISTER_TYPE_F), zero,
            BRW_CONDITIONAL_GE);
@@ -458,8 +458,8 @@ TEST_F(cmod_propagation_test, andnz_one)
    const fs_builder &bld = v->bld;
    fs_reg dest = v->vgrf(glsl_type::int_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
-   fs_reg zero(0.0f);
-   fs_reg one(1);
+   fs_reg zero(brw_imm_f(0.0f));
+   fs_reg one(brw_imm_d(1));
 
    bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
    set_condmod(BRW_CONDITIONAL_NZ,
@@ -493,8 +493,8 @@ TEST_F(cmod_propagation_test, andnz_non_one)
    const fs_builder &bld = v->bld;
    fs_reg dest = v->vgrf(glsl_type::int_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
-   fs_reg zero(0.0f);
-   fs_reg nonone(38);
+   fs_reg zero(brw_imm_f(0.0f));
+   fs_reg nonone(brw_imm_d(38));
 
    bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
    set_condmod(BRW_CONDITIONAL_NZ,
@@ -528,8 +528,8 @@ TEST_F(cmod_propagation_test, andz_one)
    const fs_builder &bld = v->bld;
    fs_reg dest = v->vgrf(glsl_type::int_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
-   fs_reg zero(0.0f);
-   fs_reg one(1);
+   fs_reg zero(brw_imm_f(0.0f));
+   fs_reg one(brw_imm_d(1));
 
    bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
    set_condmod(BRW_CONDITIONAL_Z,

From 9b978046eb1d1657060365e8dcde4aad41b50af9 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 2 Nov 2015 11:28:35 -0800
Subject: [PATCH 117/335] i965/fs: Use brw_imm_uw().

W/UW immediates are 16 bits wide, but those 16 bits must be replicated
in the high 16 bits of the 32-bit field.

Remove the useless W/UW immediate saturating code, since we'll now be
using the appropriate immediate (and W/UW immediates in the IR can no
longer be larger than 16 bits).

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 3 +--
 src/mesa/drivers/dri/i965/brw_shader.cpp | 8 ++------
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 8364bbfc0f4..3394e4a7567 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -454,8 +454,7 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
       tmp.subreg_offset = 2;
       tmp.stride = 2;
 
-      fs_inst *or_inst = bld.OR(tmp, g0, brw_imm_d(0x3f80));
-      or_inst->src[1].type = BRW_REGISTER_TYPE_UW;
+      bld.OR(tmp, g0, brw_imm_uw(0x3f80));
 
       tmp.type = BRW_REGISTER_TYPE_D;
       tmp.subreg_offset = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index d22164874c3..7a236cd9880 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -561,16 +561,12 @@ brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg)
    switch (type) {
    case BRW_REGISTER_TYPE_UD:
    case BRW_REGISTER_TYPE_D:
+   case BRW_REGISTER_TYPE_UW:
+   case BRW_REGISTER_TYPE_W:
    case BRW_REGISTER_TYPE_UQ:
    case BRW_REGISTER_TYPE_Q:
       /* Nothing to do. */
       return false;
-   case BRW_REGISTER_TYPE_UW:
-      sat_imm.ud = CLAMP(imm.ud, 0, USHRT_MAX);
-      break;
-   case BRW_REGISTER_TYPE_W:
-      sat_imm.d = CLAMP(imm.d, SHRT_MIN, SHRT_MAX);
-      break;
    case BRW_REGISTER_TYPE_F:
       sat_imm.f = CLAMP(imm.f, 0.0f, 1.0f);
       break;

From f9a9ba5eac2f1934bd7fecc92cd309f22411164b Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 2 Nov 2015 12:12:44 -0800
Subject: [PATCH 118/335] i965/vec4: Replace src_reg(imm) constructors with
 brw_imm_*().

Cuts 1.5k of .text.

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_ir_vec4.h       |  5 -
 src/mesa/drivers/dri/i965/brw_vec4.cpp        | 67 +++----------
 src/mesa/drivers/dri/i965/brw_vec4_builder.h  |  2 +-
 .../drivers/dri/i965/brw_vec4_gs_visitor.cpp  | 38 ++++----
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp    | 78 +++++++--------
 .../dri/i965/brw_vec4_surface_builder.cpp     |  8 +-
 .../drivers/dri/i965/brw_vec4_visitor.cpp     | 94 +++++++++----------
 .../drivers/dri/i965/brw_vec4_vs_visitor.cpp  | 20 ++--
 src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 78 +++++++--------
 .../dri/i965/test_vec4_cmod_propagation.cpp   | 38 ++++----
 .../dri/i965/test_vec4_copy_propagation.cpp   |  2 +-
 .../dri/i965/test_vec4_register_coalesce.cpp  |  4 +-
 12 files changed, 195 insertions(+), 239 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index 110e64b979e..e2e66044d3a 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -41,11 +41,6 @@ public:
 
    src_reg(enum brw_reg_file file, int nr, const glsl_type *type);
    src_reg();
-   src_reg(float f);
-   src_reg(uint32_t u);
-   src_reg(int32_t i);
-   src_reg(uint8_t vf[4]);
-   src_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3);
    src_reg(struct brw_reg reg);
 
    bool equals(const src_reg &r) const;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 3bcd5cbddf3..06b70778138 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -71,51 +71,6 @@ src_reg::src_reg()
    init();
 }
 
-src_reg::src_reg(float f)
-{
-   init();
-
-   this->file = IMM;
-   this->type = BRW_REGISTER_TYPE_F;
-   this->f = f;
-}
-
-src_reg::src_reg(uint32_t u)
-{
-   init();
-
-   this->file = IMM;
-   this->type = BRW_REGISTER_TYPE_UD;
-   this->ud = u;
-}
-
-src_reg::src_reg(int32_t i)
-{
-   init();
-
-   this->file = IMM;
-   this->type = BRW_REGISTER_TYPE_D;
-   this->d = i;
-}
-
-src_reg::src_reg(uint8_t vf[4])
-{
-   init();
-
-   this->file = IMM;
-   this->type = BRW_REGISTER_TYPE_VF;
-   memcpy(&this->ud, vf, sizeof(unsigned));
-}
-
-src_reg::src_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
-{
-   init();
-
-   this->file = IMM;
-   this->type = BRW_REGISTER_TYPE_VF;
-   this->ud = (vf0 <<  0) | (vf1 <<  8) | (vf2 << 16) | (vf3 << 24);
-}
-
 src_reg::src_reg(struct brw_reg reg) :
    backend_reg(reg)
 {
@@ -382,7 +337,9 @@ vec4_visitor::opt_vector_float()
 
       remaining_channels &= ~inst->dst.writemask;
       if (remaining_channels == 0) {
-         vec4_instruction *mov = MOV(inst->dst, imm);
+         unsigned vf;
+         memcpy(&vf, imm, sizeof(vf));
+         vec4_instruction *mov = MOV(inst->dst, brw_imm_vf(vf));
          mov->dst.type = BRW_REGISTER_TYPE_F;
          mov->dst.writemask = WRITEMASK_XYZW;
          inst->insert_after(block, mov);
@@ -657,13 +614,13 @@ vec4_visitor::opt_algebraic()
 	    inst->opcode = BRW_OPCODE_MOV;
 	    switch (inst->src[0].type) {
 	    case BRW_REGISTER_TYPE_F:
-	       inst->src[0] = src_reg(0.0f);
+	       inst->src[0] = brw_imm_f(0.0f);
 	       break;
 	    case BRW_REGISTER_TYPE_D:
-	       inst->src[0] = src_reg(0);
+	       inst->src[0] = brw_imm_d(0);
 	       break;
 	    case BRW_REGISTER_TYPE_UD:
-	       inst->src[0] = src_reg(0u);
+	       inst->src[0] = brw_imm_ud(0u);
 	       break;
 	    default:
 	       unreachable("not reached");
@@ -1232,7 +1189,7 @@ vec4_visitor::eliminate_find_live_channel()
       case SHADER_OPCODE_FIND_LIVE_CHANNEL:
          if (depth == 0) {
             inst->opcode = BRW_OPCODE_MOV;
-            inst->src[0] = src_reg(0);
+            inst->src[0] = brw_imm_d(0);
             inst->force_writemask_all = true;
             progress = true;
          }
@@ -1701,7 +1658,7 @@ vec4_visitor::emit_shader_time_end()
     */
    src_reg reset_end = shader_end_time;
    reset_end.swizzle = BRW_SWIZZLE_ZZZZ;
-   vec4_instruction *test = emit(AND(dst_null_d(), reset_end, src_reg(1u)));
+   vec4_instruction *test = emit(AND(dst_null_ud(), reset_end, brw_imm_ud(1u)));
    test->conditional_mod = BRW_CONDITIONAL_Z;
 
    emit(IF(BRW_PREDICATE_NORMAL));
@@ -1715,12 +1672,12 @@ vec4_visitor::emit_shader_time_end()
     * is 2 cycles.  Remove that overhead, so I can forget about that when
     * trying to determine the time taken for single instructions.
     */
-   emit(ADD(diff, src_reg(diff), src_reg(-2u)));
+   emit(ADD(diff, src_reg(diff), brw_imm_ud(-2u)));
 
    emit_shader_time_write(0, src_reg(diff));
-   emit_shader_time_write(1, src_reg(1u));
+   emit_shader_time_write(1, brw_imm_ud(1u));
    emit(BRW_OPCODE_ELSE);
-   emit_shader_time_write(2, src_reg(1u));
+   emit_shader_time_write(2, brw_imm_ud(1u));
    emit(BRW_OPCODE_ENDIF);
 }
 
@@ -1736,7 +1693,7 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value)
 
    offset.type = BRW_REGISTER_TYPE_UD;
    int index = shader_time_index * 3 + shader_time_subindex;
-   emit(MOV(offset, src_reg(index * SHADER_TIME_STRIDE)));
+   emit(MOV(offset, brw_imm_d(index * SHADER_TIME_STRIDE)));
 
    time.type = BRW_REGISTER_TYPE_UD;
    emit(MOV(time, value));
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_builder.h b/src/mesa/drivers/dri/i965/brw_vec4_builder.h
index a76a4ce4639..be1427c7db7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_builder.h
@@ -484,7 +484,7 @@ namespace brw {
             const dst_reg x_times_one_minus_a = vgrf(dst.type);
 
             MUL(y_times_a, y, a);
-            ADD(one_minus_a, negate(a), src_reg(1.0f));
+            ADD(one_minus_a, negate(a), brw_imm_f(1.0f));
             MUL(x_times_one_minus_a, x, src_reg(one_minus_a));
             return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a));
          }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 0c49865eee2..89e49964fa2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -153,7 +153,7 @@ vec4_gs_visitor::emit_prolog()
     */
    this->current_annotation = "clear r0.2";
    dst_reg r0(retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD));
-   vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2, r0, 0u);
+   vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2, r0, brw_imm_ud(0u));
    inst->force_writemask_all = true;
 
    /* Create a virtual register to hold the vertex count */
@@ -161,7 +161,7 @@ vec4_gs_visitor::emit_prolog()
 
    /* Initialize the vertex_count register to 0 */
    this->current_annotation = "initialize vertex_count";
-   inst = emit(MOV(dst_reg(this->vertex_count), 0u));
+   inst = emit(MOV(dst_reg(this->vertex_count), brw_imm_ud(0u)));
    inst->force_writemask_all = true;
 
    if (c->control_data_header_size_bits > 0) {
@@ -176,7 +176,7 @@ vec4_gs_visitor::emit_prolog()
        */
       if (c->control_data_header_size_bits <= 32) {
          this->current_annotation = "initialize control data bits";
-         inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
+         inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u)));
          inst->force_writemask_all = true;
       }
    }
@@ -274,7 +274,7 @@ vec4_gs_visitor::emit_urb_write_header(int mrf)
    vec4_instruction *inst = emit(MOV(mrf_reg, r0));
    inst->force_writemask_all = true;
    emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, this->vertex_count,
-        (uint32_t) gs_prog_data->output_vertex_size_hwords);
+        brw_imm_ud(gs_prog_data->output_vertex_size_hwords));
 }
 
 
@@ -354,11 +354,12 @@ vec4_gs_visitor::emit_control_data_bits()
    src_reg dword_index(this, glsl_type::uint_type);
    if (urb_write_flags) {
       src_reg prev_count(this, glsl_type::uint_type);
-      emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu));
+      emit(ADD(dst_reg(prev_count), this->vertex_count,
+               brw_imm_ud(0xffffffffu)));
       unsigned log2_bits_per_vertex =
          _mesa_fls(c->control_data_bits_per_vertex);
       emit(SHR(dst_reg(dword_index), prev_count,
-               (uint32_t) (6 - log2_bits_per_vertex)));
+               brw_imm_ud(6 - log2_bits_per_vertex)));
    }
 
    /* Start building the URB write message.  The first MRF gets a copy of
@@ -375,8 +376,9 @@ vec4_gs_visitor::emit_control_data_bits()
        * the appropriate OWORD within the control data header.
        */
       src_reg per_slot_offset(this, glsl_type::uint_type);
-      emit(SHR(dst_reg(per_slot_offset), dword_index, 2u));
-      emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, 1u);
+      emit(SHR(dst_reg(per_slot_offset), dword_index, brw_imm_ud(2u)));
+      emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset,
+           brw_imm_ud(1u));
    }
 
    if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) {
@@ -388,10 +390,10 @@ vec4_gs_visitor::emit_control_data_bits()
        * together.
        */
       src_reg channel(this, glsl_type::uint_type);
-      inst = emit(AND(dst_reg(channel), dword_index, 3u));
+      inst = emit(AND(dst_reg(channel), dword_index, brw_imm_ud(3u)));
       inst->force_writemask_all = true;
       src_reg one(this, glsl_type::uint_type);
-      inst = emit(MOV(dst_reg(one), 1u));
+      inst = emit(MOV(dst_reg(one), brw_imm_ud(1u)));
       inst->force_writemask_all = true;
       src_reg channel_mask(this, glsl_type::uint_type);
       inst = emit(SHL(dst_reg(channel_mask), one, channel));
@@ -441,11 +443,11 @@ vec4_gs_visitor::set_stream_control_data_bits(unsigned stream_id)
 
    /* reg::sid = stream_id */
    src_reg sid(this, glsl_type::uint_type);
-   emit(MOV(dst_reg(sid), stream_id));
+   emit(MOV(dst_reg(sid), brw_imm_ud(stream_id)));
 
    /* reg:shift_count = 2 * (vertex_count - 1) */
    src_reg shift_count(this, glsl_type::uint_type);
-   emit(SHL(dst_reg(shift_count), this->vertex_count, 1u));
+   emit(SHL(dst_reg(shift_count), this->vertex_count, brw_imm_ud(1u)));
 
    /* Note: we're relying on the fact that the GEN SHL instruction only pays
     * attention to the lower 5 bits of its second source argument, so on this
@@ -503,8 +505,8 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
        *     vertex_count & (32 / bits_per_vertex - 1) == 0
        */
       vec4_instruction *inst =
-         emit(AND(dst_null_d(), this->vertex_count,
-                  (uint32_t) (32 / c->control_data_bits_per_vertex - 1)));
+         emit(AND(dst_null_ud(), this->vertex_count,
+                  brw_imm_ud(32 / c->control_data_bits_per_vertex - 1)));
       inst->conditional_mod = BRW_CONDITIONAL_Z;
 
       emit(IF(BRW_PREDICATE_NORMAL));
@@ -512,7 +514,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
          /* If vertex_count is 0, then no control data bits have been
           * accumulated yet, so we skip emitting them.
           */
-         emit(CMP(dst_null_d(), this->vertex_count, 0u,
+         emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u),
                   BRW_CONDITIONAL_NEQ));
          emit(IF(BRW_PREDICATE_NORMAL));
          emit_control_data_bits();
@@ -525,7 +527,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
           * effect of any call to EndPrimitive() that the shader may have
           * made before outputting its first vertex.
           */
-         inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
+         inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u)));
          inst->force_writemask_all = true;
       }
       emit(BRW_OPCODE_ENDIF);
@@ -586,9 +588,9 @@ vec4_gs_visitor::gs_end_primitive()
 
    /* control_data_bits |= 1 << ((vertex_count - 1) % 32) */
    src_reg one(this, glsl_type::uint_type);
-   emit(MOV(dst_reg(one), 1u));
+   emit(MOV(dst_reg(one), brw_imm_ud(1u)));
    src_reg prev_count(this, glsl_type::uint_type);
-   emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu));
+   emit(ADD(dst_reg(prev_count), this->vertex_count, brw_imm_ud(0xffffffffu)));
    src_reg mask(this, glsl_type::uint_type);
    /* Note: we're relying on the fact that the GEN SHL instruction only pays
     * attention to the lower 5 bits of its second source argument, so on this
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 27933d7d61c..3d186b49d4e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -373,7 +373,7 @@ vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
       }
 
       reg.writemask = writemask;
-      emit(MOV(reg, src_reg(instr->value.i[i])));
+      emit(MOV(reg, brw_imm_d(instr->value.i[i])));
 
       remaining &= ~writemask;
    }
@@ -444,10 +444,10 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
 
       inst->base_mrf = 2;
       inst->mlen = 1; /* always at least one */
-      inst->src[1] = src_reg(index);
+      inst->src[1] = brw_imm_ud(index);
 
       /* MRF for the first parameter */
-      src_reg lod = src_reg(0);
+      src_reg lod = brw_imm_d(0);
       int param_base = inst->base_mrf;
       int writemask = WRITEMASK_X;
       emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod));
@@ -471,12 +471,12 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       if (const_uniform_block) {
          unsigned index = prog_data->base.binding_table.ssbo_start +
                           const_uniform_block->u[0];
-         surf_index = src_reg(index);
+         surf_index = brw_imm_ud(index);
          brw_mark_surface_used(&prog_data->base, index);
       } else {
          surf_index = src_reg(this, glsl_type::uint_type);
          emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[1], 1),
-                  src_reg(prog_data->base.binding_table.ssbo_start)));
+                  brw_imm_ud(prog_data->base.binding_table.ssbo_start)));
          surf_index = emit_uniformize(surf_index);
 
          brw_mark_surface_used(&prog_data->base,
@@ -491,7 +491,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
          emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[2], 1)));
       } else {
          const_offset_bytes = instr->const_index[0];
-         emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes)));
+         emit(MOV(dst_reg(offset_reg), brw_imm_ud(const_offset_bytes)));
       }
 
       /* Value */
@@ -566,7 +566,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
                if (skipped_channels > 0) {
                   if (!has_indirect) {
                      const_offset_bytes += 4 * skipped_channels;
-                     offset_reg = src_reg(const_offset_bytes);
+                     offset_reg = brw_imm_ud(const_offset_bytes);
                   } else {
                      emit(ADD(dst_reg(offset_reg), offset_reg,
                               brw_imm_ud(4 * skipped_channels)));
@@ -614,13 +614,13 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       if (const_uniform_block) {
          unsigned index = prog_data->base.binding_table.ssbo_start +
                           const_uniform_block->u[0];
-         surf_index = src_reg(index);
+         surf_index = brw_imm_ud(index);
 
          brw_mark_surface_used(&prog_data->base, index);
       } else {
          surf_index = src_reg(this, glsl_type::uint_type);
          emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], 1),
-                  src_reg(prog_data->base.binding_table.ssbo_start)));
+                  brw_imm_ud(prog_data->base.binding_table.ssbo_start)));
          surf_index = emit_uniformize(surf_index);
 
          /* Assume this may touch any UBO. It would be nice to provide
@@ -637,7 +637,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
          emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[1], 1)));
       } else {
          const_offset_bytes = instr->const_index[0];
-         emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes)));
+         emit(MOV(dst_reg(offset_reg), brw_imm_ud((const_offset_bytes))));
       }
 
       /* Read the vector */
@@ -762,7 +762,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
           */
          const unsigned index = prog_data->base.binding_table.ubo_start +
                                 const_block_index->u[0];
-         surf_index = src_reg(index);
+         surf_index = brw_imm_ud(index);
          brw_mark_surface_used(&prog_data->base, index);
       } else {
          /* The block index is not a constant. Evaluate the index expression
@@ -772,7 +772,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
          surf_index = src_reg(this, glsl_type::uint_type);
          emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], nir_type_int,
                                                    instr->num_components),
-                  src_reg(prog_data->base.binding_table.ubo_start)));
+                  brw_imm_ud(prog_data->base.binding_table.ubo_start)));
          surf_index = emit_uniformize(surf_index);
 
          /* Assume this may touch any UBO. It would be nice to provide
@@ -787,11 +787,11 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       src_reg offset;
 
       if (!has_indirect)  {
-         offset = src_reg(const_offset / 16);
+         offset = brw_imm_ud(const_offset / 16);
       } else {
          offset = src_reg(this, glsl_type::uint_type);
          emit(SHR(dst_reg(offset), get_nir_src(instr->src[1], nir_type_int, 1),
-                  src_reg(4u)));
+                  brw_imm_ud(4u)));
       }
 
       src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
@@ -848,12 +848,12 @@ vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr)
    if (const_surface) {
       unsigned surf_index = prog_data->base.binding_table.ssbo_start +
                             const_surface->u[0];
-      surface = src_reg(surf_index);
+      surface = brw_imm_ud(surf_index);
       brw_mark_surface_used(&prog_data->base, surf_index);
    } else {
       surface = src_reg(this, glsl_type::uint_type);
       emit(ADD(dst_reg(surface), get_nir_src(instr->src[0]),
-               src_reg(prog_data->base.binding_table.ssbo_start)));
+               brw_imm_ud(prog_data->base.binding_table.ssbo_start)));
 
       /* Assume this may touch any UBO. This is the same we do for other
        * UBO/SSBO accesses with non-constant surface.
@@ -1174,8 +1174,8 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
 
       emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz),
                brw_conditional_for_nir_comparison(instr->op)));
-      emit(MOV(dst, src_reg(0)));
-      inst = emit(MOV(dst, src_reg(~0)));
+      emit(MOV(dst, brw_imm_d(0)));
+      inst = emit(MOV(dst, brw_imm_d(~0)));
       inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
       break;
    }
@@ -1192,8 +1192,8 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz),
                brw_conditional_for_nir_comparison(instr->op)));
 
-      emit(MOV(dst, src_reg(0)));
-      inst = emit(MOV(dst, src_reg(~0)));
+      emit(MOV(dst, brw_imm_d(0)));
+      inst = emit(MOV(dst, brw_imm_d(~0)));
       inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
       break;
    }
@@ -1235,11 +1235,11 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       break;
 
    case nir_op_f2b:
-      emit(CMP(dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
+      emit(CMP(dst, op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ));
       break;
 
    case nir_op_i2b:
-      emit(CMP(dst, op[0], src_reg(0), BRW_CONDITIONAL_NZ));
+      emit(CMP(dst, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ));
       break;
 
    case nir_op_fnoise1_1:
@@ -1321,9 +1321,9 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
        * subtract the result from 31 to convert the MSB count into an LSB count.
        */
       src_reg src(dst);
-      emit(CMP(dst_null_d(), src, src_reg(-1), BRW_CONDITIONAL_NZ));
+      emit(CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ));
 
-      inst = emit(ADD(dst, src, src_reg(31)));
+      inst = emit(ADD(dst, src, brw_imm_d(31)));
       inst->predicate = BRW_PREDICATE_NORMAL;
       inst->src[0].negate = true;
       break;
@@ -1364,13 +1364,13 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
        * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
        * zero.
        */
-      emit(CMP(dst_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
+      emit(CMP(dst_null_f(), op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ));
 
       op[0].type = BRW_REGISTER_TYPE_UD;
       dst.type = BRW_REGISTER_TYPE_UD;
-      emit(AND(dst, op[0], src_reg(0x80000000u)));
+      emit(AND(dst, op[0], brw_imm_ud(0x80000000u)));
 
-      inst = emit(OR(dst, src_reg(dst), src_reg(0x3f800000u)));
+      inst = emit(OR(dst, src_reg(dst), brw_imm_ud(0x3f800000u)));
       inst->predicate = BRW_PREDICATE_NORMAL;
       dst.type = BRW_REGISTER_TYPE_F;
 
@@ -1385,9 +1385,9 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
        *               -> non-negative val generates 0x00000000.
        *  Predicated OR sets 1 if val is positive.
        */
-      emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G));
-      emit(ASR(dst, op[0], src_reg(31)));
-      inst = emit(OR(dst, src_reg(dst), src_reg(1)));
+      emit(CMP(dst_null_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_G));
+      emit(ASR(dst, op[0], brw_imm_d(31)));
+      inst = emit(OR(dst, src_reg(dst), brw_imm_d(1)));
       inst->predicate = BRW_PREDICATE_NORMAL;
       break;
 
@@ -1418,7 +1418,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       break;
 
    case nir_op_bcsel:
-      emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
+      emit(CMP(dst_null_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ));
       inst = emit(BRW_OPCODE_SEL, dst, op[1], op[2]);
       switch (dst.writemask) {
       case WRITEMASK_X:
@@ -1465,10 +1465,10 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       unsigned swiz =
          brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]);
 
-      emit(CMP(dst_null_d(), swizzle(op[0], swiz), src_reg(0),
+      emit(CMP(dst_null_d(), swizzle(op[0], swiz), brw_imm_d(0),
                BRW_CONDITIONAL_NZ));
-      emit(MOV(dst, src_reg(0)));
-      inst = emit(MOV(dst, src_reg(~0)));
+      emit(MOV(dst, brw_imm_d(0)));
+      inst = emit(MOV(dst, brw_imm_d(~0)));
       inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
       break;
    }
@@ -1502,7 +1502,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
        BRW_NIR_BOOLEAN_NEEDS_RESOLVE) {
       dst_reg masked = dst_reg(this, glsl_type::int_type);
       masked.writemask = dst.writemask;
-      emit(AND(masked, src_reg(dst), src_reg(1)));
+      emit(AND(masked, src_reg(dst), brw_imm_d(1)));
       src_reg masked_neg = src_reg(masked);
       masked_neg.negate = true;
       emit(MOV(retype(dst, BRW_REGISTER_TYPE_D), masked_neg));
@@ -1575,7 +1575,7 @@ void
 vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
 {
    unsigned sampler = instr->sampler_index;
-   src_reg sampler_reg = src_reg(sampler);
+   src_reg sampler_reg = brw_imm_ud(sampler);
    src_reg coordinate;
    const glsl_type *coord_type = NULL;
    src_reg shadow_comparitor;
@@ -1595,7 +1595,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
    if (instr->op == nir_texop_tg4) {
       int swiz = GET_SWZ(key_tex->swizzles[sampler], instr->component);
       if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
-         emit(MOV(dest, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f)));
+         emit(MOV(dest, brw_imm_f(swiz == SWIZZLE_ONE ? 1.0f : 0.0f)));
          return;
       }
    }
@@ -1658,7 +1658,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
              key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
             mcs = emit_mcs_fetch(coord_type, coordinate, sampler_reg);
          } else {
-            mcs = src_reg(0u);
+            mcs = brw_imm_ud(0u);
          }
          mcs = retype(mcs, BRW_REGISTER_TYPE_UD);
          break;
@@ -1686,7 +1686,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
          /* Emit code to evaluate the actual indexing expression */
          src_reg src = get_nir_src(instr->src[i].src, 1);
          src_reg temp(this, glsl_type::uint_type);
-         emit(ADD(dst_reg(temp), src, src_reg(sampler)));
+         emit(ADD(dst_reg(temp), src, brw_imm_ud(sampler)));
          sampler_reg = emit_uniformize(temp);
          break;
       }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
index a7c286d3ac1..28002c56cdc 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
@@ -71,7 +71,7 @@ namespace {
 
             bld.MOV(writemask(tmp, mask), src);
             if (n < 4)
-               bld.MOV(writemask(tmp, ~mask), 0);
+               bld.MOV(writemask(tmp, ~mask), brw_imm_d(0));
 
             return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1);
          }
@@ -143,7 +143,7 @@ namespace brw {
             /* Emit the message send instruction. */
             const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz);
             vec4_instruction *inst =
-               bld.emit(op, dst, src_reg(payload), usurface, arg);
+               bld.emit(op, dst, src_reg(payload), usurface, brw_imm_ud(arg));
             inst->mlen = sz;
             inst->regs_written = ret_sz;
             inst->header_size = header_sz;
@@ -235,7 +235,7 @@ namespace brw {
             const vec4_builder ubld = bld.exec_all();
             const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD);
 
-            ubld.MOV(dst, src_reg(0));
+            ubld.MOV(dst, brw_imm_d(0));
 
             if (bld.shader->devinfo->gen == 7 &&
                 !bld.shader->devinfo->is_haswell) {
@@ -243,7 +243,7 @@ namespace brw {
                 * have no SIMD4x2 variant.  We only use the two X channels
                 * in that case, mask everything else out.
                 */
-               ubld.MOV(writemask(dst, WRITEMASK_W), src_reg(0x11));
+               ubld.MOV(writemask(dst, WRITEMASK_W), brw_imm_d(0x11));
             }
 
             return src_reg(dst);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index fda3d7c4427..08a1f8bb77c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -408,7 +408,7 @@ vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
     * You should inspect the disasm output in order to verify that the MOV is
     * not optimized away.
     */
-   emit(MOV(tmp_dst, src_reg(0x12345678u)));
+   emit(MOV(tmp_dst, brw_imm_ud(0x12345678u)));
 #endif
 
    /* Give tmp the form below, where "." means untouched.
@@ -427,7 +427,7 @@ vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
     *   0xhhhh0000
     */
    tmp_src.swizzle = BRW_SWIZZLE_YYYY;
-   emit(SHL(dst, tmp_src, src_reg(16u)));
+   emit(SHL(dst, tmp_src, brw_imm_ud(16u)));
 
    /* Finally, give the write-channels of dst the form of packHalf2x16's
     * output:
@@ -466,10 +466,10 @@ vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0)
    src_reg tmp_src(tmp_dst);
 
    tmp_dst.writemask = WRITEMASK_X;
-   emit(AND(tmp_dst, src0, src_reg(0xffffu)));
+   emit(AND(tmp_dst, src0, brw_imm_ud(0xffffu)));
 
    tmp_dst.writemask = WRITEMASK_Y;
-   emit(SHR(tmp_dst, src0, src_reg(16u)));
+   emit(SHR(tmp_dst, src0, brw_imm_ud(16u)));
 
    dst.writemask = WRITEMASK_XY;
    emit(F16TO32(dst, tmp_src));
@@ -484,7 +484,7 @@ vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0)
     * vector float and a type-converting MOV.
     */
    dst_reg shift(this, glsl_type::uvec4_type);
-   emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78)));
+   emit(MOV(shift, brw_imm_vf4(0x00, 0x60, 0x70, 0x78)));
 
    dst_reg shifted(this, glsl_type::uvec4_type);
    src0.swizzle = BRW_SWIZZLE_XXXX;
@@ -494,7 +494,7 @@ vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0)
    dst_reg f(this, glsl_type::vec4_type);
    emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
 
-   emit(MUL(dst, src_reg(f), src_reg(1.0f / 255.0f)));
+   emit(MUL(dst, src_reg(f), brw_imm_f(1.0f / 255.0f)));
 }
 
 void
@@ -506,7 +506,7 @@ vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0)
     * vector float and a type-converting MOV.
     */
    dst_reg shift(this, glsl_type::uvec4_type);
-   emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78)));
+   emit(MOV(shift, brw_imm_vf4(0x00, 0x60, 0x70, 0x78)));
 
    dst_reg shifted(this, glsl_type::uvec4_type);
    src0.swizzle = BRW_SWIZZLE_XXXX;
@@ -517,11 +517,11 @@ vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0)
    emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
 
    dst_reg scaled(this, glsl_type::vec4_type);
-   emit(MUL(scaled, src_reg(f), src_reg(1.0f / 127.0f)));
+   emit(MUL(scaled, src_reg(f), brw_imm_f(1.0f / 127.0f)));
 
    dst_reg max(this, glsl_type::vec4_type);
-   emit_minmax(BRW_CONDITIONAL_GE, max, src_reg(scaled), src_reg(-1.0f));
-   emit_minmax(BRW_CONDITIONAL_L, dst, src_reg(max), src_reg(1.0f));
+   emit_minmax(BRW_CONDITIONAL_GE, max, src_reg(scaled), brw_imm_f(-1.0f));
+   emit_minmax(BRW_CONDITIONAL_L, dst, src_reg(max), brw_imm_f(1.0f));
 }
 
 void
@@ -532,7 +532,7 @@ vec4_visitor::emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0)
    inst->saturate = true;
 
    dst_reg scaled(this, glsl_type::vec4_type);
-   emit(MUL(scaled, src_reg(saturated), src_reg(255.0f)));
+   emit(MUL(scaled, src_reg(saturated), brw_imm_f(255.0f)));
 
    dst_reg rounded(this, glsl_type::vec4_type);
    emit(RNDE(rounded, src_reg(scaled)));
@@ -548,13 +548,13 @@ void
 vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0)
 {
    dst_reg max(this, glsl_type::vec4_type);
-   emit_minmax(BRW_CONDITIONAL_GE, max, src0, src_reg(-1.0f));
+   emit_minmax(BRW_CONDITIONAL_GE, max, src0, brw_imm_f(-1.0f));
 
    dst_reg min(this, glsl_type::vec4_type);
-   emit_minmax(BRW_CONDITIONAL_L, min, src_reg(max), src_reg(1.0f));
+   emit_minmax(BRW_CONDITIONAL_L, min, src_reg(max), brw_imm_f(1.0f));
 
    dst_reg scaled(this, glsl_type::vec4_type);
-   emit(MUL(scaled, src_reg(min), src_reg(127.0f)));
+   emit(MUL(scaled, src_reg(min), brw_imm_f(127.0f)));
 
    dst_reg rounded(this, glsl_type::vec4_type);
    emit(RNDE(rounded, src_reg(scaled)));
@@ -715,7 +715,7 @@ vec4_visitor::emit_lrp(const dst_reg &dst,
       x_times_one_minus_a.writemask = dst.writemask;
 
       emit(MUL(y_times_a, y, a));
-      emit(ADD(one_minus_a, negate(a), src_reg(1.0f)));
+      emit(ADD(one_minus_a, negate(a), brw_imm_f(1.0f)));
       emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
       return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
    }
@@ -849,7 +849,7 @@ vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
             coordinate));
 
    emit(MOV(dst_reg(MRF, param_base, coordinate_type, zero_mask),
-            src_reg(0)));
+            brw_imm_d(0)));
 
    emit(inst);
    return src_reg(inst->dst);
@@ -889,7 +889,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
     */
    if (op == ir_tex || op == ir_query_levels) {
       assert(lod.file == BAD_FILE);
-      lod = src_reg(0.0f);
+      lod = brw_imm_f(0.0f);
    }
 
    enum opcode opcode;
@@ -959,7 +959,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
 
       if (zero_mask != 0) {
          emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask),
-                  src_reg(0)));
+                  brw_imm_d(0)));
       }
       /* Load the shadow comparitor */
       if (shadow_comparitor.file != BAD_FILE && op != ir_txd && (op != ir_tg4 || offset_value.file == BAD_FILE)) {
@@ -1058,7 +1058,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
    if (op == ir_txs && is_cube_array) {
       emit_math(SHADER_OPCODE_INT_QUOTIENT,
                 writemask(inst->dst, WRITEMASK_Z),
-                src_reg(inst->dst), src_reg(6));
+                src_reg(inst->dst), brw_imm_d(6));
    }
 
    if (devinfo->gen == 6 && op == ir_tg4) {
@@ -1083,7 +1083,7 @@ vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst)
    dst_f.type = BRW_REGISTER_TYPE_F;
 
    /* Convert from UNORM to UINT */
-   emit(MUL(dst_f, src_reg(dst_f), src_reg((float)((1 << width) - 1))));
+   emit(MUL(dst_f, src_reg(dst_f), brw_imm_f((float)((1 << width) - 1))));
    emit(MOV(dst, src_reg(dst_f)));
 
    if (wa & WA_SIGN) {
@@ -1091,8 +1091,8 @@ vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst)
        * shifting the sign bit into place, then shifting back
        * preserving sign.
        */
-      emit(SHL(dst, src_reg(dst), src_reg(32 - width)));
-      emit(ASR(dst, src_reg(dst), src_reg(32 - width)));
+      emit(SHL(dst, src_reg(dst), brw_imm_d(32 - width)));
+      emit(ASR(dst, src_reg(dst), brw_imm_d(32 - width)));
    }
 }
 
@@ -1168,12 +1168,12 @@ vec4_visitor::swizzle_result(ir_texture_opcode op, dst_reg dest,
 
    if (zero_mask) {
       swizzled_result.writemask = zero_mask;
-      emit(MOV(swizzled_result, src_reg(0.0f)));
+      emit(MOV(swizzled_result, brw_imm_f(0.0f)));
    }
 
    if (one_mask) {
       swizzled_result.writemask = one_mask;
-      emit(MOV(swizzled_result, src_reg(1.0f)));
+      emit(MOV(swizzled_result, brw_imm_f(1.0f)));
    }
 }
 
@@ -1220,7 +1220,7 @@ vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
     */
    vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
                                  src_payload,
-                                 src_reg(surf_index), src_reg(atomic_op));
+                                 brw_imm_ud(surf_index), brw_imm_ud(atomic_op));
    inst->mlen = mlen;
 }
 
@@ -1240,7 +1240,7 @@ vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
     */
    vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst,
                                  src_reg(offset),
-                                 src_reg(surf_index), src_reg(1));
+                                 brw_imm_ud(surf_index), brw_imm_d(1));
    inst->mlen = 1;
 }
 
@@ -1281,14 +1281,14 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
       dst_reg header1_w = header1;
       header1_w.writemask = WRITEMASK_W;
 
-      emit(MOV(header1, 0u));
+      emit(MOV(header1, brw_imm_ud(0u)));
 
       if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
 	 src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]);
 
 	 current_annotation = "Point size";
-	 emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
-	 emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
+	 emit(MUL(header1_w, psiz, brw_imm_f((float)(1 << 11))));
+	 emit(AND(header1_w, src_reg(header1_w), brw_imm_d(0x7ff << 8)));
       }
 
       if (output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE) {
@@ -1296,13 +1296,13 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
          dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
          dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
 
-         emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), src_reg(0.0f), BRW_CONDITIONAL_L));
-         emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, src_reg(0));
+         emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
+         emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, brw_imm_d(0));
          emit(OR(header1_w, src_reg(header1_w), src_reg(flags0)));
 
-         emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), src_reg(0.0f), BRW_CONDITIONAL_L));
-         emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, src_reg(0));
-         emit(SHL(flags1, src_reg(flags1), src_reg(4)));
+         emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
+         emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, brw_imm_d(0));
+         emit(SHL(flags1, src_reg(flags1), brw_imm_d(4)));
          emit(OR(header1_w, src_reg(header1_w), src_reg(flags1)));
       }
 
@@ -1319,20 +1319,20 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
           output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE) {
          src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
          ndc_w.swizzle = BRW_SWIZZLE_WWWW;
-         emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L));
+         emit(CMP(dst_null_f(), ndc_w, brw_imm_f(0.0f), BRW_CONDITIONAL_L));
          vec4_instruction *inst;
-         inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6)));
+         inst = emit(OR(header1_w, src_reg(header1_w), brw_imm_ud(1u << 6)));
          inst->predicate = BRW_PREDICATE_NORMAL;
          output_reg[BRW_VARYING_SLOT_NDC].type = BRW_REGISTER_TYPE_F;
-         inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f)));
+         inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], brw_imm_f(0.0f)));
          inst->predicate = BRW_PREDICATE_NORMAL;
       }
 
       emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
    } else if (devinfo->gen < 6) {
-      emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
+      emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u)));
    } else {
-      emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
+      emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), brw_imm_d(0)));
       if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
          dst_reg reg_w = reg;
          reg_w.writemask = WRITEMASK_W;
@@ -1524,13 +1524,13 @@ vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst,
       src_reg index = src_reg(this, glsl_type::int_type);
 
       emit_before(block, inst, ADD(dst_reg(index), *reladdr,
-                                   src_reg(reg_offset)));
+                                   brw_imm_d(reg_offset)));
       emit_before(block, inst, MUL(dst_reg(index), index,
-                                   src_reg(message_header_scale)));
+                                   brw_imm_d(message_header_scale)));
 
       return index;
    } else {
-      return src_reg(reg_offset * message_header_scale);
+      return brw_imm_d(reg_offset * message_header_scale);
    }
 }
 
@@ -1542,24 +1542,24 @@ vec4_visitor::get_pull_constant_offset(bblock_t * block, vec4_instruction *inst,
       src_reg index = src_reg(this, glsl_type::int_type);
 
       emit_before(block, inst, ADD(dst_reg(index), *reladdr,
-                                   src_reg(reg_offset)));
+                                   brw_imm_d(reg_offset)));
 
       /* Pre-gen6, the message header uses byte offsets instead of vec4
        * (16-byte) offset units.
        */
       if (devinfo->gen < 6) {
-         emit_before(block, inst, MUL(dst_reg(index), index, src_reg(16)));
+         emit_before(block, inst, MUL(dst_reg(index), index, brw_imm_d(16)));
       }
 
       return index;
    } else if (devinfo->gen >= 8) {
       /* Store the offset in a GRF so we can send-from-GRF. */
       src_reg offset = src_reg(this, glsl_type::int_type);
-      emit_before(block, inst, MOV(dst_reg(offset), src_reg(reg_offset)));
+      emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset)));
       return offset;
    } else {
       int message_header_scale = devinfo->gen < 6 ? 16 : 1;
-      return src_reg(reg_offset * message_header_scale);
+      return brw_imm_d(reg_offset * message_header_scale);
    }
 }
 
@@ -1748,7 +1748,7 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
                                              reg_offset);
 
    emit_pull_constant_load_reg(temp,
-                               src_reg(index),
+                               brw_imm_ud(index),
                                offset,
                                block, inst);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
index 5dd4f98cecc..fd8be7d972c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
@@ -50,7 +50,7 @@ vec4_vs_visitor::emit_prolog()
             dst_reg dst = reg;
             dst.type = brw_type_for_base_type(glsl_type::vec4_type);
             dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) - 1;
-            emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
+            emit(MUL(dst, src_reg(dst), brw_imm_f(1.0f / 65536.0f)));
          }
 
          /* Do sign recovery for 2101010 formats if required. */
@@ -58,8 +58,8 @@ vec4_vs_visitor::emit_prolog()
             if (sign_recovery_shift.file == BAD_FILE) {
                /* shift constant: <22,22,22,30> */
                sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type);
-               emit(MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u)));
-               emit(MOV(writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u)));
+               emit(MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), brw_imm_ud(22u)));
+               emit(MOV(writemask(sign_recovery_shift, WRITEMASK_W), brw_imm_ud(30u)));
             }
 
             emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift)));
@@ -87,16 +87,16 @@ vec4_vs_visitor::emit_prolog()
                   /* mul constant: 1 / (2^(b-1) - 1) */
                   es3_normalize_factor = dst_reg(this, glsl_type::vec4_type);
                   emit(MOV(writemask(es3_normalize_factor, WRITEMASK_XYZ),
-                           src_reg(1.0f / ((1<<9) - 1))));
+                           brw_imm_f(1.0f / ((1<<9) - 1))));
                   emit(MOV(writemask(es3_normalize_factor, WRITEMASK_W),
-                           src_reg(1.0f / ((1<<1) - 1))));
+                           brw_imm_f(1.0f / ((1<<1) - 1))));
                }
 
                dst_reg dst = reg;
                dst.type = brw_type_for_base_type(glsl_type::vec4_type);
                emit(MOV(dst, src_reg(reg_d)));
                emit(MUL(dst, src_reg(dst), src_reg(es3_normalize_factor)));
-               emit_minmax(BRW_CONDITIONAL_GE, dst, src_reg(dst), src_reg(-1.0f));
+               emit_minmax(BRW_CONDITIONAL_GE, dst, src_reg(dst), brw_imm_f(-1.0f));
             } else {
                /* The following equations are from the OpenGL 3.2 specification:
                 *
@@ -113,9 +113,9 @@ vec4_vs_visitor::emit_prolog()
                   /* 1 / (2^b - 1) for b=<10,10,10,2> */
                   normalize_factor = dst_reg(this, glsl_type::vec4_type);
                   emit(MOV(writemask(normalize_factor, WRITEMASK_XYZ),
-                           src_reg(1.0f / ((1<<10) - 1))));
+                           brw_imm_f(1.0f / ((1<<10) - 1))));
                   emit(MOV(writemask(normalize_factor, WRITEMASK_W),
-                           src_reg(1.0f / ((1<<2) - 1))));
+                           brw_imm_f(1.0f / ((1<<2) - 1))));
                }
 
                dst_reg dst = reg;
@@ -124,8 +124,8 @@ vec4_vs_visitor::emit_prolog()
 
                /* For signed normalization, we want the numerator to be 2c+1. */
                if (wa_flags & BRW_ATTRIB_WA_SIGN) {
-                  emit(MUL(dst, src_reg(dst), src_reg(2.0f)));
-                  emit(ADD(dst, src_reg(dst), src_reg(1.0f)));
+                  emit(MUL(dst, src_reg(dst), brw_imm_f(2.0f)));
+                  emit(ADD(dst, src_reg(dst), brw_imm_f(1.0f)));
                }
 
                emit(MUL(dst, src_reg(dst), src_reg(normalize_factor)));
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
index 2fef188c17e..3840ce0fe57 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
@@ -65,7 +65,7 @@ gen6_gs_visitor::emit_prolog()
                                  (prog_data->vue_map.num_slots + 1) *
                                  nir->info.gs.vertices_out);
    this->vertex_output_offset = src_reg(this, glsl_type::uint_type);
-   emit(MOV(dst_reg(this->vertex_output_offset), src_reg(0u)));
+   emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_ud(0u)));
 
    /* MRF 1 will be the header for all messages (FF_SYNC and URB_WRITES),
     * so initialize it once to R0.
@@ -87,13 +87,13 @@ gen6_gs_visitor::emit_prolog()
     * headers.
     */
    this->first_vertex = src_reg(this, glsl_type::uint_type);
-   emit(MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START));
+   emit(MOV(dst_reg(this->first_vertex), brw_imm_ud(URB_WRITE_PRIM_START)));
 
    /* The FF_SYNC message requires to know the number of primitives generated,
     * so keep a counter for this.
     */
    this->prim_count = src_reg(this, glsl_type::uint_type);
-   emit(MOV(dst_reg(this->prim_count), 0u));
+   emit(MOV(dst_reg(this->prim_count), brw_imm_ud(0u)));
 
    if (gs_prog_data->gen6_xfb_enabled) {
       /* Create a virtual register to hold destination indices in SOL */
@@ -170,7 +170,7 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id)
       }
 
       emit(ADD(dst_reg(this->vertex_output_offset),
-               this->vertex_output_offset, 1u));
+               this->vertex_output_offset, brw_imm_ud(1u)));
    }
 
    /* Now buffer flags for this vertex */
@@ -181,9 +181,9 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id)
       /* If we are outputting points, then every vertex has PrimStart and
        * PrimEnd set.
        */
-      emit(MOV(dst, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) |
-               URB_WRITE_PRIM_START | URB_WRITE_PRIM_END));
-      emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u));
+      emit(MOV(dst, brw_imm_d((_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) |
+                              URB_WRITE_PRIM_START | URB_WRITE_PRIM_END)));
+      emit(ADD(dst_reg(this->prim_count), this->prim_count, brw_imm_ud(1u)));
    } else {
       /* Otherwise, we can only set the PrimStart flag, which we have stored
        * in the first_vertex register. We will have to wait until we execute
@@ -191,11 +191,12 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id)
        * vertex.
        */
       emit(OR(dst, this->first_vertex,
-              (gs_prog_data->output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
-      emit(MOV(dst_reg(this->first_vertex), 0u));
+              brw_imm_ud(gs_prog_data->output_topology <<
+                         URB_WRITE_PRIM_TYPE_SHIFT)));
+      emit(MOV(dst_reg(this->first_vertex), brw_imm_ud(0u)));
    }
    emit(ADD(dst_reg(this->vertex_output_offset),
-            this->vertex_output_offset, 1u));
+            this->vertex_output_offset, brw_imm_ud(1u)));
 }
 
 void
@@ -218,10 +219,10 @@ gen6_gs_visitor::gs_end_primitive()
     * below).
     */
    unsigned num_output_vertices = nir->info.gs.vertices_out;
-   emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices + 1),
-            BRW_CONDITIONAL_L));
-   vec4_instruction *inst = emit(CMP(dst_null_d(),
-                                     this->vertex_count, 0u,
+   emit(CMP(dst_null_ud(), this->vertex_count,
+            brw_imm_ud(num_output_vertices + 1), BRW_CONDITIONAL_L));
+   vec4_instruction *inst = emit(CMP(dst_null_ud(),
+                                     this->vertex_count, brw_imm_ud(0u),
                                      BRW_CONDITIONAL_NEQ));
    inst->predicate = BRW_PREDICATE_NORMAL;
    emit(IF(BRW_PREDICATE_NORMAL));
@@ -231,19 +232,19 @@ gen6_gs_visitor::gs_end_primitive()
        * vertex.
        */
       src_reg offset(this, glsl_type::uint_type);
-      emit(ADD(dst_reg(offset), this->vertex_output_offset, src_reg(-1)));
+      emit(ADD(dst_reg(offset), this->vertex_output_offset, brw_imm_d(-1)));
 
       src_reg dst(this->vertex_output);
       dst.reladdr = ralloc(mem_ctx, src_reg);
       memcpy(dst.reladdr, &offset, sizeof(src_reg));
 
-      emit(OR(dst_reg(dst), dst, URB_WRITE_PRIM_END));
-      emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u));
+      emit(OR(dst_reg(dst), dst, brw_imm_d(URB_WRITE_PRIM_END)));
+      emit(ADD(dst_reg(this->prim_count), this->prim_count, brw_imm_ud(1u)));
 
       /* Set the first vertex flag to indicate that the next vertex will start
        * a primitive.
        */
-      emit(MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START));
+      emit(MOV(dst_reg(this->first_vertex), brw_imm_d(URB_WRITE_PRIM_START)));
    }
    emit(BRW_OPCODE_ENDIF);
 }
@@ -262,7 +263,8 @@ gen6_gs_visitor::emit_urb_write_header(int mrf)
     */
    src_reg flags_offset(this, glsl_type::uint_type);
    emit(ADD(dst_reg(flags_offset),
-            this->vertex_output_offset, src_reg(prog_data->vue_map.num_slots)));
+            this->vertex_output_offset,
+            brw_imm_d(prog_data->vue_map.num_slots)));
 
    src_reg flags_data(this->vertex_output);
    flags_data.reladdr = ralloc(mem_ctx, src_reg);
@@ -321,7 +323,7 @@ gen6_gs_visitor::emit_thread_end()
     * points because in the point case we set PrimEnd on all vertices.
     */
    if (nir->info.gs.output_primitive != GL_POINTS) {
-      emit(CMP(dst_null_d(), this->first_vertex, 0u, BRW_CONDITIONAL_Z));
+      emit(CMP(dst_null_ud(), this->first_vertex, brw_imm_ud(0u), BRW_CONDITIONAL_Z));
       emit(IF(BRW_PREDICATE_NORMAL));
       gs_end_primitive();
       emit(BRW_OPCODE_ENDIF);
@@ -347,7 +349,7 @@ gen6_gs_visitor::emit_thread_end()
    int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen);
 
    /* Issue the FF_SYNC message and obtain the initial VUE handle. */
-   emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_G));
+   emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u), BRW_CONDITIONAL_G));
    emit(IF(BRW_PREDICATE_NORMAL));
    {
       this->current_annotation = "gen6 thread end: ff_sync";
@@ -364,15 +366,15 @@ gen6_gs_visitor::emit_thread_end()
                      dst_reg(this->temp), this->prim_count, this->svbi);
       } else {
          inst = emit(GS_OPCODE_FF_SYNC,
-                     dst_reg(this->temp), this->prim_count, src_reg(0u));
+                     dst_reg(this->temp), this->prim_count, brw_imm_ud(0u));
       }
       inst->base_mrf = base_mrf;
 
       /* Loop over all buffered vertices and emit URB write messages */
       this->current_annotation = "gen6 thread end: urb writes init";
       src_reg vertex(this, glsl_type::uint_type);
-      emit(MOV(dst_reg(vertex), 0u));
-      emit(MOV(dst_reg(this->vertex_output_offset), 0u));
+      emit(MOV(dst_reg(vertex), brw_imm_ud(0u)));
+      emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_ud(0u)));
 
       this->current_annotation = "gen6 thread end: urb writes";
       emit(BRW_OPCODE_DO);
@@ -416,7 +418,7 @@ gen6_gs_visitor::emit_thread_end()
 
                mrf++;
                emit(ADD(dst_reg(this->vertex_output_offset),
-                        this->vertex_output_offset, 1u));
+                        this->vertex_output_offset, brw_imm_ud(1u)));
 
                /* If this was max_usable_mrf, we can't fit anything more into
                 * this URB WRITE. Same if we reached the max. message length.
@@ -437,9 +439,9 @@ gen6_gs_visitor::emit_thread_end()
           * writing the next vertex.
           */
          emit(ADD(dst_reg(this->vertex_output_offset),
-                  this->vertex_output_offset, 1u));
+                  this->vertex_output_offset, brw_imm_ud(1u)));
 
-         emit(ADD(dst_reg(vertex), vertex, 1u));
+         emit(ADD(dst_reg(vertex), vertex, brw_imm_ud(1u)));
       }
       emit(BRW_OPCODE_WHILE);
 
@@ -468,8 +470,8 @@ gen6_gs_visitor::emit_thread_end()
    if (gs_prog_data->gen6_xfb_enabled) {
       /* When emitting EOT, set SONumPrimsWritten Increment Value. */
       src_reg data(this, glsl_type::uint_type);
-      emit(AND(dst_reg(data), this->sol_prim_written, src_reg(0xffffu)));
-      emit(SHL(dst_reg(data), data, src_reg(16u)));
+      emit(AND(dst_reg(data), this->sol_prim_written, brw_imm_ud(0xffffu)));
+      emit(SHL(dst_reg(data), data, brw_imm_ud(16u)));
       emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, base_mrf), data);
    }
 
@@ -588,8 +590,8 @@ gen6_gs_visitor::xfb_write()
 
    this->current_annotation = "gen6 thread end: svb writes init";
 
-   emit(MOV(dst_reg(this->vertex_output_offset), 0u));
-   emit(MOV(dst_reg(this->sol_prim_written), 0u));
+   emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_ud(0u)));
+   emit(MOV(dst_reg(this->sol_prim_written), brw_imm_ud(0u)));
 
    /* Check that at least one primitive can be written
     *
@@ -600,7 +602,7 @@ gen6_gs_visitor::xfb_write()
     * transform feedback is in interleaved or separate attribs mode.
     */
    src_reg sol_temp(this, glsl_type::uvec4_type);
-   emit(ADD(dst_reg(sol_temp), this->svbi, src_reg(num_verts)));
+   emit(ADD(dst_reg(sol_temp), this->svbi, brw_imm_ud(num_verts)));
 
    /* Compare SVBI calculated number with the maximum value, which is
     * in R1.4 (previously saved in this->max_svbi) for gen6.
@@ -623,7 +625,7 @@ gen6_gs_visitor::xfb_write()
 
    /* Write transform feedback data for all processed vertices. */
    for (int i = 0; i < (int)nir->info.gs.vertices_out; i++) {
-      emit(MOV(dst_reg(sol_temp), i));
+      emit(MOV(dst_reg(sol_temp), brw_imm_d(i)));
       emit(CMP(dst_null_d(), sol_temp, this->vertex_count,
                BRW_CONDITIONAL_L));
       emit(IF(BRW_PREDICATE_NORMAL));
@@ -644,8 +646,8 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
    /* Check for buffer overflow: we need room to write the complete primitive
     * (all vertices). Otherwise, avoid writing any vertices for it
     */
-   emit(ADD(dst_reg(sol_temp), this->sol_prim_written, 1u));
-   emit(MUL(dst_reg(sol_temp), sol_temp, src_reg(num_verts)));
+   emit(ADD(dst_reg(sol_temp), this->sol_prim_written, brw_imm_ud(1u)));
+   emit(MUL(dst_reg(sol_temp), sol_temp, brw_imm_ud(num_verts)));
    emit(ADD(dst_reg(sol_temp), sol_temp, this->svbi));
    emit(CMP(dst_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
    emit(IF(BRW_PREDICATE_NORMAL));
@@ -683,7 +685,7 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
          src_reg data(this->vertex_output);
          data.reladdr = ralloc(mem_ctx, src_reg);
          int offset = get_vertex_output_offset_for_varying(vertex, varying);
-         emit(MOV(dst_reg(this->vertex_output_offset), offset));
+         emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_d(offset)));
          memcpy(data.reladdr, &this->vertex_output_offset, sizeof(src_reg));
          data.type = output_reg[varying].type;
 
@@ -710,9 +712,9 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
              */
             emit(ADD(dst_reg(this->destination_indices),
                      this->destination_indices,
-                     src_reg(num_verts)));
+                     brw_imm_ud(num_verts)));
             emit(ADD(dst_reg(this->sol_prim_written),
-                     this->sol_prim_written, 1u));
+                     this->sol_prim_written, brw_imm_ud(1u)));
          }
 
       }
diff --git a/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
index 9aa2fcc7907..e5e566c60bc 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
@@ -145,7 +145,7 @@ TEST_F(cmod_propagation_test, basic)
    dst_reg dest = dst_reg(v, glsl_type::float_type);
    src_reg src0 = src_reg(v, glsl_type::float_type);
    src_reg src1 = src_reg(v, glsl_type::float_type);
-   src_reg zero(0.0f);
+   src_reg zero(brw_imm_f(0.0f));
    dst_reg dest_null = bld.null_reg_f();
    dest_null.writemask = WRITEMASK_X;
 
@@ -181,7 +181,7 @@ TEST_F(cmod_propagation_test, basic_different_dst_writemask)
    dst_reg dest = dst_reg(v, glsl_type::float_type);
    src_reg src0 = src_reg(v, glsl_type::float_type);
    src_reg src1 = src_reg(v, glsl_type::float_type);
-   src_reg zero(0.0f);
+   src_reg zero(brw_imm_f(0.0f));
    dst_reg dest_null = bld.null_reg_f();
 
    bld.ADD(dest, src0, src1);
@@ -217,8 +217,8 @@ TEST_F(cmod_propagation_test, andz_one)
    const vec4_builder bld = vec4_builder(v).at_end();
    dst_reg dest = dst_reg(v, glsl_type::int_type);
    src_reg src0 = src_reg(v, glsl_type::float_type);
-   src_reg zero(0.0f);
-   src_reg one(1);
+   src_reg zero(brw_imm_f(0.0f));
+   src_reg one(brw_imm_d(1));
 
    bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
    set_condmod(BRW_CONDITIONAL_Z,
@@ -253,7 +253,7 @@ TEST_F(cmod_propagation_test, non_cmod_instruction)
    const vec4_builder bld = vec4_builder(v).at_end();
    dst_reg dest = dst_reg(v, glsl_type::uint_type);
    src_reg src0 = src_reg(v, glsl_type::uint_type);
-   src_reg zero(0u);
+   src_reg zero(brw_imm_ud(0u));
    bld.FBL(dest, src0);
    bld.CMP(bld.null_reg_ud(), src_reg(dest), zero, BRW_CONDITIONAL_GE);
 
@@ -288,7 +288,7 @@ TEST_F(cmod_propagation_test, intervening_flag_write)
    src_reg src0 = src_reg(v, glsl_type::float_type);
    src_reg src1 = src_reg(v, glsl_type::float_type);
    src_reg src2 = src_reg(v, glsl_type::float_type);
-   src_reg zero(0.0f);
+   src_reg zero(brw_imm_f(0.0f));
    bld.ADD(dest, src0, src1);
    bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE);
    bld.CMP(bld.null_reg_f(), src_reg(dest), zero, BRW_CONDITIONAL_GE);
@@ -328,7 +328,7 @@ TEST_F(cmod_propagation_test, intervening_flag_read)
    src_reg src0 = src_reg(v, glsl_type::float_type);
    src_reg src1 = src_reg(v, glsl_type::float_type);
    src_reg src2 = src_reg(v, glsl_type::float_type);
-   src_reg zero(0.0f);
+   src_reg zero(brw_imm_f(0.0f));
    bld.ADD(dest0, src0, src1);
    set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
    bld.CMP(bld.null_reg_f(), src_reg(dest0), zero, BRW_CONDITIONAL_GE);
@@ -367,7 +367,7 @@ TEST_F(cmod_propagation_test, intervening_dest_write)
    src_reg src0 = src_reg(v, glsl_type::float_type);
    src_reg src1 = src_reg(v, glsl_type::float_type);
    src_reg src2 = src_reg(v, glsl_type::vec2_type);
-   src_reg zero(0.0f);
+   src_reg zero(brw_imm_f(0.0f));
    bld.ADD(offset(dest, 2), src0, src1);
    bld.emit(SHADER_OPCODE_TEX, dest, src2)
       ->regs_written = 4;
@@ -409,7 +409,7 @@ TEST_F(cmod_propagation_test, intervening_flag_read_same_value)
    src_reg src0 = src_reg(v, glsl_type::float_type);
    src_reg src1 = src_reg(v, glsl_type::float_type);
    src_reg src2 = src_reg(v, glsl_type::float_type);
-   src_reg zero(0.0f);
+   src_reg zero(brw_imm_f(0.0f));
    dst_reg dest_null = bld.null_reg_f();
    dest_null.writemask = WRITEMASK_X;
 
@@ -449,7 +449,7 @@ TEST_F(cmod_propagation_test, negate)
    dst_reg dest = dst_reg(v, glsl_type::float_type);
    src_reg src0 = src_reg(v, glsl_type::float_type);
    src_reg src1 = src_reg(v, glsl_type::float_type);
-   src_reg zero(0.0f);
+   src_reg zero(brw_imm_f(0.0f));
    bld.ADD(dest, src0, src1);
    src_reg tmp_src = src_reg(dest);
    tmp_src.negate = true;
@@ -521,7 +521,7 @@ TEST_F(cmod_propagation_test, different_types_cmod_with_zero)
    dst_reg dest = dst_reg(v, glsl_type::int_type);
    src_reg src0 = src_reg(v, glsl_type::int_type);
    src_reg src1 = src_reg(v, glsl_type::int_type);
-   src_reg zero(0.0f);
+   src_reg zero(brw_imm_f(0.0f));
    bld.ADD(dest, src0, src1);
    bld.CMP(bld.null_reg_f(), retype(src_reg(dest), BRW_REGISTER_TYPE_F), zero,
            BRW_CONDITIONAL_GE);
@@ -555,8 +555,8 @@ TEST_F(cmod_propagation_test, andnz_non_one)
    const vec4_builder bld = vec4_builder(v).at_end();
    dst_reg dest = dst_reg(v, glsl_type::int_type);
    src_reg src0 = src_reg(v, glsl_type::float_type);
-   src_reg zero(0.0f);
-   src_reg nonone(38);
+   src_reg zero(brw_imm_f(0.0f));
+   src_reg nonone(brw_imm_d(38));
 
    bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
    set_condmod(BRW_CONDITIONAL_NZ,
@@ -594,7 +594,7 @@ TEST_F(cmod_propagation_test, basic_vec4)
    dst_reg dest = dst_reg(v, glsl_type::vec4_type);
    src_reg src0 = src_reg(v, glsl_type::vec4_type);
    src_reg src1 = src_reg(v, glsl_type::vec4_type);
-   src_reg zero(0.0f);
+   src_reg zero(brw_imm_f(0.0f));
 
    bld.MUL(dest, src0, src1);
    bld.CMP(bld.null_reg_f(), src_reg(dest), zero, BRW_CONDITIONAL_NZ);
@@ -628,7 +628,7 @@ TEST_F(cmod_propagation_test, basic_vec4_different_dst_writemask)
    dest.writemask = WRITEMASK_X;
    src_reg src0 = src_reg(v, glsl_type::vec4_type);
    src_reg src1 = src_reg(v, glsl_type::vec4_type);
-   src_reg zero(0.0f);
+   src_reg zero(brw_imm_f(0.0f));
    dst_reg dest_null = bld.null_reg_f();
 
    bld.MUL(dest, src0, src1);
@@ -668,7 +668,7 @@ TEST_F(cmod_propagation_test, mad_one_component_vec4)
    src_reg src2 = src_reg(v, glsl_type::vec4_type);
    src0.swizzle = src1.swizzle = src2.swizzle = BRW_SWIZZLE_XXXX;
    src2.negate = true;
-   src_reg zero(0.0f);
+   src_reg zero(brw_imm_f(0.0f));
    src_reg tmp(dest);
    tmp.swizzle = BRW_SWIZZLE_XXXX;
    dst_reg dest_null = bld.null_reg_f();
@@ -710,7 +710,7 @@ TEST_F(cmod_propagation_test, mad_more_one_component_vec4)
    src_reg src2 = src_reg(v, glsl_type::vec4_type);
    src0.swizzle = src1.swizzle = src2.swizzle = BRW_SWIZZLE_XXXX;
    src2.negate = true;
-   src_reg zero(0.0f);
+   src_reg zero(brw_imm_f(0.0f));
    src_reg tmp(dest);
    tmp.swizzle = BRW_SWIZZLE_XXXX;
    dst_reg dest_null = bld.null_reg_f();
@@ -751,7 +751,7 @@ TEST_F(cmod_propagation_test, cmp_mov_vec4)
    src_reg src0 = src_reg(v, glsl_type::ivec4_type);
    src0.swizzle = BRW_SWIZZLE_XXXX;
    src0.file = UNIFORM;
-   src_reg nonone = retype(src_reg(16), BRW_REGISTER_TYPE_D);
+   src_reg nonone = retype(brw_imm_d(16), BRW_REGISTER_TYPE_D);
    src_reg mov_src = src_reg(dest);
    mov_src.swizzle = BRW_SWIZZLE_XXXX;
    dst_reg dest_null = bld.null_reg_d();
@@ -790,7 +790,7 @@ TEST_F(cmod_propagation_test, mul_cmp_different_channels_vec4)
    dst_reg dest = dst_reg(v, glsl_type::vec4_type);
    src_reg src0 = src_reg(v, glsl_type::vec4_type);
    src_reg src1 = src_reg(v, glsl_type::vec4_type);
-   src_reg zero(0.0f);
+   src_reg zero(brw_imm_f(0.0f));
    src_reg cmp_src = src_reg(dest);
    cmp_src.swizzle = BRW_SWIZZLE4(0,1,3,2);
 
diff --git a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
index a1f91d9c56a..ede409b6919 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
@@ -162,7 +162,7 @@ TEST_F(copy_propagation_test, test_swizzle_writemask)
                                                       SWIZZLE_X,
                                                       SWIZZLE_Z))));
 
-   v->emit(v->MOV(writemask(a, WRITEMASK_XYZ), src_reg(1.0f)));
+   v->emit(v->MOV(writemask(a, WRITEMASK_XYZ), brw_imm_f(1.0f)));
 
    vec4_instruction *test_mov =
       v->MOV(c, swizzle(src_reg(b), BRW_SWIZZLE4(SWIZZLE_W,
diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
index d84e2e98ec0..90a6bc3618f 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
@@ -135,7 +135,7 @@ TEST_F(register_coalesce_test, test_compute_to_mrf)
    m0.writemask = WRITEMASK_X;
    m0.type = BRW_REGISTER_TYPE_F;
 
-   vec4_instruction *mul = v->emit(v->MUL(temp, something, src_reg(1.0f)));
+   vec4_instruction *mul = v->emit(v->MUL(temp, something, brw_imm_f(1.0f)));
    v->emit(v->MOV(m0, src_reg(temp)));
 
    register_coalesce(v);
@@ -159,7 +159,7 @@ TEST_F(register_coalesce_test, test_multiple_use)
    m1.type = BRW_REGISTER_TYPE_F;
 
    src_reg src = src_reg(temp);
-   vec4_instruction *mul = v->emit(v->MUL(temp, something, src_reg(1.0f)));
+   vec4_instruction *mul = v->emit(v->MUL(temp, something, brw_imm_f(1.0f)));
    src.swizzle = BRW_SWIZZLE_XXXX;
    v->emit(v->MOV(m0, src));
    src.swizzle = BRW_SWIZZLE_XYZW;

From a5b3115f0a9ede775b332b1a669de570668e871c Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 2 Nov 2015 12:25:24 -0800
Subject: [PATCH 119/335] i965: Drop IMM fs_reg/src_reg -> brw_reg conversions.

The previous two commits make this unnecessary.

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 .../drivers/dri/i965/brw_fs_generator.cpp     | 32 +------------------
 src/mesa/drivers/dri/i965/brw_vec4.cpp        |  6 +---
 2 files changed, 2 insertions(+), 36 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index e5a286a763b..77969c4dc12 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -89,39 +89,9 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
       brw_reg.abs = reg->abs;
       brw_reg.negate = reg->negate;
       break;
-   case IMM:
-      assert(reg->stride == ((reg->type == BRW_REGISTER_TYPE_V ||
-                              reg->type == BRW_REGISTER_TYPE_UV ||
-                              reg->type == BRW_REGISTER_TYPE_VF) ? 1 : 0));
-
-      switch (reg->type) {
-      case BRW_REGISTER_TYPE_F:
-	 brw_reg = brw_imm_f(reg->f);
-	 break;
-      case BRW_REGISTER_TYPE_D:
-	 brw_reg = brw_imm_d(reg->d);
-	 break;
-      case BRW_REGISTER_TYPE_UD:
-	 brw_reg = brw_imm_ud(reg->ud);
-	 break;
-      case BRW_REGISTER_TYPE_W:
-	 brw_reg = brw_imm_w(reg->d);
-	 break;
-      case BRW_REGISTER_TYPE_UW:
-	 brw_reg = brw_imm_uw(reg->ud);
-	 break;
-      case BRW_REGISTER_TYPE_VF:
-         brw_reg = brw_imm_vf(reg->ud);
-         break;
-      case BRW_REGISTER_TYPE_V:
-         brw_reg = brw_imm_v(reg->ud);
-         break;
-      default:
-	 unreachable("not reached");
-      }
-      break;
    case ARF:
    case FIXED_GRF:
+   case IMM:
       brw_reg = *static_cast<struct brw_reg *>(reg);
       break;
    case BAD_FILE:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 06b70778138..44893e3e593 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1719,11 +1719,6 @@ vec4_visitor::convert_to_hw_regs()
             reg.negate = src.negate;
             break;
 
-         case IMM:
-            reg = brw_imm_reg(src.type);
-            reg.ud = src.ud;
-            break;
-
          case UNIFORM:
             reg = stride(brw_vec4_grf(prog_data->base.dispatch_grf_start_reg +
                                       (src.nr + src.reg_offset) / 2,
@@ -1740,6 +1735,7 @@ vec4_visitor::convert_to_hw_regs()
 
          case ARF:
          case FIXED_GRF:
+         case IMM:
             continue;
 
          case BAD_FILE:

From 9e40a621c177d595ffd1cf094246219e7067d768 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Thu, 19 Nov 2015 09:51:02 +0100
Subject: [PATCH 120/335] nv50: add NV84_3D macro

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/nv50/nv50_screen.c | 2 +-
 src/gallium/drivers/nouveau/nv50/nv50_vbo.c    | 4 ++--
 src/gallium/drivers/nouveau/nv50/nv50_winsys.h | 1 +
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 4e7201d7dd9..cc7984d307b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -686,7 +686,7 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
    BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
    PUSH_DATA (push, 0);
    if (screen->base.class_3d >= NV84_3D_CLASS) {
-      BEGIN_NV04(push, SUBC_3D(NV84_3D_VERTEX_ID_BASE), 1);
+      BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);
       PUSH_DATA (push, 0);
    }
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index 9aa593f919e..ac0c4d99dbb 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -487,7 +487,7 @@ nv50_draw_arrays(struct nv50_context *nv50,
       BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
       PUSH_DATA (push, 0);
       if (nv50->screen->base.class_3d >= NV84_3D_CLASS) {
-         BEGIN_NV04(push, SUBC_3D(NV84_3D_VERTEX_ID_BASE), 1);
+         BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);
          PUSH_DATA (push, 0);
       }
       nv50->state.index_bias = 0;
@@ -613,7 +613,7 @@ nv50_draw_elements(struct nv50_context *nv50, bool shorten,
       BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
       PUSH_DATA (push, index_bias);
       if (nv50->screen->base.class_3d >= NV84_3D_CLASS) {
-         BEGIN_NV04(push, SUBC_3D(NV84_3D_VERTEX_ID_BASE), 1);
+         BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);
          PUSH_DATA (push, index_bias);
       }
       nv50->state.index_bias = index_bias;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
index 76f1b41ea70..68002305d72 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
@@ -49,6 +49,7 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
 
 #define SUBC_3D(m) 3, (m)
 #define NV50_3D(n) SUBC_3D(NV50_3D_##n)
+#define NV84_3D(n) SUBC_3D(NV84_3D_##n)
 #define NVA0_3D(n) SUBC_3D(NVA0_3D_##n)
 
 #define SUBC_2D(m) 4, (m)

From 0cfc1304bece9cbc17a39b009956b2830c6fd2f5 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Thu, 19 Nov 2015 09:51:03 +0100
Subject: [PATCH 121/335] nv50: allow using inline vertex data submit when
 gl_VertexID is used

The hardware can actually generate vertexid when vertices come from
a client-side buffer like when glDrawElements is used.

This doesn't fix (or break) any piglit tests but it improves the
previous attempt of Ilia (c830d19 "nv50: avoid using inline vertex
data submit when gl_VertexID is used")

The only disadvantage is that it only works on G84+, but we don't really
care about that weird and old NV50 chipset.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 .../drivers/nouveau/nv50/nv50_program.c       |  3 +-
 .../drivers/nouveau/nv50/nv50_program.h       |  2 +-
 src/gallium/drivers/nouveau/nv50/nv50_push.c  | 42 ++++++++++++++++++-
 .../nouveau/nv50/nv50_state_validate.c        |  3 +-
 src/gallium/drivers/nouveau/nv50/nv50_vbo.c   | 11 +----
 5 files changed, 46 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index 48057d20f4e..a4b8ddfda95 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -66,7 +66,6 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
       case TGSI_SEMANTIC_VERTEXID:
          prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
          prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
-         prog->vp.vertexid = 1;
          continue;
       default:
          break;
@@ -383,6 +382,8 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
    prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
    prog->tls_space = info->bin.tlsSpace;
 
+   prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;
+
    if (prog->type == PIPE_SHADER_FRAGMENT) {
       if (info->prop.fp.writesDepth) {
          prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h
index f0016707163..1de5122a56e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -76,9 +76,9 @@ struct nv50_program {
       ubyte psiz;        /* output slot of point size */
       ubyte bfc[2];      /* indices into varying for FFC (FP) or BFC (VP) */
       ubyte edgeflag;
-      ubyte vertexid;
       ubyte clpd[2];     /* output slot of clip distance[i]'s 1st component */
       ubyte clpd_nr;
+      bool need_vertex_id;
    } vp;
 
    struct {
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_push.c b/src/gallium/drivers/nouveau/nv50/nv50_push.c
index f31eaa0e314..cbef95d07f6 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_push.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_push.c
@@ -24,6 +24,10 @@ struct push_context {
    struct translate *translate;
 
    bool primitive_restart;
+
+   bool need_vertex_id;
+   int32_t index_bias;
+
    uint32_t prim;
    uint32_t restart_index;
    uint32_t instance_id;
@@ -74,6 +78,11 @@ emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
 
       size = ctx->vertex_words * nr;
 
+      if (unlikely(ctx->need_vertex_id)) {
+         BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
+         PUSH_DATA (ctx->push, *elts + ctx->index_bias);
+      }
+
       BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
 
       ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->instance_id,
@@ -107,6 +116,11 @@ emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
 
       size = ctx->vertex_words * nr;
 
+      if (unlikely(ctx->need_vertex_id)) {
+         BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
+         PUSH_DATA (ctx->push, *elts + ctx->index_bias);
+      }
+
       BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
 
       ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->instance_id,
@@ -140,6 +154,11 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
 
       size = ctx->vertex_words * nr;
 
+      if (unlikely(ctx->need_vertex_id)) {
+         BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
+         PUSH_DATA (ctx->push, *elts + ctx->index_bias);
+      }
+
       BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
 
       ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->instance_id,
@@ -161,10 +180,18 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
 static void
 emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
 {
+   uint32_t elts = 0;
+
    while (count) {
       unsigned push = MIN2(count, ctx->packet_vertex_limit);
       unsigned size = ctx->vertex_words * push;
 
+      if (unlikely(ctx->need_vertex_id)) {
+         /* For non-indexed draws, gl_VertexID goes up after each vertex. */
+         BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
+         PUSH_DATA (ctx->push, elts++);
+      }
+
       BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
 
       ctx->translate->run(ctx->translate, start, push, 0, ctx->instance_id,
@@ -216,7 +243,14 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
 
    ctx.push = nv50->base.pushbuf;
    ctx.translate = nv50->vertex->translate;
-   ctx.packet_vertex_limit = nv50->vertex->packet_vertex_limit;
+
+   ctx.need_vertex_id = nv50->screen->base.class_3d >= NV84_3D_CLASS &&
+      nv50->vertprog->vp.need_vertex_id && (nv50->vertex->num_elements < 32);
+   ctx.index_bias = info->index_bias;
+
+   /* For indexed draws, gl_VertexID must be emitted for every vertex. */
+   ctx.packet_vertex_limit =
+      ctx.need_vertex_id ? 1 : nv50->vertex->packet_vertex_limit;
    ctx.vertex_words = nv50->vertex->vertex_size;
 
    assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);
@@ -307,4 +341,10 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
       ctx.instance_id++;
       ctx.prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
    }
+
+   if (unlikely(ctx.need_vertex_id)) {
+      /* Reset gl_VertexID to prevent future indexed draws to be confused. */
+      BEGIN_NV04(ctx.push, NV84_3D(VERTEX_ID_BASE), 1);
+      PUSH_DATA (ctx.push, nv50->state.index_bias);
+   }
 }
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
index b6181edf24f..02a759c23ad 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -503,8 +503,7 @@ static struct state_validate {
     { nv50_validate_samplers,      NV50_NEW_SAMPLERS },
     { nv50_stream_output_validate, NV50_NEW_STRMOUT |
                                    NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
-    { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS |
-                                   NV50_NEW_VERTPROG },
+    { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS },
     { nv50_validate_min_samples,   NV50_NEW_MIN_SAMPLES },
 };
 #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index ac0c4d99dbb..85878d5fcc7 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -294,8 +294,7 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
    uint64_t addrs[PIPE_MAX_ATTRIBS];
    uint32_t limits[PIPE_MAX_ATTRIBS];
    struct nouveau_pushbuf *push = nv50->base.pushbuf;
-   struct nv50_vertex_stateobj dummy = {};
-   struct nv50_vertex_stateobj *vertex = nv50->vertex ? nv50->vertex : &dummy;
+   struct nv50_vertex_stateobj *vertex = nv50->vertex;
    struct pipe_vertex_buffer *vb;
    struct nv50_vertex_element *ve;
    uint32_t mask;
@@ -303,14 +302,6 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
    unsigned i;
    const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts);
 
-   /* A vertexid is not generated for inline data uploads. Have to use a
-    * VBO. This check must come after the vertprog has been validated,
-    * otherwise vertexid may be unset.
-    */
-   assert(nv50->vertprog->translated);
-   if (nv50->vertprog->vp.vertexid)
-      nv50->vbo_push_hint = 0;
-
    if (unlikely(vertex->need_conversion))
       nv50->vbo_fifo = ~0;
    else

From 0a4462ad6ee921ed805ad51e330b819b95ee90d6 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Wed, 18 Nov 2015 23:54:25 -0500
Subject: [PATCH 122/335] freedreno/a4xx: support lod_bias

The lower layers assume that we support this, and it's been core since
GL 1.4. This fixes a slew of piglit tests, especially around
tex-miplevel-selection.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: mesa-stable@lists.freedesktop.org
---
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h    | 6 ++++++
 src/gallium/drivers/freedreno/a4xx/fd4_texture.c | 1 +
 2 files changed, 7 insertions(+)

diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index 76f525cb0c2..07d0db16cde 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -2741,6 +2741,12 @@ static inline uint32_t A4XX_TEX_SAMP_0_ANISO(enum a4xx_tex_aniso val)
 {
 	return ((val) << A4XX_TEX_SAMP_0_ANISO__SHIFT) & A4XX_TEX_SAMP_0_ANISO__MASK;
 }
+#define A4XX_TEX_SAMP_0_LOD_BIAS__MASK				0xfff80000
+#define A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT				19
+static inline uint32_t A4XX_TEX_SAMP_0_LOD_BIAS(float val)
+{
+	return ((((int32_t)(val * 256.0))) << A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A4XX_TEX_SAMP_0_LOD_BIAS__MASK;
+}
 
 #define REG_A4XX_TEX_SAMP_1					0x00000001
 #define A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK			0x0000000e
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index 297854f6505..886b4547929 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -127,6 +127,7 @@ fd4_sampler_state_create(struct pipe_context *pctx,
 		COND(!cso->normalized_coords, A4XX_TEX_SAMP_1_UNNORM_COORDS);
 
 	if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+		so->texsamp0 |= A4XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias);
 		so->texsamp1 |=
 			A4XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
 			A4XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);

From b17a405609156dc554d2d7c148c02e24bfdecaca Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 19 Nov 2015 00:06:46 -0500
Subject: [PATCH 123/335] freedreno/a4xx: allow seamless cubemap filtering to
 be enabled per-texture

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h    | 1 +
 src/gallium/drivers/freedreno/a4xx/fd4_texture.c | 1 +
 src/gallium/drivers/freedreno/freedreno_screen.c | 2 +-
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index 07d0db16cde..ec002f67980 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -2755,6 +2755,7 @@ static inline uint32_t A4XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val
 {
 	return ((val) << A4XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK;
 }
+#define A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF			0x00000010
 #define A4XX_TEX_SAMP_1_UNNORM_COORDS				0x00000020
 #define A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR			0x00000040
 #define A4XX_TEX_SAMP_1_MAX_LOD__MASK				0x000fff00
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index 886b4547929..00c257b78e5 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -124,6 +124,7 @@ fd4_sampler_state_create(struct pipe_context *pctx,
 
 	so->texsamp1 =
 //		COND(miplinear, A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) |
+		COND(!cso->seamless_cube_map, A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) |
 		COND(!cso->normalized_coords, A4XX_TEX_SAMP_1_UNNORM_COORDS);
 
 	if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 456917730d6..8ab78d8187c 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -178,6 +178,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_CONDITIONAL_RENDER:
 	case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
 	case PIPE_CAP_FAKE_SW_MSAA:
+	case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
 		return is_a3xx(screen) || is_a4xx(screen);
 
 	case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
@@ -193,7 +194,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 
 	case PIPE_CAP_DEPTH_CLIP_DISABLE:
 	case PIPE_CAP_CLIP_HALFZ:
-	case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
 		return is_a3xx(screen);
 
 	case PIPE_CAP_TEXTURE_FLOAT_LINEAR:

From a05e5491c367cdb7db93a2cc47a839329b90e513 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 19 Nov 2015 00:32:39 -0500
Subject: [PATCH 124/335] freedreno/a4xx: add depth clamp and halfz clip

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h       | 2 ++
 src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c | 5 +++++
 src/gallium/drivers/freedreno/freedreno_screen.c    | 6 ++----
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index ec002f67980..ef4f53fa7e5 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -2059,6 +2059,8 @@ static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val)
 #define REG_A4XX_GRAS_PERFCTR_TSE_SEL_3				0x00000c8b
 
 #define REG_A4XX_GRAS_CL_CLIP_CNTL				0x00002000
+#define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE			0x00008000
+#define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z			0x00400000
 
 #define REG_A4XX_GRAS_CLEAR_CNTL				0x00002003
 #define A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR			0x00000001
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
index dc7e98b149d..d894b6b9e09 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
@@ -90,5 +90,10 @@ fd4_rasterizer_state_create(struct pipe_context *pctx,
 	if (cso->offset_tri)
 		so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
 
+	if (!cso->depth_clip)
+		so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE;
+	if (cso->clip_halfz)
+		so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z;
+
 	return so;
 }
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 8ab78d8187c..adb0982132d 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -179,6 +179,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
 	case PIPE_CAP_FAKE_SW_MSAA:
 	case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+	case PIPE_CAP_DEPTH_CLIP_DISABLE:
+	case PIPE_CAP_CLIP_HALFZ:
 		return is_a3xx(screen) || is_a4xx(screen);
 
 	case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
@@ -192,10 +194,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 		if (is_a4xx(screen)) return 16383;
 		return 0;
 
-	case PIPE_CAP_DEPTH_CLIP_DISABLE:
-	case PIPE_CAP_CLIP_HALFZ:
-		return is_a3xx(screen);
-
 	case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
 	case PIPE_CAP_CUBE_MAP_ARRAY:
 		return is_a4xx(screen);

From 769b3ab6c5111f50502f9df0e8930c8d13f475c7 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 19 Nov 2015 01:37:14 -0500
Subject: [PATCH 125/335] freedreno/a4xx: fix 5_5_5_1 texture sampler format

This fixes teximage-colors, fbo-generatemipmap-formats, and probably
others (in relation to the RGB5 formats, others still fail).

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: mesa-stable@lists.freedesktop.org
---
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index ef4f53fa7e5..a3fb570597f 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -154,7 +154,7 @@ enum a4xx_vtx_fmt {
 
 enum a4xx_tex_fmt {
 	TFMT4_5_6_5_UNORM = 11,
-	TFMT4_5_5_5_1_UNORM = 10,
+	TFMT4_5_5_5_1_UNORM = 9,
 	TFMT4_4_4_4_4_UNORM = 8,
 	TFMT4_X8Z24_UNORM = 71,
 	TFMT4_10_10_10_2_UNORM = 33,

From 006e4f070f08ff1e1731863940bc51de9e97b865 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Mon, 19 Oct 2015 11:57:51 -0400
Subject: [PATCH 126/335] nir: add nir_var_all enum

Otherwise, passing -1 gets you:

  error: invalid conversion from 'int' to 'nir_variable_mode' [-fpermissive]

Signed-off-by: Rob Clark <robdclark@gmail.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir.c                  | 4 ++++
 src/glsl/nir/nir.h                  | 1 +
 src/glsl/nir/nir_lower_io.c         | 2 +-
 src/mesa/drivers/dri/i965/brw_nir.c | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 93c18fbaea5..dba18c96a90 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -107,6 +107,10 @@ void
 nir_shader_add_variable(nir_shader *shader, nir_variable *var)
 {
    switch (var->data.mode) {
+   case nir_var_all:
+      assert(!"invalid mode");
+      break;
+
    case nir_var_local:
       assert(!"nir_shader_add_variable cannot be used for local variables");
       break;
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index e9d722eed7e..09eb712c06a 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -82,6 +82,7 @@ typedef struct {
 } nir_state_slot;
 
 typedef enum {
+   nir_var_all = -1,
    nir_var_shader_in,
    nir_var_shader_out,
    nir_var_global,
diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
index 00a31458310..5683e69d865 100644
--- a/src/glsl/nir/nir_lower_io.c
+++ b/src/glsl/nir/nir_lower_io.c
@@ -197,7 +197,7 @@ nir_lower_io_block(nir_block *block, void *void_state)
 
       nir_variable_mode mode = intrin->variables[0]->var->data.mode;
 
-      if (state->mode != -1 && state->mode != mode)
+      if (state->mode != nir_var_all && state->mode != mode)
          continue;
 
       if (mode != nir_var_shader_in &&
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 7896f29803b..bd91254f5bf 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -290,7 +290,7 @@ brw_create_nir(struct brw_context *brw,
    nir_assign_var_locations(&nir->uniforms,
                             &nir->num_uniforms,
                             is_scalar ? type_size_scalar : type_size_vec4);
-   OPT_V(nir_lower_io, -1, is_scalar ? type_size_scalar : type_size_vec4);
+   OPT_V(nir_lower_io, nir_var_all, is_scalar ? type_size_scalar : type_size_vec4);
 
    OPT(nir_remove_dead_variables);
 

From 01e94d8d5d32d0debf35533180bf4633a139dd63 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Mon, 26 Oct 2015 13:29:45 -0400
Subject: [PATCH 127/335] nir/print: show shader name/label if set

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir_print.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
index f7f5fdf3181..c4160e6b50f 100644
--- a/src/glsl/nir/nir_print.c
+++ b/src/glsl/nir/nir_print.c
@@ -977,6 +977,12 @@ nir_print_shader(nir_shader *shader, FILE *fp)
 
    fprintf(fp, "shader: %s\n", gl_shader_stage_name(shader->stage));
 
+   if (shader->info.name)
+      fprintf(fp, "name: %s\n", shader->info.name);
+
+   if (shader->info.label)
+      fprintf(fp, "label: %s\n", shader->info.label);
+
    nir_foreach_variable(var, &shader->uniforms) {
       print_var_decl(var, &state);
    }

From fec9367deb51003ccb74aca796e759502cfedfeb Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Wed, 4 Nov 2015 10:05:32 -0500
Subject: [PATCH 128/335] nir/print: show # of uniforms/inputs/outputs

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/glsl/nir/nir_print.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
index c4160e6b50f..54b8cc64a9b 100644
--- a/src/glsl/nir/nir_print.c
+++ b/src/glsl/nir/nir_print.c
@@ -983,6 +983,10 @@ nir_print_shader(nir_shader *shader, FILE *fp)
    if (shader->info.label)
       fprintf(fp, "label: %s\n", shader->info.label);
 
+   fprintf(fp, "inputs: %u\n", shader->num_inputs);
+   fprintf(fp, "outputs: %u\n", shader->num_outputs);
+   fprintf(fp, "uniforms: %u\n", shader->num_uniforms);
+
    nir_foreach_variable(var, &shader->uniforms) {
       print_var_decl(var, &state);
    }

From c73f40c4731235153e29222eee8e12241166094a Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Wed, 4 Nov 2015 16:10:52 -0500
Subject: [PATCH 129/335] nir: fix missing increments of num_inputs/num_outputs

Note: not quite perfect, we should use type_size vfunc (in
compiler_options or nir_shader?) to determine how much we
increment num_inputs/outputs/uniforms.  But we don't have
that yet, so let's at least fix things for the existing
users of these passes.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Acked-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir_lower_clip.c            | 2 ++
 src/glsl/nir/nir_lower_two_sided_color.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/src/glsl/nir/nir_lower_clip.c b/src/glsl/nir/nir_lower_clip.c
index 31ccfb2c02b..c58c7785b3f 100644
--- a/src/glsl/nir/nir_lower_clip.c
+++ b/src/glsl/nir/nir_lower_clip.c
@@ -55,9 +55,11 @@ create_clipdist_var(nir_shader *shader, unsigned drvloc,
 
    if (output) {
       exec_list_push_tail(&shader->outputs, &var->node);
+      shader->num_outputs++; /* TODO use type_size() */
    }
    else {
       exec_list_push_tail(&shader->inputs, &var->node);
+      shader->num_inputs++;  /* TODO use type_size() */
    }
    return var;
 }
diff --git a/src/glsl/nir/nir_lower_two_sided_color.c b/src/glsl/nir/nir_lower_two_sided_color.c
index db519bf513b..6995b9d6bc1 100644
--- a/src/glsl/nir/nir_lower_two_sided_color.c
+++ b/src/glsl/nir/nir_lower_two_sided_color.c
@@ -60,6 +60,8 @@ create_input(nir_shader *shader, unsigned drvloc, gl_varying_slot slot)
 
    exec_list_push_tail(&shader->inputs, &var->node);
 
+   shader->num_inputs++;     /* TODO use type_size() */
+
    return var;
 }
 

From acca6c65d3c793885b343aad17cbdbad7fbe1830 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Thu, 5 Nov 2015 10:23:48 -0500
Subject: [PATCH 130/335] nir: add nir_ssa_for_alu_src()

Using something like:

   numer = nir_ssa_for_src(bld, alu->src[0].src,
                           nir_ssa_alu_instr_src_components(alu, 0));

for alu src's with swizzle, like:

   vec1 ssa_10 = intrinsic load_uniform () () (0, 0)
   vec2 ssa_11 = intrinsic load_uniform () () (1, 0)
   vec2 ssa_2 = udiv ssa_10.xx, ssa_11

ends up turning into something like:

   vec1 ssa_10 = intrinsic load_uniform () () (0, 0)
   vec2 ssa_11 = intrinsic load_uniform () () (1, 0)
   vec2 ssa_13 = imov ssa_10
   ...

because nir_ssa_for_src() ignores the original nir_alu_src's swizzle.
Instead, for alu instructions, nir_ssa_for_alu_src() should be used to
ensure the original alu src's swizzle doesn't get lost in translation:

   vec1 ssa_10 = intrinsic load_uniform () () (0, 0)
   vec2 ssa_11 = intrinsic load_uniform () () (1, 0)
   vec2 ssa_13 = imov ssa_10.xx
   ...

v2: check for abs/neg, and re-use existing nir_alu_src

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir_builder.h    | 21 +++++++++++++++++++++
 src/glsl/nir/nir_lower_idiv.c |  6 ++----
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h
index 624329d0a8a..d09f929405b 100644
--- a/src/glsl/nir/nir_builder.h
+++ b/src/glsl/nir/nir_builder.h
@@ -259,6 +259,8 @@ nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c)
 /**
  * Turns a nir_src into a nir_ssa_def * so it can be passed to
  * nir_build_alu()-based builder calls.
+ *
+ * See nir_ssa_for_alu_src() for alu instructions.
  */
 static inline nir_ssa_def *
 nir_ssa_for_src(nir_builder *build, nir_src src, int num_components)
@@ -274,6 +276,25 @@ nir_ssa_for_src(nir_builder *build, nir_src src, int num_components)
    return nir_imov_alu(build, alu, num_components);
 }
 
+/**
+ * Similar to nir_ssa_for_src(), but for alu src's, respecting the
+ * nir_alu_src's swizzle.
+ */
+static inline nir_ssa_def *
+nir_ssa_for_alu_src(nir_builder *build, nir_alu_instr *instr, unsigned srcn)
+{
+   static uint8_t trivial_swizzle[4] = { 0, 1, 2, 3 };
+   nir_alu_src *src = &instr->src[srcn];
+   unsigned num_components = nir_ssa_alu_instr_src_components(instr, srcn);
+
+   if (src->src.is_ssa && (src->src.ssa->num_components == num_components) &&
+       !src->abs && !src->negate &&
+       (memcmp(src->swizzle, trivial_swizzle, num_components) == 0))
+      return src->src.ssa;
+
+   return nir_imov_alu(build, *src, num_components);
+}
+
 static inline nir_ssa_def *
 nir_load_var(nir_builder *build, nir_variable *var)
 {
diff --git a/src/glsl/nir/nir_lower_idiv.c b/src/glsl/nir/nir_lower_idiv.c
index 3580ced0ac0..f64b3eac8a0 100644
--- a/src/glsl/nir/nir_lower_idiv.c
+++ b/src/glsl/nir/nir_lower_idiv.c
@@ -52,10 +52,8 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu)
 
    bld->cursor = nir_before_instr(&alu->instr);
 
-   numer = nir_ssa_for_src(bld, alu->src[0].src,
-                           nir_ssa_alu_instr_src_components(alu, 0));
-   denom = nir_ssa_for_src(bld, alu->src[1].src,
-                           nir_ssa_alu_instr_src_components(alu, 1));
+   numer = nir_ssa_for_alu_src(bld, alu, 0);
+   denom = nir_ssa_for_alu_src(bld, alu, 1);
 
    if (is_signed) {
       af = nir_i2f(bld, numer);

From b1b9f68d4cc29130d0cb6219b02e13c67c0c7f10 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 17 Nov 2015 15:26:27 -0800
Subject: [PATCH 131/335] Import current draft of EXT_shader_samples_identical
 spec

v2: Add Neil to the list of contributors.  I meant to do that before,
but Matt reminded me.

v3: Fix typos noticed by Nicolai.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 docs/specs/EXT_shader_samples_identical.txt | 176 ++++++++++++++++++++
 1 file changed, 176 insertions(+)
 create mode 100644 docs/specs/EXT_shader_samples_identical.txt

diff --git a/docs/specs/EXT_shader_samples_identical.txt b/docs/specs/EXT_shader_samples_identical.txt
new file mode 100644
index 00000000000..a8a901b8bbd
--- /dev/null
+++ b/docs/specs/EXT_shader_samples_identical.txt
@@ -0,0 +1,176 @@
+Name
+
+    EXT_shader_samples_identical
+
+Name Strings
+
+    GL_EXT_shader_samples_identical
+
+Contact
+
+    Ian Romanick, Intel (ian.d.romanick 'at' intel.com)
+
+Contributors
+
+    Chris Forbes, Mesa
+    Magnus Wendt, Intel
+    Neil S. Roberts, Intel
+    Graham Sellers, AMD
+
+Status
+
+    XXX - Not complete yet.
+
+Version
+
+    Last Modified Date: November 19, 2015
+    Revision: 6
+
+Number
+
+    TBD
+
+Dependencies
+
+    OpenGL 3.2, or OpenGL ES 3.1, or ARB_texture_multisample is required.
+
+    This extension is written against the OpenGL 4.5 (Core Profile)
+    Specification
+
+Overview
+
+    Multisampled antialiasing has become a common method for improving the
+    quality of rendered images.  Multisampling differs from supersampling in
+    that the color of a primitive that covers all or part of a pixel is
+    resolved once, regardless of the number of samples covered.  If a large
+    polygon is rendered, the colors of all samples in each interior pixel will
+    be the same.  This suggests a simple compression scheme that can reduce
+    the necessary memory bandwidth requirements.  In one such scheme, each
+    sample is stored in a separate slice of the multisample surface.  An
+    additional multisample control surface (MCS) contains a mapping from pixel
+    samples to slices.
+
+    If all the values stored in the MCS for a particular pixel are the same,
+    then all the samples have the same value.  Applications can take advantage
+    of this information to reduce the bandwidth of reading multisample
+    textures.  A custom multisample resolve filter could optimize resolving
+    pixels where every sample is identical by reading the color once.
+
+    color = texelFetch(sampler, coordinate, 0);
+    if (!textureSamplesIdenticalEXT(sampler, coordinate)) {
+        for (int i = 1; i < MAX_SAMPLES; i++) {
+            vec4 c = texelFetch(sampler, coordinate, i);
+
+            //... accumulate c into color
+
+        }
+    }
+
+New Procedures and Functions
+
+    None.
+
+New Tokens
+
+    None.
+
+Additions to the OpenGL 4.5 (Core Profile) Specification
+
+    None.
+
+Modifications to The OpenGL Shading Language Specification, Version 4.50.5
+
+    Including the following line in a shader can be used to control the
+    language features described in this extension:
+
+        #extension GL_EXT_shader_samples_identical
+
+    A new preprocessor #define is added to the OpenGL Shading Language:
+
+        #define GL_EXT_shader_samples_identical
+
+    Add to the table in section 8.7 "Texture Lookup Functions"
+
+    Syntax:
+
+        bool textureSamplesIdenticalEXT(gsampler2DMS sampler, ivec2 coord)
+
+        bool textureSamplesIdenticalEXT(gsampler2DMSArray sampler,
+                                        ivec3 coord)
+
+    Description:
+
+        Returns true if it can be determined that all samples within the texel
+        of the multisample texture bound to <sampler> at <coord> contain the
+        same values or false if this cannot be determined.
+
+Additions to the AGL/EGL/GLX/WGL Specifications
+
+    None
+
+Errors
+
+    None
+
+New State
+
+    None
+
+New Implementation Dependent State
+
+    None
+
+Issues
+
+    1) What should the new functions be called?
+
+    RESOLVED: textureSamplesIdenticalEXT.  Initially
+    textureAllSamplesIdenticalEXT was considered, but
+    textureSamplesIdenticalEXT is more similar to the existing textureSamples
+    function.
+
+    2) It seems like applications could implement additional optimization if
+       they were provided with raw MCS data.  Should this extension also
+       provide that data?
+
+    There are a number of challenges in providing raw MCS data.  The biggest
+    problem being that the amount of MCS data depends on the number of
+    samples, and that is not known at compile time.  Additionally, without new
+    texelFetch functions, applications would have difficulty utilizing the
+    information.
+
+    Another option is to have a function that returns an array of tuples of
+    sample number and count.  This also has difficulties with the maximum
+    array size not being known at compile time.
+
+    RESOLVED: Do not expose raw MCS data in this extension.
+
+    3) Should this extension also extend SPIR-V?
+
+    RESOLVED: Yes, but this has not yet been written.
+
+    4) Is it possible for textureSamplesIdenticalEXT to report false negatives?
+
+    RESOLVED: Yes.  It is possible that the underlying hardware may not detect
+    that separate writes of the same color to different samples of a pixel are
+    the same.  The shader function is at the whim of the underlying hardware
+    implementation.  It is also possible that a compressed multisample surface
+    is not used.  In that case the function will likely always return false.
+
+Revision History
+
+    Rev  Date        Author    Changes
+    ---  ----------  --------  ---------------------------------------------
+      1  2014/08/20  cforbes   Initial version
+      2  2015/10/23  idr       Change from MESA to EXT.  Rebase on OpenGL 4.5,
+                               and add dependency on OpenGL ES 3.1.  Initial
+                               draft of overview section and issues 1 through
+                               3.
+      3  2015/10/27  idr       Typo fixes.
+      4  2015/11/10  idr       Rename extension from EXT_shader_multisample_compression
+                               to EXT_shader_samples_identical.
+                               Add issue #4.
+      5  2015/11/18  idr       Fix some typos spotted by gsellers.  Change the
+                               name of the function to
+                               textureSamplesIdenticalEXT.
+      6  2015/11/19  idr       Fix more typos spotted by Nicolai Hähnle.

From ff59700d29bb5db0010c3f7508336bf66a03d11f Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 17 Nov 2015 15:32:10 -0800
Subject: [PATCH 132/335] mesa: Extension tracking for
 EXT_shader_samples_identical

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/main/extensions_table.h | 1 +
 src/mesa/main/mtypes.h           | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index 20751688977..7facc7ff3ca 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -212,6 +212,7 @@ EXT(EXT_secondary_color                     , dummy_true
 EXT(EXT_separate_shader_objects             , dummy_true                             ,  x ,  x ,  x , ES2, 2013)
 EXT(EXT_separate_specular_color             , dummy_true                             , GLL,  x ,  x ,  x , 1997)
 EXT(EXT_shader_integer_mix                  , EXT_shader_integer_mix                 , GLL, GLC,  x ,  30, 2013)
+EXT(EXT_shader_samples_identical            , EXT_shader_samples_identical           , GLL, GLC,  x ,  31, 2015)
 EXT(EXT_shadow_funcs                        , ARB_shadow                             , GLL,  x ,  x ,  x , 2002)
 EXT(EXT_stencil_two_side                    , EXT_stencil_two_side                   , GLL,  x ,  x ,  x , 2001)
 EXT(EXT_stencil_wrap                        , dummy_true                             , GLL,  x ,  x ,  x , 2002)
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 4efdf1ee8c1..65276f9c56b 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3761,6 +3761,7 @@ struct gl_extensions
    GLboolean EXT_polygon_offset_clamp;
    GLboolean EXT_provoking_vertex;
    GLboolean EXT_shader_integer_mix;
+   GLboolean EXT_shader_samples_identical;
    GLboolean EXT_stencil_two_side;
    GLboolean EXT_texture_array;
    GLboolean EXT_texture_compression_latc;

From ef54434c52385bea59d47dbfcedc6845f33fb231 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 17 Nov 2015 15:36:15 -0800
Subject: [PATCH 133/335] glsl: Extension tracking for
 EXT_shader_samples_identical

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/glsl/glcpp/glcpp-parse.y    | 3 +++
 src/glsl/glsl_parser_extras.cpp | 1 +
 src/glsl/glsl_parser_extras.h   | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
index ab5ec8450b9..2fd4cf04079 100644
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -2512,6 +2512,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
 	if (extensions != NULL) {
 	   if (extensions->EXT_shader_integer_mix)
 	      add_builtin_define(parser, "GL_EXT_shader_integer_mix", 1);
+
+	   if (extensions->EXT_shader_samples_identical)
+	      add_builtin_define(parser, "GL_EXT_shader_samples_identical", 1);
 	}
 
 	if (version >= 150)
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 8fb05fae4c1..c54dcfdab7b 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -650,6 +650,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
    EXT(EXT_draw_buffers,               false,  true,     dummy_true),
    EXT(EXT_separate_shader_objects,    false, true,      dummy_true),
    EXT(EXT_shader_integer_mix,         true,  true,      EXT_shader_integer_mix),
+   EXT(EXT_shader_samples_identical,   true,  true,      EXT_shader_samples_identical),
    EXT(EXT_texture_array,              true,  false,     EXT_texture_array),
 };
 
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index 17f8490b8da..17ff0b5af79 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -606,6 +606,8 @@ struct _mesa_glsl_parse_state {
    bool EXT_separate_shader_objects_warn;
    bool EXT_shader_integer_mix_enable;
    bool EXT_shader_integer_mix_warn;
+   bool EXT_shader_samples_identical_enable;
+   bool EXT_shader_samples_identical_warn;
    bool EXT_texture_array_enable;
    bool EXT_texture_array_warn;
    /*@}*/

From 8343583557fbbf0e04ce8e834a71f00b219232ce Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 17 Nov 2015 16:54:31 -0800
Subject: [PATCH 134/335] glsl: Add ir_samples_identical opcode

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/glsl/ir.cpp                            |  6 +++++-
 src/glsl/ir.h                              |  2 ++
 src/glsl/ir_clone.cpp                      |  1 +
 src/glsl/ir_equals.cpp                     |  1 +
 src/glsl/ir_hv_accept.cpp                  |  1 +
 src/glsl/ir_print_visitor.cpp              | 10 ++++++++++
 src/glsl/ir_rvalue_visitor.cpp             |  1 +
 src/glsl/opt_tree_grafting.cpp             |  1 +
 src/mesa/program/ir_to_mesa.cpp            |  2 ++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  2 ++
 10 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 80cbdbf613f..f47100ee40f 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -1421,7 +1421,7 @@ ir_dereference::is_lvalue() const
 }
 
 
-static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples" };
+static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples", "samples_identical" };
 
 const char *ir_texture::opcode_string()
 {
@@ -1455,6 +1455,10 @@ ir_texture::set_sampler(ir_dereference *sampler, const glsl_type *type)
    } else if (this->op == ir_lod) {
       assert(type->vector_elements == 2);
       assert(type->base_type == GLSL_TYPE_FLOAT);
+   } else if (this->op == ir_samples_identical) {
+      assert(type == glsl_type::bool_type);
+      assert(sampler->type->base_type == GLSL_TYPE_SAMPLER);
+      assert(sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS);
    } else {
       assert(sampler->type->sampler_type == (int) type->base_type);
       if (sampler->type->sampler_shadow)
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index d59dee1e369..39156e0e98c 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1965,6 +1965,7 @@ enum ir_texture_opcode {
    ir_tg4,		/**< Texture gather */
    ir_query_levels,     /**< Texture levels query */
    ir_texture_samples,  /**< Texture samples query */
+   ir_samples_identical, /**< Query whether all samples are definitely identical. */
 };
 
 
@@ -1991,6 +1992,7 @@ enum ir_texture_opcode {
  * (lod <type> <sampler> <coordinate>)
  * (tg4 <type> <sampler> <coordinate> <offset> <component>)
  * (query_levels <type> <sampler>)
+ * (samples_identical <sampler> <coordinate>)
  */
 class ir_texture : public ir_rvalue {
 public:
diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp
index a2cd672d5d6..4484cc9d8a7 100644
--- a/src/glsl/ir_clone.cpp
+++ b/src/glsl/ir_clone.cpp
@@ -223,6 +223,7 @@ ir_texture::clone(void *mem_ctx, struct hash_table *ht) const
    case ir_lod:
    case ir_query_levels:
    case ir_texture_samples:
+   case ir_samples_identical:
       break;
    case ir_txb:
       new_tex->lod_info.bias = this->lod_info.bias->clone(mem_ctx, ht);
diff --git a/src/glsl/ir_equals.cpp b/src/glsl/ir_equals.cpp
index aafcd1f0dae..b86f4ea16bb 100644
--- a/src/glsl/ir_equals.cpp
+++ b/src/glsl/ir_equals.cpp
@@ -157,6 +157,7 @@ ir_texture::equals(const ir_instruction *ir, enum ir_node_type ignore) const
    case ir_lod:
    case ir_query_levels:
    case ir_texture_samples:
+   case ir_samples_identical:
       break;
    case ir_txb:
       if (!lod_info.bias->equals(other->lod_info.bias, ignore))
diff --git a/src/glsl/ir_hv_accept.cpp b/src/glsl/ir_hv_accept.cpp
index 6495cc4581d..213992af28c 100644
--- a/src/glsl/ir_hv_accept.cpp
+++ b/src/glsl/ir_hv_accept.cpp
@@ -195,6 +195,7 @@ ir_texture::accept(ir_hierarchical_visitor *v)
    case ir_lod:
    case ir_query_levels:
    case ir_texture_samples:
+   case ir_samples_identical:
       break;
    case ir_txb:
       s = this->lod_info.bias->accept(v);
diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp
index 42b03fdea52..fd7bc2eea98 100644
--- a/src/glsl/ir_print_visitor.cpp
+++ b/src/glsl/ir_print_visitor.cpp
@@ -268,6 +268,14 @@ void ir_print_visitor::visit(ir_texture *ir)
 {
    fprintf(f, "(%s ", ir->opcode_string());
 
+   if (ir->op == ir_samples_identical) {
+      ir->sampler->accept(this);
+      fprintf(f, " ");
+      ir->coordinate->accept(this);
+      fprintf(f, ")");
+      return;
+   }
+
    print_type(f, ir->type);
    fprintf(f, " ");
 
@@ -334,6 +342,8 @@ void ir_print_visitor::visit(ir_texture *ir)
    case ir_tg4:
       ir->lod_info.component->accept(this);
       break;
+   case ir_samples_identical:
+      unreachable(!"ir_samples_identical was already handled");
    };
    fprintf(f, ")");
 }
diff --git a/src/glsl/ir_rvalue_visitor.cpp b/src/glsl/ir_rvalue_visitor.cpp
index a6966f546bc..6486838b8b8 100644
--- a/src/glsl/ir_rvalue_visitor.cpp
+++ b/src/glsl/ir_rvalue_visitor.cpp
@@ -59,6 +59,7 @@ ir_rvalue_base_visitor::rvalue_visit(ir_texture *ir)
    case ir_lod:
    case ir_query_levels:
    case ir_texture_samples:
+   case ir_samples_identical:
       break;
    case ir_txb:
       handle_rvalue(&ir->lod_info.bias);
diff --git a/src/glsl/opt_tree_grafting.cpp b/src/glsl/opt_tree_grafting.cpp
index e38a0e93058..cd58213c019 100644
--- a/src/glsl/opt_tree_grafting.cpp
+++ b/src/glsl/opt_tree_grafting.cpp
@@ -275,6 +275,7 @@ ir_tree_grafting_visitor::visit_enter(ir_texture *ir)
    case ir_lod:
    case ir_query_levels:
    case ir_texture_samples:
+   case ir_samples_identical:
       break;
    case ir_txb:
       if (do_graft(&ir->lod_info.bias))
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 61a5064d074..a0a42daaac5 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1919,6 +1919,8 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
    case ir_query_levels:
       assert(!"Unexpected ir_query_levels opcode");
       break;
+   case ir_samples_identical:
+      unreachable("Unexpected ir_samples_identical opcode");
    case ir_texture_samples:
       unreachable("Unexpected ir_texture_samples opcode");
    }
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 3ad1afdecda..40c77258de7 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3236,6 +3236,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
    case ir_texture_samples:
       opcode = TGSI_OPCODE_TXQS;
       break;
+   case ir_samples_identical:
+      unreachable("Unexpected ir_samples_identical opcode");
    }
 
    if (ir->projector) {

From 06c56f443aa1850b2651df3636c42a8740cff847 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 17 Nov 2015 16:59:40 -0800
Subject: [PATCH 135/335] glsl: Add textureSamplesIdenticalEXT built-in
 functions

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/glsl/builtin_functions.cpp | 44 ++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index 1824b831f28..881ee2b6b55 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -289,6 +289,20 @@ texture_multisample_array(const _mesa_glsl_parse_state *state)
           state->OES_texture_storage_multisample_2d_array_enable;
 }
 
+static bool
+texture_samples_identical(const _mesa_glsl_parse_state *state)
+{
+   return texture_multisample(state) &&
+          state->EXT_shader_samples_identical_enable;
+}
+
+static bool
+texture_samples_identical_array(const _mesa_glsl_parse_state *state)
+{
+   return texture_multisample_array(state) &&
+          state->EXT_shader_samples_identical_enable;
+}
+
 static bool
 fs_texture_cube_map_array(const _mesa_glsl_parse_state *state)
 {
@@ -724,6 +738,7 @@ private:
 
    BA2(textureQueryLod);
    B1(textureQueryLevels);
+   BA2(textureSamplesIdentical);
    B1(dFdx);
    B1(dFdy);
    B1(fwidth);
@@ -2210,6 +2225,16 @@ builtin_builder::create_builtins()
 
                 NULL);
 
+   add_function("textureSamplesIdenticalEXT",
+                _textureSamplesIdentical(texture_samples_identical, glsl_type::sampler2DMS_type,  glsl_type::ivec2_type),
+                _textureSamplesIdentical(texture_samples_identical, glsl_type::isampler2DMS_type, glsl_type::ivec2_type),
+                _textureSamplesIdentical(texture_samples_identical, glsl_type::usampler2DMS_type, glsl_type::ivec2_type),
+
+                _textureSamplesIdentical(texture_samples_identical_array, glsl_type::sampler2DMSArray_type,  glsl_type::ivec3_type),
+                _textureSamplesIdentical(texture_samples_identical_array, glsl_type::isampler2DMSArray_type, glsl_type::ivec3_type),
+                _textureSamplesIdentical(texture_samples_identical_array, glsl_type::usampler2DMSArray_type, glsl_type::ivec3_type),
+                NULL);
+
    add_function("texture1D",
                 _texture(ir_tex, v110,         glsl_type::vec4_type,  glsl_type::sampler1D_type, glsl_type::float_type),
                 _texture(ir_txb, v110_fs_only, glsl_type::vec4_type,  glsl_type::sampler1D_type, glsl_type::float_type),
@@ -4684,6 +4709,25 @@ builtin_builder::_textureQueryLevels(const glsl_type *sampler_type)
    return sig;
 }
 
+ir_function_signature *
+builtin_builder::_textureSamplesIdentical(builtin_available_predicate avail,
+                                          const glsl_type *sampler_type,
+                                          const glsl_type *coord_type)
+{
+   ir_variable *s = in_var(sampler_type, "sampler");
+   ir_variable *P = in_var(coord_type, "P");
+   const glsl_type *return_type = glsl_type::bool_type;
+   MAKE_SIG(return_type, avail, 2, s, P);
+
+   ir_texture *tex = new(mem_ctx) ir_texture(ir_samples_identical);
+   tex->coordinate = var_ref(P);
+   tex->set_sampler(var_ref(s), return_type);
+
+   body.emit(ret(tex));
+
+   return sig;
+}
+
 UNOP(dFdx, ir_unop_dFdx, fs_oes_derivatives)
 UNOP(dFdxCoarse, ir_unop_dFdx_coarse, fs_derivative_control)
 UNOP(dFdxFine, ir_unop_dFdx_fine, fs_derivative_control)

From 457bb290efc162ea3c7c51a820ab7cf88a4efb8d Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 17 Nov 2015 17:09:09 -0800
Subject: [PATCH 136/335] nir: Add nir_texop_samples_identical opcode

This is the NIR analog to GLSL IR ir_samples_identical.

v2: Don't add the second nir_tex_src_ms_index parameter.  Suggested by
Ken and Jason.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 3 +++
 src/glsl/nir/glsl_to_nir.cpp                         | 6 ++++++
 src/glsl/nir/nir.h                                   | 4 ++++
 src/glsl/nir/nir_print.c                             | 4 +++-
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp             | 1 +
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp           | 1 +
 6 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 157dc73a3c6..0f5c7e901ba 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1624,6 +1624,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
 	case nir_texop_tg4:
 	case nir_texop_query_levels:
 	case nir_texop_texture_samples:
+	case nir_texop_samples_identical:
 		compile_error(ctx, "Unhandled NIR tex type: %d\n", tex->op);
 		return;
 	}
@@ -1889,6 +1890,8 @@ emit_instr(struct ir3_compile *ctx, nir_instr *instr)
 		case nir_texop_query_levels:
 			emit_tex_query_levels(ctx, tex);
 			break;
+		case nir_texop_samples_identical:
+			unreachable("nir_texop_samples_identical");
 		default:
 			emit_tex(ctx, tex);
 			break;
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index e149d73e051..18ef4909049 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -1798,6 +1798,11 @@ nir_visitor::visit(ir_texture *ir)
       num_srcs = 0;
       break;
 
+   case ir_samples_identical:
+      op = nir_texop_samples_identical;
+      num_srcs = 1; /* coordinate */
+      break;
+
    default:
       unreachable("not reached");
    }
@@ -1825,6 +1830,7 @@ nir_visitor::visit(ir_texture *ir)
    case GLSL_TYPE_INT:
       instr->dest_type = nir_type_int;
       break;
+   case GLSL_TYPE_BOOL:
    case GLSL_TYPE_UINT:
       instr->dest_type = nir_type_unsigned;
       break;
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 09eb712c06a..087b4537c09 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -956,6 +956,9 @@ typedef enum {
    nir_texop_tg4,                /**< Texture gather */
    nir_texop_query_levels,       /**< Texture levels query */
    nir_texop_texture_samples,    /**< Texture samples query */
+   nir_texop_samples_identical,  /**< Query whether all samples are definitely
+                                  * identical.
+                                  */
 } nir_texop;
 
 typedef struct {
@@ -1029,6 +1032,7 @@ nir_tex_instr_dest_size(nir_tex_instr *instr)
 
    case nir_texop_texture_samples:
    case nir_texop_query_levels:
+   case nir_texop_samples_identical:
       return 1;
 
    default:
diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
index 54b8cc64a9b..c98a0476ef9 100644
--- a/src/glsl/nir/nir_print.c
+++ b/src/glsl/nir/nir_print.c
@@ -512,7 +512,9 @@ print_tex_instr(nir_tex_instr *instr, print_state *state)
    case nir_texop_texture_samples:
       fprintf(fp, "texture_samples ");
       break;
-
+   case nir_texop_samples_identical:
+      fprintf(fp, "samples_identical ");
+      break;
    default:
       unreachable("Invalid texture operation");
       break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 3394e4a7567..8fc7ee42d6b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -2733,6 +2733,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
       inst->base_mrf = -1;
       return;
    }
+   case nir_texop_samples_identical: op = ir_samples_identical; break;
    default:
       unreachable("unknown texture opcode");
    }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 3d186b49d4e..8088767bce6 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1545,6 +1545,7 @@ ir_texture_opcode_for_nir_texop(nir_texop texop)
    case nir_texop_txf_ms: op = ir_txf_ms; break;
    case nir_texop_txl: op = ir_txl; break;
    case nir_texop_txs: op = ir_txs; break;
+   case nir_texop_samples_identical: op = ir_samples_identical; break;
    default:
       unreachable("unknown texture opcode");
    }

From 84b6c64efc52948da8db89b8d92d5e744e6cfc95 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 17 Nov 2015 19:31:39 -0800
Subject: [PATCH 137/335] i965/vec4: Handle nir_tex_src_ms_index more like the
 scalar

v2: Rebase on top of f9a9ba5e.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 8088767bce6..63696a7ad5b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1654,14 +1654,6 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
 
       case nir_tex_src_ms_index: {
          sample_index = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1);
-         assert(coord_type != NULL);
-         if (devinfo->gen >= 7 &&
-             key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
-            mcs = emit_mcs_fetch(coord_type, coordinate, sampler_reg);
-         } else {
-            mcs = brw_imm_ud(0u);
-         }
-         mcs = retype(mcs, BRW_REGISTER_TYPE_UD);
          break;
       }
 
@@ -1703,6 +1695,16 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
       }
    }
 
+   if (instr->op == nir_texop_txf_ms) {
+      assert(coord_type != NULL);
+      if (devinfo->gen >= 7 &&
+          key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
+         mcs = emit_mcs_fetch(coord_type, coordinate, sampler_reg);
+      } else {
+         mcs = brw_imm_ud(0u);
+      }
+   }
+
    uint32_t constant_offset = 0;
    for (unsigned i = 0; i < 3; i++) {
       if (instr->const_offset[i] != 0) {

From 99840eb983f74cd447546f7205c8c9f505ef82c8 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 17 Nov 2015 17:57:08 -0800
Subject: [PATCH 138/335] i965: Enable EXT_shader_samples_identical

On the vec4 backend, textureSamplesIdentical() will always return
false.  There are currently no test cases for the vec4 backend, so we
don't have much confidence in any implementation.  We also don't think
anyone is likely to miss it.

v2: Handle immediate value for MCS smarter.  Rebase on changes to
nir_texop_samples_identical (missing second parameter).  Suggested by
Jason.

v3: Add Neil's code to handle 16x MSAA in the FS.  Also rebase on top of
f9a9ba5e.  Stub out the vec4 implementation.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Signed-off-by: Neil Roberts <neil@linux.intel.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com> [v2]
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz> [v2]
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp      |  4 +++-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp  | 20 +++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp    |  4 +++-
 .../drivers/dri/i965/brw_vec4_visitor.cpp     |  7 +++++++
 src/mesa/drivers/dri/i965/intel_extensions.c  |  1 +
 5 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 8fc7ee42d6b..b6e5ed6a72b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -2625,6 +2625,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
          switch (instr->op) {
          case nir_texop_txf:
          case nir_texop_txf_ms:
+         case nir_texop_samples_identical:
             coordinate = retype(src, BRW_REGISTER_TYPE_D);
             break;
          default:
@@ -2687,7 +2688,8 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
       }
    }
 
-   if (instr->op == nir_texop_txf_ms) {
+   if (instr->op == nir_texop_txf_ms ||
+       instr->op == nir_texop_samples_identical) {
       if (devinfo->gen >= 7 &&
           key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
          mcs = emit_mcs_fetch(coordinate, instr->coord_components, sampler_reg);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index d97fcf33b62..2e04134318e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -259,6 +259,26 @@ fs_visitor::emit_texture(ir_texture_opcode op,
       lod = brw_imm_ud(0u);
    }
 
+   if (op == ir_samples_identical) {
+      fs_reg dst = vgrf(glsl_type::get_instance(dest_type->base_type, 1, 1));
+
+      /* If mcs is an immediate value, it means there is no MCS.  In that case
+       * just return false.
+       */
+      if (mcs.file == BRW_IMMEDIATE_VALUE) {
+         bld.MOV(dst, brw_imm_ud(0u));
+      } else if ((key_tex->msaa_16 & (1 << sampler))) {
+         fs_reg tmp = vgrf(glsl_type::uint_type);
+         bld.OR(tmp, mcs, offset(mcs, bld, 1));
+         bld.CMP(dst, tmp, brw_imm_ud(0u), BRW_CONDITIONAL_EQ);
+      } else {
+         bld.CMP(dst, mcs, brw_imm_ud(0u), BRW_CONDITIONAL_EQ);
+      }
+
+      this->result = dst;
+      return;
+   }
+
    if (coordinate.file != BAD_FILE) {
       /* FINISHME: Texture coordinate rescaling doesn't work with non-constant
        * samplers.  This should only be a problem with GL_CLAMP on Gen7.
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 63696a7ad5b..28656d45b33 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1615,6 +1615,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
          switch (instr->op) {
          case nir_texop_txf:
          case nir_texop_txf_ms:
+         case nir_texop_samples_identical:
             coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D,
                                      src_size);
             coord_type = glsl_type::ivec(src_size);
@@ -1695,7 +1696,8 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
       }
    }
 
-   if (instr->op == nir_texop_txf_ms) {
+   if (instr->op == nir_texop_txf_ms ||
+       instr->op == nir_texop_samples_identical) {
       assert(coord_type != NULL);
       if (devinfo->gen >= 7 &&
           key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 08a1f8bb77c..2e4695a2845 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -909,6 +909,13 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
       unreachable("TXB is not valid for vertex shaders.");
    case ir_lod:
       unreachable("LOD is not valid for vertex shaders.");
+   case ir_samples_identical: {
+      /* There are some challenges implementing this for vec4, and it seems
+       * unlikely to be used anyway.  For now, just return false ways.
+       */
+      emit(MOV(dest, brw_imm_ud(0u)));
+      return;
+   }
    default:
       unreachable("Unrecognized tex op");
    }
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index 386b63c123d..2e2459c125b 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -333,6 +333,7 @@ intelInitExtensions(struct gl_context *ctx)
       ctx->Extensions.ARB_texture_compression_bptc = true;
       ctx->Extensions.ARB_texture_view = true;
       ctx->Extensions.ARB_shader_storage_buffer_object = true;
+      ctx->Extensions.EXT_shader_samples_identical = true;
 
       if (can_do_pipelined_register_writes(brw)) {
          ctx->Extensions.ARB_draw_indirect = true;

From 5a79e0a8e37ea10cfa6fc39f4fbe315f9620fe52 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Thu, 19 Nov 2015 15:31:06 +0000
Subject: [PATCH 139/335] automake: loader: rework the CPPFLAGS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rather than duplicating things, just use the generic AM_CPPFLAGS. This
has the fortunate side-effect of adding VISIBILITY_CFLAGS for the dri3
helper, which was erroneously exposing some internal
symbols.

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Reported-by: Kai Wasserbäch <kai@dev.carbon-project.org>
Tested-by: Kai Wasserbäch <kai@dev.carbon-project.org>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/loader/Makefile.am | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/src/loader/Makefile.am b/src/loader/Makefile.am
index c0f79475361..67ed776b0c3 100644
--- a/src/loader/Makefile.am
+++ b/src/loader/Makefile.am
@@ -25,18 +25,20 @@ EXTRA_DIST = SConscript
 
 noinst_LTLIBRARIES = libloader.la libloader_dri3_helper.la
 
-libloader_la_CPPFLAGS = \
+AM_CPPFLAGS = \
 	$(DEFINES) \
 	-I$(top_srcdir)/include \
 	-I$(top_srcdir)/src \
 	$(VISIBILITY_CFLAGS) \
+	$(LIBDRM_CFLAGS) \
 	$(LIBUDEV_CFLAGS)
 
 libloader_la_SOURCES = $(LOADER_C_FILES)
 libloader_la_LIBADD =
 
 if HAVE_DRICOMMON
-libloader_la_CPPFLAGS += \
+libloader_la_CPPFLAGS = \
+	$(AM_CPPFLAGS) \
 	-I$(top_srcdir)/src/mesa/drivers/dri/common/ \
 	-I$(top_builddir)/src/mesa/drivers/dri/common/ \
 	-I$(top_srcdir)/src/mesa/ \
@@ -49,20 +51,11 @@ libloader_la_CPPFLAGS += \
 endif
 
 if HAVE_LIBDRM
-libloader_la_CPPFLAGS += \
-	$(LIBDRM_CFLAGS)
-
 libloader_la_LIBADD += \
 	$(LIBDRM_LIBS)
 endif
 
 if HAVE_DRI3
-libloader_dri3_helper_la_CPPFLAGS = \
-	$(DEFINES) \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/src \
-	$(LIBDRM_CFLAGS)
-
 libloader_dri3_helper_la_SOURCES = \
 	loader_dri3_helper.c \
 	loader_dri3_helper.h

From 166314dd8829c80f1df5a3b7070b6c86cccf1b9e Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Thu, 19 Nov 2015 15:50:50 +0000
Subject: [PATCH 140/335] automake: egl: add symbols test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Should help us catch issues where we expose any extra symbols by
mistake. Just like the ones fixed by the previous commit.

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Tested-by: Kai Wasserbäch <kai@dev.carbon-project.org>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
Acked-by: Matt Turner <mattst88@gmail.com>
---
 src/egl/Makefile.am       |  3 +++
 src/egl/egl-symbols-check | 55 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)
 create mode 100755 src/egl/egl-symbols-check

diff --git a/src/egl/Makefile.am b/src/egl/Makefile.am
index 88fe13acbd4..0b463c8deb0 100644
--- a/src/egl/Makefile.am
+++ b/src/egl/Makefile.am
@@ -119,7 +119,10 @@ egl_HEADERS = \
 	$(top_srcdir)/include/EGL/eglmesaext.h \
 	$(top_srcdir)/include/EGL/eglplatform.h
 
+TESTS = egl-symbols-check
+
 EXTRA_DIST = \
+	egl-symbols-check \
 	SConscript \
 	drivers/haiku \
 	docs \
diff --git a/src/egl/egl-symbols-check b/src/egl/egl-symbols-check
new file mode 100755
index 00000000000..5d46fed57c9
--- /dev/null
+++ b/src/egl/egl-symbols-check
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+FUNCS=$(nm -D --defined-only ${1-.libs/libEGL.so} | grep -o "T .*" | cut -c 3- | while read func; do
+( grep -q "^$func$" || echo $func )  <<EOF
+eglBindAPI
+eglBindTexImage
+eglChooseConfig
+eglClientWaitSync
+eglCopyBuffers
+eglCreateContext
+eglCreateImage
+eglCreatePbufferFromClientBuffer
+eglCreatePbufferSurface
+eglCreatePixmapSurface
+eglCreatePlatformPixmapSurface
+eglCreatePlatformWindowSurface
+eglCreateSync
+eglCreateWindowSurface
+eglDestroyContext
+eglDestroyImage
+eglDestroySurface
+eglDestroySync
+eglGetConfigAttrib
+eglGetConfigs
+eglGetCurrentContext
+eglGetCurrentDisplay
+eglGetCurrentSurface
+eglGetDisplay
+eglGetError
+eglGetPlatformDisplay
+eglGetProcAddress
+eglGetSyncAttrib
+eglInitialize
+eglMakeCurrent
+eglQueryAPI
+eglQueryContext
+eglQueryString
+eglQuerySurface
+eglReleaseTexImage
+eglReleaseThread
+eglSurfaceAttrib
+eglSwapBuffers
+eglSwapInterval
+eglTerminate
+eglWaitClient
+eglWaitGL
+eglWaitNative
+eglWaitSync
+_fini
+_init
+EOF
+done)
+
+test ! -n "$FUNCS" || echo $FUNCS
+test ! -n "$FUNCS"

From 115f1798525899c23249b09aa63113f84328186d Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Thu, 19 Nov 2015 15:34:20 +0000
Subject: [PATCH 141/335] automake: loader: honour the XCB_DRI3 cflags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Without this the compilation will fail, as the headers are installed in
a non-default location.

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Tested-by: Kai Wasserbäch <kai@dev.carbon-project.org>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/loader/Makefile.am | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/loader/Makefile.am b/src/loader/Makefile.am
index 67ed776b0c3..5daa42edd41 100644
--- a/src/loader/Makefile.am
+++ b/src/loader/Makefile.am
@@ -30,6 +30,7 @@ AM_CPPFLAGS = \
 	-I$(top_srcdir)/include \
 	-I$(top_srcdir)/src \
 	$(VISIBILITY_CFLAGS) \
+	$(XCB_DRI3_CFLAGS) \
 	$(LIBDRM_CFLAGS) \
 	$(LIBUDEV_CFLAGS)
 

From 7157085140a502a182ff0e23d30bd8e421212c42 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Thu, 19 Nov 2015 15:36:03 +0000
Subject: [PATCH 142/335] automake: loader: don't create an empty dri3 helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Seems that creating an empty one does not fare too well with MacOSX's
ar. Considering that all the users of the helper include it only when
needed, let's reshuffle the makefile.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92985
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Tested-by: Vinson Lee <vlee@freedesktop.org>
Tested-by: Kai Wasserbäch <kai@dev.carbon-project.org>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/loader/Makefile.am | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/loader/Makefile.am b/src/loader/Makefile.am
index 5daa42edd41..5021120c96d 100644
--- a/src/loader/Makefile.am
+++ b/src/loader/Makefile.am
@@ -23,7 +23,7 @@ include Makefile.sources
 
 EXTRA_DIST = SConscript
 
-noinst_LTLIBRARIES = libloader.la libloader_dri3_helper.la
+noinst_LTLIBRARIES = libloader.la
 
 AM_CPPFLAGS = \
 	$(DEFINES) \
@@ -57,6 +57,8 @@ libloader_la_LIBADD += \
 endif
 
 if HAVE_DRI3
+noinst_LTLIBRARIES += libloader_dri3_helper.la
+
 libloader_dri3_helper_la_SOURCES = \
 	loader_dri3_helper.c \
 	loader_dri3_helper.h

From a8f45e0161c13fbda6f3b846219a3eb1d44b25de Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Mon, 9 Nov 2015 11:03:01 +0000
Subject: [PATCH 143/335] st/omx: straighten get/put_screen

The current code is busted in a number of ways.

 - initially checks for omx_display (rather than omx_screen), which may
or may not be around.
 - blindly feeds the empty env variable string to loader_open_device()
 - reads the env variable every time get_screen is called
 - the latter manifests into memory leaks, and other issues as one sets
the variable between two get_screen calls.

Additionally it cleans up a couple of extra bits
 - drops unneeded set/check of omx_display.
 - makes the teardown (put_screen) order symmetrical to the setup
(get_screen)

v2: Drop the "is empty string" check (Leo)

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
---
 src/gallium/state_trackers/omx/entrypoint.c | 32 ++++++++++-----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/gallium/state_trackers/omx/entrypoint.c b/src/gallium/state_trackers/omx/entrypoint.c
index 7df90b16a84..4716333015b 100644
--- a/src/gallium/state_trackers/omx/entrypoint.c
+++ b/src/gallium/state_trackers/omx/entrypoint.c
@@ -33,6 +33,7 @@
 
 #include <assert.h>
 #include <string.h>
+#include <stdbool.h>
 
 #include <X11/Xlib.h>
 
@@ -73,28 +74,29 @@ int omx_component_library_Setup(stLoaderComponentType **stComponents)
 
 struct vl_screen *omx_get_screen(void)
 {
+   static bool first_time = true;
    pipe_mutex_lock(omx_lock);
 
-   if (!omx_display) {
-      omx_render_node = debug_get_option("OMX_RENDER_NODE", NULL);
-      if (!omx_render_node) {
-         omx_display = XOpenDisplay(NULL);
-         if (!omx_display)
-            goto error;
-      }
-   }
-
    if (!omx_screen) {
+      if (first_time) {
+         omx_render_node = debug_get_option("OMX_RENDER_NODE", NULL);
+         first_time = false;
+      }
       if (omx_render_node) {
          drm_fd = loader_open_device(omx_render_node);
          if (drm_fd < 0)
             goto error;
+
          omx_screen = vl_drm_screen_create(drm_fd);
          if (!omx_screen) {
             close(drm_fd);
             goto error;
          }
       } else {
+         omx_display = XOpenDisplay(NULL);
+         if (!omx_display)
+            goto error;
+
          omx_screen = vl_screen_create(omx_display, 0);
          if (!omx_screen) {
             XCloseDisplay(omx_display);
@@ -117,16 +119,14 @@ void omx_put_screen(void)
 {
    pipe_mutex_lock(omx_lock);
    if ((--omx_usecount) == 0) {
-      if (!omx_render_node) {
-         vl_screen_destroy(omx_screen);
-         if (omx_display)
-            XCloseDisplay(omx_display);
-      } else {
-         close(drm_fd);
+      if (omx_render_node) {
          vl_drm_screen_destroy(omx_screen);
+         close(drm_fd);
+      } else {
+         vl_screen_destroy(omx_screen);
+         XCloseDisplay(omx_display);
       }
       omx_screen = NULL;
-      omx_display = NULL;
    }
    pipe_mutex_unlock(omx_lock);
 }

From c31218cdb342482064345d78ada691521c367446 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Mon, 9 Nov 2015 11:14:56 +0000
Subject: [PATCH 144/335] st/va: trivial cleanup

Drop the temporary variable and fold the two conditionals.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/state_trackers/va/context.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/src/gallium/state_trackers/va/context.c b/src/gallium/state_trackers/va/context.c
index 98c4104da48..04ca2f2abca 100644
--- a/src/gallium/state_trackers/va/context.c
+++ b/src/gallium/state_trackers/va/context.c
@@ -102,7 +102,6 @@ PUBLIC VAStatus
 VA_DRIVER_INIT_FUNC(VADriverContextP ctx)
 {
    vlVaDriver *drv;
-   int drm_fd;
    struct drm_state *drm_info;
 
    if (!ctx)
@@ -126,19 +125,13 @@ VA_DRIVER_INIT_FUNC(VADriverContextP ctx)
    case VA_DISPLAY_DRM:
    case VA_DISPLAY_DRM_RENDERNODES: {
       drm_info = (struct drm_state *) ctx->drm_state;
-      if (!drm_info) {
+
+      if (!drm_info || drm_info->fd < 0) {
          FREE(drv);
          return VA_STATUS_ERROR_INVALID_PARAMETER;
       }
 
-      drm_fd = drm_info->fd;
-
-      if (drm_fd < 0) {
-         FREE(drv);
-         return VA_STATUS_ERROR_INVALID_PARAMETER;
-      }
-
-      drv->vscreen = vl_drm_screen_create(drm_fd);
+      drv->vscreen = vl_drm_screen_create(drm_info->fd);
       if (!drv->vscreen)
          goto error_screen;
       }

From 2bd9116b8249fdb1711335711f8daef5f68752a6 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Mon, 9 Nov 2015 11:17:07 +0000
Subject: [PATCH 145/335] auxiliary/vl: rename vl_screen_create to
 vl_dri2_screen_create

In preparation for having proper multi-platform/backend handling in VL.

With follow up commits we'll introduce a dispatch within vl_screen
similar to the one in pipe_screen. This way any VL state-tracker can
operate seamlessly, considering the backend/platform is properly setup.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/auxiliary/vl/vl_winsys.h        | 6 +++---
 src/gallium/auxiliary/vl/vl_winsys_dri.c    | 2 +-
 src/gallium/state_trackers/omx/entrypoint.c | 2 +-
 src/gallium/state_trackers/va/context.c     | 2 +-
 src/gallium/state_trackers/vdpau/device.c   | 2 +-
 src/gallium/state_trackers/xvmc/context.c   | 2 +-
 6 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_winsys.h b/src/gallium/auxiliary/vl/vl_winsys.h
index df01917466f..9aa65ad3200 100644
--- a/src/gallium/auxiliary/vl/vl_winsys.h
+++ b/src/gallium/auxiliary/vl/vl_winsys.h
@@ -46,9 +46,6 @@ struct vl_screen
    struct pipe_loader_device *dev;
 };
 
-struct vl_screen*
-vl_screen_create(Display *display, int screen);
-
 void vl_screen_destroy(struct vl_screen *vscreen);
 
 struct pipe_resource*
@@ -67,6 +64,9 @@ void*
 vl_screen_get_private(struct vl_screen *vscreen);
 
 struct vl_screen*
+vl_dri2_screen_create(Display *display, int screen);
+
+struct vl_screen *
 vl_drm_screen_create(int fd);
 
 void
diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri.c b/src/gallium/auxiliary/vl/vl_winsys_dri.c
index 3b1b87f9523..46f581601af 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_dri.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri.c
@@ -306,7 +306,7 @@ get_xcb_screen(xcb_screen_iterator_t iter, int screen)
 }
 
 struct vl_screen*
-vl_screen_create(Display *display, int screen)
+vl_dri2_screen_create(Display *display, int screen)
 {
    struct vl_dri_screen *scrn;
    const xcb_query_extension_reply_t *extension;
diff --git a/src/gallium/state_trackers/omx/entrypoint.c b/src/gallium/state_trackers/omx/entrypoint.c
index 4716333015b..dd72b296de2 100644
--- a/src/gallium/state_trackers/omx/entrypoint.c
+++ b/src/gallium/state_trackers/omx/entrypoint.c
@@ -97,7 +97,7 @@ struct vl_screen *omx_get_screen(void)
          if (!omx_display)
             goto error;
 
-         omx_screen = vl_screen_create(omx_display, 0);
+         omx_screen = vl_dri2_screen_create(omx_display, 0);
          if (!omx_screen) {
             XCloseDisplay(omx_display);
             goto error;
diff --git a/src/gallium/state_trackers/va/context.c b/src/gallium/state_trackers/va/context.c
index 04ca2f2abca..0709dfbef81 100644
--- a/src/gallium/state_trackers/va/context.c
+++ b/src/gallium/state_trackers/va/context.c
@@ -118,7 +118,7 @@ VA_DRIVER_INIT_FUNC(VADriverContextP ctx)
       return VA_STATUS_ERROR_UNIMPLEMENTED;
    case VA_DISPLAY_GLX:
    case VA_DISPLAY_X11:
-      drv->vscreen = vl_screen_create(ctx->native_dpy, ctx->x11_screen);
+      drv->vscreen = vl_dri2_screen_create(ctx->native_dpy, ctx->x11_screen);
       if (!drv->vscreen)
          goto error_screen;
       break;
diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c
index 31c95054f56..8fda388b26b 100644
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -63,7 +63,7 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device,
 
    pipe_reference_init(&dev->reference, 1);
 
-   dev->vscreen = vl_screen_create(display, screen);
+   dev->vscreen = vl_dri2_screen_create(display, screen);
    if (!dev->vscreen) {
       ret = VDP_STATUS_RESOURCES;
       goto no_vscreen;
diff --git a/src/gallium/state_trackers/xvmc/context.c b/src/gallium/state_trackers/xvmc/context.c
index 4702b44d1f4..07b33e41f7d 100644
--- a/src/gallium/state_trackers/xvmc/context.c
+++ b/src/gallium/state_trackers/xvmc/context.c
@@ -229,7 +229,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
       return BadAlloc;
 
    /* TODO: Reuse screen if process creates another context */
-   vscreen = vl_screen_create(dpy, scrn);
+   vscreen = vl_dri2_screen_create(dpy, scrn);
 
    if (!vscreen) {
       XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL screen.\n");

From 6b152ee7b6121f661cdcfdfafa547457dc27b921 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Fri, 6 Nov 2015 22:34:01 +0000
Subject: [PATCH 146/335] auxiliary/vl: add dispatch table

As mentioned previously, it will allow us to use different vl backends in
a generic way from any video state-tracker.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/auxiliary/vl/vl_winsys.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/src/gallium/auxiliary/vl/vl_winsys.h b/src/gallium/auxiliary/vl/vl_winsys.h
index 9aa65ad3200..29da3203a39 100644
--- a/src/gallium/auxiliary/vl/vl_winsys.h
+++ b/src/gallium/auxiliary/vl/vl_winsys.h
@@ -42,6 +42,23 @@ struct pipe_loader_device;
 
 struct vl_screen
 {
+   void (*destroy)(struct vl_screen *vscreen);
+
+   struct pipe_resource *
+   (*texture_from_drawable)(struct vl_screen *vscreen, void *drawable);
+
+   struct u_rect *
+   (*get_dirty_area)(struct vl_screen *vscreen);
+
+   uint64_t
+   (*get_timestamp)(struct vl_screen *vscreen, void *drawable);
+
+   void
+   (*set_next_timestamp)(struct vl_screen *vscreen, uint64_t stamp);
+
+   void *
+   (*get_private)(struct vl_screen *vscreen);
+
    struct pipe_screen *pscreen;
    struct pipe_loader_device *dev;
 };

From d03d9ecafa6548c30e22e26b14bf552472a9f231 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Mon, 9 Nov 2015 11:18:14 +0000
Subject: [PATCH 147/335] auxiliary/vl/drm: setup the dispatch

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/auxiliary/vl/vl_winsys_drm.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/gallium/auxiliary/vl/vl_winsys_drm.c b/src/gallium/auxiliary/vl/vl_winsys_drm.c
index 1167fcf6a90..2ebf20c4bd3 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_drm.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_drm.c
@@ -59,6 +59,12 @@ vl_drm_screen_create(int fd)
       return NULL;
    }
 
+   vscreen->destroy = vl_drm_screen_destroy;
+   vscreen->texture_from_drawable = NULL;
+   vscreen->get_dirty_area = NULL;
+   vscreen->get_timestamp = NULL;
+   vscreen->set_next_timestamp = NULL;
+   vscreen->get_private = NULL;
    return vscreen;
 }
 

From 6150d8d4bd64151522615c417f1fe51bff1cdd5f Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Mon, 9 Nov 2015 11:34:48 +0000
Subject: [PATCH 148/335] auxiliary/vl/drm: use a label for the error path

... just like every other place in gallium.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/auxiliary/vl/vl_winsys_drm.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_winsys_drm.c b/src/gallium/auxiliary/vl/vl_winsys_drm.c
index 2ebf20c4bd3..b9efc9a901b 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_drm.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_drm.c
@@ -49,15 +49,11 @@ vl_drm_screen_create(int fd)
    if (pipe_loader_drm_probe_fd(&vscreen->dev, dup(fd))) {
       vscreen->pscreen =
          pipe_loader_create_screen(vscreen->dev, PIPE_SEARCH_DIR);
-      if (!vscreen->pscreen)
-         pipe_loader_release(&vscreen->dev, 1);
    }
 #endif
 
-   if (!vscreen->pscreen) {
-      FREE(vscreen);
-      return NULL;
-   }
+   if (!vscreen->pscreen)
+      goto error;
 
    vscreen->destroy = vl_drm_screen_destroy;
    vscreen->texture_from_drawable = NULL;
@@ -66,6 +62,14 @@ vl_drm_screen_create(int fd)
    vscreen->set_next_timestamp = NULL;
    vscreen->get_private = NULL;
    return vscreen;
+
+error:
+#if !GALLIUM_STATIC_TARGETS
+   if (vscreen->dev)
+      pipe_loader_release(&vscreen->dev, 1);
+#endif // !GALLIUM_STATIC_TARGETS
+   FREE(vscreen);
+   return NULL;
 }
 
 void

From 32094979f70a5f532eb27eb41df4a0eadd2fd1a1 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Fri, 6 Nov 2015 22:40:34 +0000
Subject: [PATCH 149/335] auxiliary/vl/dri2: setup the dispatch

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/auxiliary/vl/vl_winsys.h     |  4 ++--
 src/gallium/auxiliary/vl/vl_winsys_dri.c | 19 +++++++++++++------
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_winsys.h b/src/gallium/auxiliary/vl/vl_winsys.h
index 29da3203a39..5390f722d09 100644
--- a/src/gallium/auxiliary/vl/vl_winsys.h
+++ b/src/gallium/auxiliary/vl/vl_winsys.h
@@ -66,13 +66,13 @@ struct vl_screen
 void vl_screen_destroy(struct vl_screen *vscreen);
 
 struct pipe_resource*
-vl_screen_texture_from_drawable(struct vl_screen *vscreen, Drawable drawable);
+vl_screen_texture_from_drawable(struct vl_screen *vscreen, void *drawable);
 
 struct u_rect *
 vl_screen_get_dirty_area(struct vl_screen *vscreen);
 
 uint64_t
-vl_screen_get_timestamp(struct vl_screen *vscreen, Drawable drawable);
+vl_screen_get_timestamp(struct vl_screen *vscreen, void *drawable);
 
 void
 vl_screen_set_next_timestamp(struct vl_screen *vscreen, uint64_t stamp);
diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri.c b/src/gallium/auxiliary/vl/vl_winsys_dri.c
index 46f581601af..12f7887e47e 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_dri.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri.c
@@ -171,7 +171,7 @@ vl_dri2_set_drawable(struct vl_dri_screen *scrn, Drawable drawable)
 }
 
 struct pipe_resource*
-vl_screen_texture_from_drawable(struct vl_screen *vscreen, Drawable drawable)
+vl_screen_texture_from_drawable(struct vl_screen *vscreen, void *drawable)
 {
    struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen;
 
@@ -185,11 +185,12 @@ vl_screen_texture_from_drawable(struct vl_screen *vscreen, Drawable drawable)
 
    assert(scrn);
 
-   vl_dri2_set_drawable(scrn, drawable);
+   vl_dri2_set_drawable(scrn, (Drawable)drawable);
    reply = vl_dri2_get_flush_reply(scrn);
    if (!reply) {
       xcb_dri2_get_buffers_cookie_t cookie;
-      cookie = xcb_dri2_get_buffers_unchecked(scrn->conn, drawable, 1, 1, attachments);
+      cookie = xcb_dri2_get_buffers_unchecked(scrn->conn, (Drawable)drawable,
+                                              1, 1, attachments);
       reply = xcb_dri2_get_buffers_reply(scrn->conn, cookie, NULL);
    }
    if (!reply)
@@ -256,7 +257,7 @@ vl_screen_get_dirty_area(struct vl_screen *vscreen)
 }
 
 uint64_t
-vl_screen_get_timestamp(struct vl_screen *vscreen, Drawable drawable)
+vl_screen_get_timestamp(struct vl_screen *vscreen, void *drawable)
 {
    struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen;
    xcb_dri2_get_msc_cookie_t cookie;
@@ -264,9 +265,9 @@ vl_screen_get_timestamp(struct vl_screen *vscreen, Drawable drawable)
 
    assert(scrn);
 
-   vl_dri2_set_drawable(scrn, drawable);
+   vl_dri2_set_drawable(scrn, (Drawable)drawable);
    if (!scrn->last_ust) {
-      cookie = xcb_dri2_get_msc_unchecked(scrn->conn, drawable);
+      cookie = xcb_dri2_get_msc_unchecked(scrn->conn, (Drawable)drawable);
       reply = xcb_dri2_get_msc_reply(scrn->conn, cookie, NULL);
 
       if (reply) {
@@ -397,6 +398,12 @@ vl_dri2_screen_create(Display *display, int screen)
    if (!scrn->base.pscreen)
       goto release_pipe;
 
+   scrn->base.destroy = vl_screen_destroy;
+   scrn->base.texture_from_drawable = vl_screen_texture_from_drawable;
+   scrn->base.get_dirty_area = vl_screen_get_dirty_area;
+   scrn->base.get_timestamp = vl_screen_get_timestamp;
+   scrn->base.set_next_timestamp = vl_screen_set_next_timestamp;
+   scrn->base.get_private = vl_screen_get_private;
    scrn->base.pscreen->flush_frontbuffer = vl_dri2_flush_frontbuffer;
    vl_compositor_reset_dirty_area(&scrn->dirty_areas[0]);
    vl_compositor_reset_dirty_area(&scrn->dirty_areas[1]);

From 9eb109f4d3b3b145c544eb69ac21cb69e0a5e2be Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Fri, 6 Nov 2015 22:45:38 +0000
Subject: [PATCH 150/335] st/omx: use the vl_screen dispatch

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/state_trackers/omx/entrypoint.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/gallium/state_trackers/omx/entrypoint.c b/src/gallium/state_trackers/omx/entrypoint.c
index dd72b296de2..da9ca104d93 100644
--- a/src/gallium/state_trackers/omx/entrypoint.c
+++ b/src/gallium/state_trackers/omx/entrypoint.c
@@ -119,14 +119,13 @@ void omx_put_screen(void)
 {
    pipe_mutex_lock(omx_lock);
    if ((--omx_usecount) == 0) {
-      if (omx_render_node) {
-         vl_drm_screen_destroy(omx_screen);
-         close(drm_fd);
-      } else {
-         vl_screen_destroy(omx_screen);
-         XCloseDisplay(omx_display);
-      }
+      omx_screen->destroy(omx_screen);
       omx_screen = NULL;
+
+      if (omx_render_node)
+         close(drm_fd);
+      else
+         XCloseDisplay(omx_display);
    }
    pipe_mutex_unlock(omx_lock);
 }

From 422356ed2f05ef0f5bcdac13899a529df5497357 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Mon, 9 Nov 2015 11:23:37 +0000
Subject: [PATCH 151/335] st/va: use the vl_screen dispatch

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/state_trackers/va/context.c | 10 ++--------
 src/gallium/state_trackers/va/picture.c |  2 +-
 src/gallium/state_trackers/va/surface.c | 13 ++++++-------
 3 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/src/gallium/state_trackers/va/context.c b/src/gallium/state_trackers/va/context.c
index 0709dfbef81..f0051e5f6a5 100644
--- a/src/gallium/state_trackers/va/context.c
+++ b/src/gallium/state_trackers/va/context.c
@@ -175,10 +175,7 @@ error_htab:
    drv->pipe->destroy(drv->pipe);
 
 error_pipe:
-   if (ctx->display_type == VA_DISPLAY_GLX || ctx->display_type == VA_DISPLAY_X11)
-      vl_screen_destroy(drv->vscreen);
-   else
-      vl_drm_screen_destroy(drv->vscreen);
+   drv->vscreen->destroy(drv->vscreen);
 
 error_screen:
    FREE(drv);
@@ -315,10 +312,7 @@ vlVaTerminate(VADriverContextP ctx)
    vl_compositor_cleanup_state(&drv->cstate);
    vl_compositor_cleanup(&drv->compositor);
    drv->pipe->destroy(drv->pipe);
-   if (ctx->display_type == VA_DISPLAY_GLX || ctx->display_type == VA_DISPLAY_X11)
-      vl_screen_destroy(drv->vscreen);
-   else
-      vl_drm_screen_destroy(drv->vscreen);
+   drv->vscreen->destroy(drv->vscreen);
    handle_table_destroy(drv->htab);
    FREE(drv);
 
diff --git a/src/gallium/state_trackers/va/picture.c b/src/gallium/state_trackers/va/picture.c
index 5e7841a0521..a37a9b791db 100644
--- a/src/gallium/state_trackers/va/picture.c
+++ b/src/gallium/state_trackers/va/picture.c
@@ -763,7 +763,7 @@ handleVAProcPipelineParameterBufferType(vlVaDriver *drv, vlVaContext *context, v
    dst_rect.x1 = pipeline_param->output_region->x + pipeline_param->output_region->width;
    dst_rect.y1 = pipeline_param->output_region->y + pipeline_param->output_region->height;
 
-   dirty_area = vl_screen_get_dirty_area(drv->vscreen);
+   dirty_area = drv->vscreen->get_dirty_area(drv->vscreen);
 
    vl_compositor_clear_layers(&drv->cstate);
    vl_compositor_set_buffer_layer(&drv->cstate, &drv->compositor, 0, src_surface->buffer, &src_rect, NULL, VL_COMPOSITOR_WEAVE);
diff --git a/src/gallium/state_trackers/va/surface.c b/src/gallium/state_trackers/va/surface.c
index 589d6860b6a..c052c8f2284 100644
--- a/src/gallium/state_trackers/va/surface.c
+++ b/src/gallium/state_trackers/va/surface.c
@@ -229,6 +229,7 @@ vlVaPutSurface(VADriverContextP ctx, VASurfaceID surface_id, void* draw, short s
    struct pipe_screen *screen;
    struct pipe_resource *tex;
    struct pipe_surface surf_templ, *surf_draw;
+   struct vl_screen *vscreen;
    struct u_rect src_rect, *dirty_area;
    struct u_rect dst_rect = {destx, destx + destw, desty, desty + desth};
    VAStatus status;
@@ -242,17 +243,18 @@ vlVaPutSurface(VADriverContextP ctx, VASurfaceID surface_id, void* draw, short s
       return VA_STATUS_ERROR_INVALID_SURFACE;
 
    screen = drv->pipe->screen;
+   vscreen = drv->vscreen;
 
    if(surf->fence) {
       screen->fence_finish(screen, surf->fence, PIPE_TIMEOUT_INFINITE);
       screen->fence_reference(screen, &surf->fence, NULL);
    }
 
-   tex = vl_screen_texture_from_drawable(drv->vscreen, (Drawable)draw);
+   tex = vscreen->texture_from_drawable(vscreen, draw);
    if (!tex)
       return VA_STATUS_ERROR_INVALID_DISPLAY;
 
-   dirty_area = vl_screen_get_dirty_area(drv->vscreen);
+   dirty_area = vscreen->get_dirty_area(vscreen);
 
    memset(&surf_templ, 0, sizeof(surf_templ));
    surf_templ.format = tex->format;
@@ -276,11 +278,8 @@ vlVaPutSurface(VADriverContextP ctx, VASurfaceID surface_id, void* draw, short s
    if (status)
       return status;
 
-   screen->flush_frontbuffer
-   (
-      screen, tex, 0, 0,
-      vl_screen_get_private(drv->vscreen), NULL
-   );
+   screen->flush_frontbuffer(screen, tex, 0, 0,
+                             vscreen->get_private(vscreen), NULL);
 
    screen->fence_reference(screen, &surf->fence, NULL);
    drv->pipe->flush(drv->pipe, &surf->fence, 0);

From 4307155127fbd682d04fd23ce4aa80e7da2cb203 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Fri, 6 Nov 2015 23:02:14 +0000
Subject: [PATCH 152/335] st/xvmc: use the vl_screen dispatch

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/state_trackers/xvmc/context.c | 10 +++++-----
 src/gallium/state_trackers/xvmc/surface.c | 13 ++++++-------
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/src/gallium/state_trackers/xvmc/context.c b/src/gallium/state_trackers/xvmc/context.c
index 07b33e41f7d..a6991ab8d61 100644
--- a/src/gallium/state_trackers/xvmc/context.c
+++ b/src/gallium/state_trackers/xvmc/context.c
@@ -240,7 +240,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
    pipe = vscreen->pscreen->context_create(vscreen->pscreen, vscreen, 0);
    if (!pipe) {
       XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL context.\n");
-      vl_screen_destroy(vscreen);
+      vscreen->destroy(vscreen);
       FREE(context_priv);
       return BadAlloc;
    }
@@ -258,7 +258,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
    if (!context_priv->decoder) {
       XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL decoder.\n");
       pipe->destroy(pipe);
-      vl_screen_destroy(vscreen);
+      vscreen->destroy(vscreen);
       FREE(context_priv);
       return BadAlloc;
    }
@@ -267,7 +267,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
       XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL compositor.\n");
       context_priv->decoder->destroy(context_priv->decoder);
       pipe->destroy(pipe);
-      vl_screen_destroy(vscreen);
+      vscreen->destroy(vscreen);
       FREE(context_priv);
       return BadAlloc;
    }
@@ -277,7 +277,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
       vl_compositor_cleanup(&context_priv->compositor);
       context_priv->decoder->destroy(context_priv->decoder);
       pipe->destroy(pipe);
-      vl_screen_destroy(vscreen);
+      vscreen->destroy(vscreen);
       FREE(context_priv);
       return BadAlloc;
    }
@@ -332,7 +332,7 @@ Status XvMCDestroyContext(Display *dpy, XvMCContext *context)
    vl_compositor_cleanup_state(&context_priv->cstate);
    vl_compositor_cleanup(&context_priv->compositor);
    context_priv->pipe->destroy(context_priv->pipe);
-   vl_screen_destroy(context_priv->vscreen);
+   context_priv->vscreen->destroy(context_priv->vscreen);
    FREE(context_priv);
    context->privData = NULL;
 
diff --git a/src/gallium/state_trackers/xvmc/surface.c b/src/gallium/state_trackers/xvmc/surface.c
index 15eae59ff6e..199712ba168 100644
--- a/src/gallium/state_trackers/xvmc/surface.c
+++ b/src/gallium/state_trackers/xvmc/surface.c
@@ -355,6 +355,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    struct pipe_context *pipe;
    struct vl_compositor *compositor;
    struct vl_compositor_state *cstate;
+   struct vl_screen *vscreen;
 
    XvMCSurfacePrivate *surface_priv;
    XvMCContextPrivate *context_priv;
@@ -386,9 +387,10 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    pipe = context_priv->pipe;
    compositor = &context_priv->compositor;
    cstate = &context_priv->cstate;
+   vscreen = context_priv->vscreen;
 
-   tex = vl_screen_texture_from_drawable(context_priv->vscreen, drawable);
-   dirty_area = vl_screen_get_dirty_area(context_priv->vscreen);
+   tex = vscreen->texture_from_drawable(vscreen, (void *)drawable);
+   dirty_area = vscreen->get_dirty_area(vscreen);
 
    memset(&surf_templ, 0, sizeof(surf_templ));
    surf_templ.format = tex->format;
@@ -444,11 +446,8 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for display. Pushing to front buffer.\n", surface);
 
-   pipe->screen->flush_frontbuffer
-   (
-      pipe->screen, tex, 0, 0,
-      vl_screen_get_private(context_priv->vscreen), NULL
-   );
+   pipe->screen->flush_frontbuffer(pipe->screen, tex, 0, 0,
+                                   vscreen->get_private(vscreen), NULL);
 
    if(dump_window == -1) {
       dump_window = debug_get_num_option("XVMC_DUMP", 0);

From abbfda60d8b5ff870c75073cd408551c25c6a0b5 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Fri, 6 Nov 2015 23:12:13 +0000
Subject: [PATCH 153/335] st/vdpau: use the vl_screen dispatch

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/state_trackers/vdpau/device.c      |  4 ++--
 .../state_trackers/vdpau/presentation.c        | 18 +++++++++---------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c
index 8fda388b26b..c70cc6e2752 100644
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -136,7 +136,7 @@ no_handle:
 no_resource:
    dev->context->destroy(dev->context);
 no_context:
-   vl_screen_destroy(dev->vscreen);
+   dev->vscreen->destroy(dev->vscreen);
 no_vscreen:
    FREE(dev);
 no_dev:
@@ -227,7 +227,7 @@ vlVdpDeviceFree(vlVdpDevice *dev)
    vl_compositor_cleanup(&dev->compositor);
    pipe_sampler_view_reference(&dev->dummy_sv, NULL);
    dev->context->destroy(dev->context);
-   vl_screen_destroy(dev->vscreen);
+   dev->vscreen->destroy(dev->vscreen);
    FREE(dev);
    vlDestroyHTAB();
 }
diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c
index e53303708b2..e7f387e6173 100644
--- a/src/gallium/state_trackers/vdpau/presentation.c
+++ b/src/gallium/state_trackers/vdpau/presentation.c
@@ -186,7 +186,8 @@ vlVdpPresentationQueueGetTime(VdpPresentationQueue presentation_queue,
       return VDP_STATUS_INVALID_HANDLE;
 
    pipe_mutex_lock(pq->device->mutex);
-   *current_time = vl_screen_get_timestamp(pq->device->vscreen, pq->drawable);
+   *current_time = pq->device->vscreen->get_timestamp(pq->device->vscreen,
+                                                      (void *)pq->drawable);
    pipe_mutex_unlock(pq->device->mutex);
 
    return VDP_STATUS_OK;
@@ -214,6 +215,7 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue,
 
    struct vl_compositor *compositor;
    struct vl_compositor_state *cstate;
+   struct vl_screen *vscreen;
 
    pq = vlGetDataHTAB(presentation_queue);
    if (!pq)
@@ -226,15 +228,16 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue,
    pipe = pq->device->context;
    compositor = &pq->device->compositor;
    cstate = &pq->cstate;
+   vscreen = pq->device->vscreen;
 
    pipe_mutex_lock(pq->device->mutex);
-   tex = vl_screen_texture_from_drawable(pq->device->vscreen, pq->drawable);
+   tex = vscreen->texture_from_drawable(vscreen, (void *)pq->drawable);
    if (!tex) {
       pipe_mutex_unlock(pq->device->mutex);
       return VDP_STATUS_INVALID_HANDLE;
    }
 
-   dirty_area = vl_screen_get_dirty_area(pq->device->vscreen);
+   dirty_area = vscreen->get_dirty_area(vscreen);
 
    memset(&surf_templ, 0, sizeof(surf_templ));
    surf_templ.format = tex->format;
@@ -267,12 +270,9 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue,
       vl_compositor_render(cstate, compositor, surf_draw, dirty_area, true);
    }
 
-   vl_screen_set_next_timestamp(pq->device->vscreen, earliest_presentation_time);
-   pipe->screen->flush_frontbuffer
-   (
-      pipe->screen, tex, 0, 0,
-      vl_screen_get_private(pq->device->vscreen), NULL
-   );
+   vscreen->set_next_timestamp(vscreen, earliest_presentation_time);
+   pipe->screen->flush_frontbuffer(pipe->screen, tex, 0, 0,
+                                   vscreen->get_private(vscreen), NULL);
 
    pipe->screen->fence_reference(pipe->screen, &surf->fence, NULL);
    pipe->flush(pipe, &surf->fence, 0);

From 4533c022f41ebcaa91f2c22c04824d647c8c9fec Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Mon, 9 Nov 2015 11:24:35 +0000
Subject: [PATCH 154/335] auxiliary/vl/drm: hide internal functions

As of last commit everyone is using the vl_screen dispatch, thus we can
hide this function from the headers and make it static.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/auxiliary/vl/vl_winsys.h     | 3 ---
 src/gallium/auxiliary/vl/vl_winsys_drm.c | 7 +++++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_winsys.h b/src/gallium/auxiliary/vl/vl_winsys.h
index 5390f722d09..8be4692e369 100644
--- a/src/gallium/auxiliary/vl/vl_winsys.h
+++ b/src/gallium/auxiliary/vl/vl_winsys.h
@@ -86,7 +86,4 @@ vl_dri2_screen_create(Display *display, int screen);
 struct vl_screen *
 vl_drm_screen_create(int fd);
 
-void
-vl_drm_screen_destroy(struct vl_screen *vscreen);
-
 #endif
diff --git a/src/gallium/auxiliary/vl/vl_winsys_drm.c b/src/gallium/auxiliary/vl/vl_winsys_drm.c
index b9efc9a901b..c96187bf1c5 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_drm.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_drm.c
@@ -34,7 +34,10 @@
 #include "util/u_memory.h"
 #include "vl/vl_winsys.h"
 
-struct vl_screen*
+static void
+vl_drm_screen_destroy(struct vl_screen *vscreen);
+
+struct vl_screen *
 vl_drm_screen_create(int fd)
 {
    struct vl_screen *vscreen;
@@ -72,7 +75,7 @@ error:
    return NULL;
 }
 
-void
+static void
 vl_drm_screen_destroy(struct vl_screen *vscreen)
 {
    assert(vscreen);

From b31f092bfb48118e671d0705c21b1c6f6f3bef84 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Mon, 9 Nov 2015 11:25:59 +0000
Subject: [PATCH 155/335] auxiliary/vl/dri2: hide internal functions

Analogous to previous commit. While we're here prefix all functions
identically -> vl_dri2_foo

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/auxiliary/vl/vl_winsys.h     | 17 -----------
 src/gallium/auxiliary/vl/vl_winsys_dri.c | 37 +++++++++++++-----------
 2 files changed, 20 insertions(+), 34 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_winsys.h b/src/gallium/auxiliary/vl/vl_winsys.h
index 8be4692e369..7d3c9410537 100644
--- a/src/gallium/auxiliary/vl/vl_winsys.h
+++ b/src/gallium/auxiliary/vl/vl_winsys.h
@@ -63,23 +63,6 @@ struct vl_screen
    struct pipe_loader_device *dev;
 };
 
-void vl_screen_destroy(struct vl_screen *vscreen);
-
-struct pipe_resource*
-vl_screen_texture_from_drawable(struct vl_screen *vscreen, void *drawable);
-
-struct u_rect *
-vl_screen_get_dirty_area(struct vl_screen *vscreen);
-
-uint64_t
-vl_screen_get_timestamp(struct vl_screen *vscreen, void *drawable);
-
-void
-vl_screen_set_next_timestamp(struct vl_screen *vscreen, uint64_t stamp);
-
-void*
-vl_screen_get_private(struct vl_screen *vscreen);
-
 struct vl_screen*
 vl_dri2_screen_create(Display *display, int screen);
 
diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri.c b/src/gallium/auxiliary/vl/vl_winsys_dri.c
index 12f7887e47e..098b9a90b84 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_dri.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri.c
@@ -75,6 +75,8 @@ struct vl_dri_screen
 
 static const unsigned int attachments[1] = { XCB_DRI2_ATTACHMENT_BUFFER_BACK_LEFT };
 
+static void vl_dri2_screen_destroy(struct vl_screen *vscreen);
+
 static void
 vl_dri2_handle_stamps(struct vl_dri_screen* scrn,
                       uint32_t ust_hi, uint32_t ust_lo,
@@ -170,8 +172,8 @@ vl_dri2_set_drawable(struct vl_dri_screen *scrn, Drawable drawable)
    scrn->drawable = drawable;
 }
 
-struct pipe_resource*
-vl_screen_texture_from_drawable(struct vl_screen *vscreen, void *drawable)
+static struct pipe_resource *
+vl_dri2_screen_texture_from_drawable(struct vl_screen *vscreen, void *drawable)
 {
    struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen;
 
@@ -248,16 +250,16 @@ vl_screen_texture_from_drawable(struct vl_screen *vscreen, void *drawable)
    return tex;
 }
 
-struct u_rect *
-vl_screen_get_dirty_area(struct vl_screen *vscreen)
+static struct u_rect *
+vl_dri2_screen_get_dirty_area(struct vl_screen *vscreen)
 {
    struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen;
    assert(scrn);
    return &scrn->dirty_areas[scrn->current_buffer];
 }
 
-uint64_t
-vl_screen_get_timestamp(struct vl_screen *vscreen, void *drawable)
+static uint64_t
+vl_dri2_screen_get_timestamp(struct vl_screen *vscreen, void *drawable)
 {
    struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen;
    xcb_dri2_get_msc_cookie_t cookie;
@@ -279,8 +281,8 @@ vl_screen_get_timestamp(struct vl_screen *vscreen, void *drawable)
    return scrn->last_ust;
 }
 
-void
-vl_screen_set_next_timestamp(struct vl_screen *vscreen, uint64_t stamp)
+static void
+vl_dri2_screen_set_next_timestamp(struct vl_screen *vscreen, uint64_t stamp)
 {
    struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen;
    assert(scrn);
@@ -290,8 +292,8 @@ vl_screen_set_next_timestamp(struct vl_screen *vscreen, uint64_t stamp)
       scrn->next_msc = 0;
 }
 
-void*
-vl_screen_get_private(struct vl_screen *vscreen)
+static void *
+vl_dri2_screen_get_private(struct vl_screen *vscreen)
 {
    return vscreen;
 }
@@ -398,12 +400,12 @@ vl_dri2_screen_create(Display *display, int screen)
    if (!scrn->base.pscreen)
       goto release_pipe;
 
-   scrn->base.destroy = vl_screen_destroy;
-   scrn->base.texture_from_drawable = vl_screen_texture_from_drawable;
-   scrn->base.get_dirty_area = vl_screen_get_dirty_area;
-   scrn->base.get_timestamp = vl_screen_get_timestamp;
-   scrn->base.set_next_timestamp = vl_screen_set_next_timestamp;
-   scrn->base.get_private = vl_screen_get_private;
+   scrn->base.destroy = vl_dri2_screen_destroy;
+   scrn->base.texture_from_drawable = vl_dri2_screen_texture_from_drawable;
+   scrn->base.get_dirty_area = vl_dri2_screen_get_dirty_area;
+   scrn->base.get_timestamp = vl_dri2_screen_get_timestamp;
+   scrn->base.set_next_timestamp = vl_dri2_screen_set_next_timestamp;
+   scrn->base.get_private = vl_dri2_screen_get_private;
    scrn->base.pscreen->flush_frontbuffer = vl_dri2_flush_frontbuffer;
    vl_compositor_reset_dirty_area(&scrn->dirty_areas[0]);
    vl_compositor_reset_dirty_area(&scrn->dirty_areas[1]);
@@ -433,7 +435,8 @@ free_screen:
    return NULL;
 }
 
-void vl_screen_destroy(struct vl_screen *vscreen)
+static void
+vl_dri2_screen_destroy(struct vl_screen *vscreen)
 {
    struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen;
 

From 3afb253e9ba5ea44cf7be34cc2c3e0e358d169d0 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Fri, 6 Nov 2015 23:39:01 +0000
Subject: [PATCH 156/335] auxiliary/vl/dri2: coding style fixes

Rewrap long(ish) lines, add space between struct foo and *.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/auxiliary/vl/vl_winsys.h     |  2 +-
 src/gallium/auxiliary/vl/vl_winsys_dri.c | 54 +++++++++++++++---------
 2 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_winsys.h b/src/gallium/auxiliary/vl/vl_winsys.h
index 7d3c9410537..1af7653d650 100644
--- a/src/gallium/auxiliary/vl/vl_winsys.h
+++ b/src/gallium/auxiliary/vl/vl_winsys.h
@@ -63,7 +63,7 @@ struct vl_screen
    struct pipe_loader_device *dev;
 };
 
-struct vl_screen*
+struct vl_screen *
 vl_dri2_screen_create(Display *display, int screen);
 
 struct vl_screen *
diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri.c b/src/gallium/auxiliary/vl/vl_winsys_dri.c
index 098b9a90b84..e0683a5e7d6 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_dri.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri.c
@@ -73,26 +73,27 @@ struct vl_dri_screen
    int64_t last_ust, ns_frame, last_msc, next_msc;
 };
 
-static const unsigned int attachments[1] = { XCB_DRI2_ATTACHMENT_BUFFER_BACK_LEFT };
+static const unsigned attachments[1] = { XCB_DRI2_ATTACHMENT_BUFFER_BACK_LEFT };
 
 static void vl_dri2_screen_destroy(struct vl_screen *vscreen);
 
 static void
-vl_dri2_handle_stamps(struct vl_dri_screen* scrn,
+vl_dri2_handle_stamps(struct vl_dri_screen *scrn,
                       uint32_t ust_hi, uint32_t ust_lo,
                       uint32_t msc_hi, uint32_t msc_lo)
 {
    int64_t ust = ((((uint64_t)ust_hi) << 32) | ust_lo) * 1000;
    int64_t msc = (((uint64_t)msc_hi) << 32) | msc_lo;
 
-   if (scrn->last_ust && scrn->last_msc && (ust > scrn->last_ust) && (msc > scrn->last_msc))
+   if (scrn->last_ust && (ust > scrn->last_ust) &&
+       scrn->last_msc && (msc > scrn->last_msc))
       scrn->ns_frame = (ust - scrn->last_ust) / (msc - scrn->last_msc);
 
    scrn->last_ust = ust;
    scrn->last_msc = msc;
 }
 
-static xcb_dri2_get_buffers_reply_t*
+static xcb_dri2_get_buffers_reply_t *
 vl_dri2_get_flush_reply(struct vl_dri_screen *scrn)
 {
    xcb_dri2_wait_sbc_reply_t *wait_sbc_reply;
@@ -122,7 +123,7 @@ vl_dri2_flush_frontbuffer(struct pipe_screen *screen,
                           unsigned level, unsigned layer,
                           void *context_private, struct pipe_box *sub_box)
 {
-   struct vl_dri_screen *scrn = (struct vl_dri_screen*)context_private;
+   struct vl_dri_screen *scrn = (struct vl_dri_screen *)context_private;
    uint32_t msc_hi, msc_lo;
 
    assert(screen);
@@ -134,9 +135,11 @@ vl_dri2_flush_frontbuffer(struct pipe_screen *screen,
    msc_hi = scrn->next_msc >> 32;
    msc_lo = scrn->next_msc & 0xFFFFFFFF;
 
-   scrn->swap_cookie = xcb_dri2_swap_buffers_unchecked(scrn->conn, scrn->drawable, msc_hi, msc_lo, 0, 0, 0, 0);
+   scrn->swap_cookie = xcb_dri2_swap_buffers_unchecked(scrn->conn, scrn->drawable,
+                                                       msc_hi, msc_lo, 0, 0, 0, 0);
    scrn->wait_cookie = xcb_dri2_wait_sbc_unchecked(scrn->conn, scrn->drawable, 0, 0);
-   scrn->buffers_cookie = xcb_dri2_get_buffers_unchecked(scrn->conn, scrn->drawable, 1, 1, attachments);
+   scrn->buffers_cookie = xcb_dri2_get_buffers_unchecked(scrn->conn, scrn->drawable,
+                                                         1, 1, attachments);
 
    scrn->flushed = true;
    scrn->current_buffer = !scrn->current_buffer;
@@ -175,7 +178,7 @@ vl_dri2_set_drawable(struct vl_dri_screen *scrn, Drawable drawable)
 static struct pipe_resource *
 vl_dri2_screen_texture_from_drawable(struct vl_screen *vscreen, void *drawable)
 {
-   struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen;
+   struct vl_dri_screen *scrn = (struct vl_dri_screen *)vscreen;
 
    struct winsys_handle dri2_handle;
    struct pipe_resource template, *tex;
@@ -244,7 +247,8 @@ vl_dri2_screen_texture_from_drawable(struct vl_screen *vscreen, void *drawable)
    template.bind = PIPE_BIND_RENDER_TARGET;
    template.flags = 0;
 
-   tex = scrn->base.pscreen->resource_from_handle(scrn->base.pscreen, &template, &dri2_handle);
+   tex = scrn->base.pscreen->resource_from_handle(scrn->base.pscreen, &template,
+                                                  &dri2_handle);
    free(reply);
 
    return tex;
@@ -253,7 +257,7 @@ vl_dri2_screen_texture_from_drawable(struct vl_screen *vscreen, void *drawable)
 static struct u_rect *
 vl_dri2_screen_get_dirty_area(struct vl_screen *vscreen)
 {
-   struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen;
+   struct vl_dri_screen *scrn = (struct vl_dri_screen *)vscreen;
    assert(scrn);
    return &scrn->dirty_areas[scrn->current_buffer];
 }
@@ -261,7 +265,7 @@ vl_dri2_screen_get_dirty_area(struct vl_screen *vscreen)
 static uint64_t
 vl_dri2_screen_get_timestamp(struct vl_screen *vscreen, void *drawable)
 {
-   struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen;
+   struct vl_dri_screen *scrn = (struct vl_dri_screen *)vscreen;
    xcb_dri2_get_msc_cookie_t cookie;
    xcb_dri2_get_msc_reply_t *reply;
 
@@ -284,10 +288,11 @@ vl_dri2_screen_get_timestamp(struct vl_screen *vscreen, void *drawable)
 static void
 vl_dri2_screen_set_next_timestamp(struct vl_screen *vscreen, uint64_t stamp)
 {
-   struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen;
+   struct vl_dri_screen *scrn = (struct vl_dri_screen *)vscreen;
    assert(scrn);
    if (stamp && scrn->last_ust && scrn->ns_frame && scrn->last_msc)
-      scrn->next_msc = ((int64_t)stamp - scrn->last_ust + scrn->ns_frame/2) / scrn->ns_frame + scrn->last_msc;
+      scrn->next_msc = ((int64_t)stamp - scrn->last_ust + scrn->ns_frame/2) /
+                       scrn->ns_frame + scrn->last_msc;
    else
       scrn->next_msc = 0;
 }
@@ -308,7 +313,7 @@ get_xcb_screen(xcb_screen_iterator_t iter, int screen)
     return NULL;
 }
 
-struct vl_screen*
+struct vl_screen *
 vl_dri2_screen_create(Display *display, int screen)
 {
    struct vl_dri_screen *scrn;
@@ -323,7 +328,7 @@ vl_dri2_screen_create(Display *display, int screen)
    xcb_generic_error_t *error = NULL;
    char *device_name;
    int fd, device_name_length;
-   unsigned int driverType;
+   unsigned driverType;
 
    drm_magic_t magic;
 
@@ -343,7 +348,9 @@ vl_dri2_screen_create(Display *display, int screen)
    if (!(extension && extension->present))
       goto free_screen;
 
-   dri2_query_cookie = xcb_dri2_query_version (scrn->conn, XCB_DRI2_MAJOR_VERSION, XCB_DRI2_MINOR_VERSION);
+   dri2_query_cookie = xcb_dri2_query_version (scrn->conn,
+                                               XCB_DRI2_MAJOR_VERSION,
+                                               XCB_DRI2_MINOR_VERSION);
    dri2_query = xcb_dri2_query_version_reply (scrn->conn, dri2_query_cookie, &error);
    if (dri2_query == NULL || error != NULL || dri2_query->minor_version < 2)
       goto free_query;
@@ -355,7 +362,7 @@ vl_dri2_screen_create(Display *display, int screen)
    {
       char *prime = getenv("DRI_PRIME");
       if (prime) {
-         unsigned int primeid;
+         unsigned primeid;
          errno = 0;
          primeid = strtoul(prime, NULL, 0);
          if (errno == 0)
@@ -365,9 +372,12 @@ vl_dri2_screen_create(Display *display, int screen)
    }
 #endif
 
-   connect_cookie = xcb_dri2_connect_unchecked(scrn->conn, get_xcb_screen(s, screen)->root, driverType);
+   connect_cookie = xcb_dri2_connect_unchecked(scrn->conn,
+                                               get_xcb_screen(s, screen)->root,
+                                               driverType);
    connect = xcb_dri2_connect_reply(scrn->conn, connect_cookie, NULL);
-   if (connect == NULL || connect->driver_name_length + connect->device_name_length == 0)
+   if (connect == NULL ||
+       connect->driver_name_length + connect->device_name_length == 0)
       goto free_connect;
 
    device_name_length = xcb_dri2_connect_device_name_length(connect);
@@ -384,7 +394,9 @@ vl_dri2_screen_create(Display *display, int screen)
    if (drmGetMagic(fd, &magic))
       goto free_connect;
 
-   authenticate_cookie = xcb_dri2_authenticate_unchecked(scrn->conn, get_xcb_screen(s, screen)->root, magic);
+   authenticate_cookie = xcb_dri2_authenticate_unchecked(scrn->conn,
+                                                         get_xcb_screen(s, screen)->root,
+                                                         magic);
    authenticate = xcb_dri2_authenticate_reply(scrn->conn, authenticate_cookie, NULL);
 
    if (authenticate == NULL || !authenticate->authenticated)
@@ -438,7 +450,7 @@ free_screen:
 static void
 vl_dri2_screen_destroy(struct vl_screen *vscreen)
 {
-   struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen;
+   struct vl_dri_screen *scrn = (struct vl_dri_screen *)vscreen;
 
    assert(vscreen);
 

From 9108a785a0fc6bb47ca577d2e2bdcf3728fead43 Mon Sep 17 00:00:00 2001
From: Gregory Hainaut <gregory.hainaut@gmail.com>
Date: Sun, 25 Oct 2015 15:01:36 +0100
Subject: [PATCH 157/335] glsl: avoid linker and user varying location to
 overlap

Current behavior during interface matching:

layout (location = 0) out0; // Assigned to VARYING_SLOT_VAR0 by user
out1; // Assigned to VARYING_SLOT_VAR0 by the linker

New behavior during interface matching:

layout (location = 0) out0; // Assigned to VARYING_SLOT_VAR0 by user
out1; // Assigned to VARYING_SLOT_VAR1 by the linker

v4:
* Fix variable name in assert

Signed-off-by: Gregory Hainaut <gregory.hainaut@gmail.com>
Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
---
 src/glsl/link_varyings.cpp | 46 +++++++++++++++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp
index 7e77a675db1..c0b4b3e820c 100644
--- a/src/glsl/link_varyings.cpp
+++ b/src/glsl/link_varyings.cpp
@@ -766,7 +766,7 @@ public:
                    gl_shader_stage consumer_stage);
    ~varying_matches();
    void record(ir_variable *producer_var, ir_variable *consumer_var);
-   unsigned assign_locations();
+   unsigned assign_locations(uint64_t reserved_slots);
    void store_locations() const;
 
 private:
@@ -986,7 +986,7 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
  * passed to varying_matches::record().
  */
 unsigned
-varying_matches::assign_locations()
+varying_matches::assign_locations(uint64_t reserved_slots)
 {
    /* Sort varying matches into an order that makes them easy to pack. */
    qsort(this->matches, this->num_matches, sizeof(*this->matches),
@@ -1013,6 +1013,10 @@ varying_matches::assign_locations()
           != this->matches[i].packing_class) {
          *location = ALIGN(*location, 4);
       }
+      while ((*location < MAX_VARYING * 4u) &&
+            (reserved_slots & (1u << *location / 4u))) {
+         *location = ALIGN(*location + 1, 4);
+      }
 
       this->matches[i].generic_location = *location;
 
@@ -1375,6 +1379,38 @@ canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode)
    }
 }
 
+/**
+ * Build a bitmask of the generic varying slots claimed through explicit
+ * locations by variables of mode io_mode in stage (bit N <=> VARYING_SLOT_VAR0
+ * + N, for 0 <= N < MAX_VARYING). A NULL stage reserves nothing.
+ */
+uint64_t
+reserved_varying_slot(struct gl_shader *stage, ir_variable_mode io_mode)
+{
+   assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out);
+   assert(MAX_VARYING <= 64); /* avoid an overflow of the returned value */
+
+   uint64_t slots = 0;
+
+   if (!stage)
+      return slots;
+
+   foreach_in_list(ir_instruction, node, stage->ir) {
+      ir_variable *const var = node->as_variable();
+
+      if (var == NULL || var->data.mode != io_mode || !var->data.explicit_location)
+         continue;
+
+      /* Shift a 64-bit one: "1u << n" is undefined once n reaches 32. */
+      const int var_slot = var->data.location - VARYING_SLOT_VAR0;
+      if (var_slot >= 0 && var_slot < MAX_VARYING)
+         slots |= (uint64_t)1 << var_slot;
+   }
+
+   return slots;
+}
+
+
 /**
  * Assign locations for all variables that are produced in one pipeline stage
  * (the "producer") and consumed in the next stage (the "consumer").
@@ -1550,7 +1586,11 @@ assign_varying_locations(struct gl_context *ctx,
          matches.record(matched_candidate->toplevel_var, NULL);
    }
 
-   const unsigned slots_used = matches.assign_locations();
+   const uint64_t reserved_slots =
+      reserved_varying_slot(producer, ir_var_shader_out) |
+      reserved_varying_slot(consumer, ir_var_shader_in);
+
+   const unsigned slots_used = matches.assign_locations(reserved_slots);
    matches.store_locations();
 
    for (unsigned i = 0; i < num_tfeedback_decls; ++i) {

From af272368547600e1db87d4dd5d718e41ea9db6c0 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Thu, 5 Nov 2015 20:22:25 +0000
Subject: [PATCH 158/335] mesa: use the correct string for the ES GL_KHR_debug
 functions

As defined in the spec

    when implemented in an OpenGL ES context, all entry points defined
    by this extension must have a "KHR" suffix.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/mesa/main/errors.c      | 40 ++++++++++++++++++++++++++------
 src/mesa/main/getstring.c   | 10 ++++++--
 src/mesa/main/objectlabel.c | 46 +++++++++++++++++++++++++++++--------
 3 files changed, 77 insertions(+), 19 deletions(-)

diff --git a/src/mesa/main/errors.c b/src/mesa/main/errors.c
index f720de316e4..fe628c39ac5 100644
--- a/src/mesa/main/errors.c
+++ b/src/mesa/main/errors.c
@@ -978,9 +978,13 @@ _mesa_DebugMessageInsert(GLenum source, GLenum type, GLuint id,
                          GLenum severity, GLint length,
                          const GLchar *buf)
 {
-   const char *callerstr = "glDebugMessageInsert";
-
    GET_CURRENT_CONTEXT(ctx);
+   const char *callerstr;
+
+   if (_mesa_is_desktop_gl(ctx))
+      callerstr = "glDebugMessageInsert";
+   else
+      callerstr = "glDebugMessageInsertKHR";
 
    if (!validate_params(ctx, INSERT, callerstr, source, type, severity))
       return; /* GL_INVALID_ENUM */
@@ -1004,15 +1008,21 @@ _mesa_GetDebugMessageLog(GLuint count, GLsizei logSize, GLenum *sources,
 {
    GET_CURRENT_CONTEXT(ctx);
    struct gl_debug_state *debug;
+   const char *callerstr;
    GLuint ret;
 
+   if (_mesa_is_desktop_gl(ctx))
+      callerstr = "glGetDebugMessageLog";
+   else
+      callerstr = "glGetDebugMessageLogKHR";
+
    if (!messageLog)
       logSize = 0;
 
    if (logSize < 0) {
       _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glGetDebugMessageLog(logSize=%d : logSize must not be"
-                  " negative)", logSize);
+                  "%s(logSize=%d : logSize must not be negative)",
+                  callerstr, logSize);
       return 0;
    }
 
@@ -1066,9 +1076,14 @@ _mesa_DebugMessageControl(GLenum gl_source, GLenum gl_type,
    enum mesa_debug_source source = gl_enum_to_debug_source(gl_source);
    enum mesa_debug_type type = gl_enum_to_debug_type(gl_type);
    enum mesa_debug_severity severity = gl_enum_to_debug_severity(gl_severity);
-   const char *callerstr = "glDebugMessageControl";
+   const char *callerstr;
    struct gl_debug_state *debug;
 
+   if (_mesa_is_desktop_gl(ctx))
+      callerstr = "glDebugMessageControl";
+   else
+      callerstr = "glDebugMessageControlKHR";
+
    if (count < 0) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "%s(count=%d : count must not be negative)", callerstr,
@@ -1124,9 +1139,15 @@ _mesa_PushDebugGroup(GLenum source, GLuint id, GLsizei length,
                      const GLchar *message)
 {
    GET_CURRENT_CONTEXT(ctx);
-   const char *callerstr = "glPushDebugGroup";
+   const char *callerstr;
    struct gl_debug_state *debug;
    struct gl_debug_message *emptySlot;
+   GLuint ret;
+
+   if (_mesa_is_desktop_gl(ctx))
+      callerstr = "glPushDebugGroup";
+   else
+      callerstr = "glPushDebugGroupKHR";
 
    switch(source) {
    case GL_DEBUG_SOURCE_APPLICATION:
@@ -1176,10 +1197,15 @@ void GLAPIENTRY
 _mesa_PopDebugGroup(void)
 {
    GET_CURRENT_CONTEXT(ctx);
-   const char *callerstr = "glPopDebugGroup";
+   const char *callerstr;
    struct gl_debug_state *debug;
    struct gl_debug_message *gdmessage, msg;
 
+   if (_mesa_is_desktop_gl(ctx))
+      callerstr = "glPopDebugGroup";
+   else
+      callerstr = "glPopDebugGroupKHR";
+
    debug = _mesa_lock_debug_state(ctx);
    if (!debug)
       return;
diff --git a/src/mesa/main/getstring.c b/src/mesa/main/getstring.c
index 9873fdbf1a4..2e339c85e28 100644
--- a/src/mesa/main/getstring.c
+++ b/src/mesa/main/getstring.c
@@ -203,12 +203,18 @@ _mesa_GetPointerv( GLenum pname, GLvoid **params )
 {
    GET_CURRENT_CONTEXT(ctx);
    const GLuint clientUnit = ctx->Array.ActiveTexture;
+   const char *callerstr;
+
+   if (_mesa_is_desktop_gl(ctx))
+      callerstr = "glGetPointerv";
+   else
+      callerstr = "glGetPointervKHR";
 
    if (!params)
       return;
 
    if (MESA_VERBOSE & VERBOSE_API)
-      _mesa_debug(ctx, "glGetPointerv %s\n", _mesa_enum_to_string(pname));
+      _mesa_debug(ctx, "%s %s\n", callerstr, _mesa_enum_to_string(pname));
 
    switch (pname) {
       case GL_VERTEX_ARRAY_POINTER:
@@ -280,7 +286,7 @@ _mesa_GetPointerv( GLenum pname, GLvoid **params )
    return;
 
 invalid_pname:
-   _mesa_error( ctx, GL_INVALID_ENUM, "glGetPointerv" );
+   _mesa_error( ctx, GL_INVALID_ENUM, "%s", callerstr);
    return;
 }
 
diff --git a/src/mesa/main/objectlabel.c b/src/mesa/main/objectlabel.c
index 1019f893ba8..41f370ce485 100644
--- a/src/mesa/main/objectlabel.c
+++ b/src/mesa/main/objectlabel.c
@@ -243,13 +243,19 @@ _mesa_ObjectLabel(GLenum identifier, GLuint name, GLsizei length,
                   const GLchar *label)
 {
    GET_CURRENT_CONTEXT(ctx);
+   const char *callerstr;
    char **labelPtr;
 
-   labelPtr = get_label_pointer(ctx, identifier, name, "glObjectLabel");
+   if (_mesa_is_desktop_gl(ctx))
+      callerstr = "glObjectLabel";
+   else
+      callerstr = "glObjectLabelKHR";
+
+   labelPtr = get_label_pointer(ctx, identifier, name, callerstr);
    if (!labelPtr)
       return;
 
-   set_label(ctx, labelPtr, label, length, "glObjectLabel");
+   set_label(ctx, labelPtr, label, length, callerstr);
 }
 
 void GLAPIENTRY
@@ -257,15 +263,21 @@ _mesa_GetObjectLabel(GLenum identifier, GLuint name, GLsizei bufSize,
                      GLsizei *length, GLchar *label)
 {
    GET_CURRENT_CONTEXT(ctx);
+   const char *callerstr;
    char **labelPtr;
 
+   if (_mesa_is_desktop_gl(ctx))
+      callerstr = "glGetObjectLabel";
+   else
+      callerstr = "glGetObjectLabelKHR";
+
    if (bufSize < 0) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glGetObjectLabel(bufSize = %d)",
+      _mesa_error(ctx, GL_INVALID_VALUE, "%s(bufSize = %d)", callerstr,
                   bufSize);
       return;
    }
 
-   labelPtr = get_label_pointer(ctx, identifier, name, "glGetObjectLabel");
+   labelPtr = get_label_pointer(ctx, identifier, name, callerstr);
    if (!labelPtr)
       return;
 
@@ -276,17 +288,24 @@ void GLAPIENTRY
 _mesa_ObjectPtrLabel(const void *ptr, GLsizei length, const GLchar *label)
 {
    GET_CURRENT_CONTEXT(ctx);
-   char **labelPtr;
    struct gl_sync_object *const syncObj = (struct gl_sync_object *) ptr;
+   const char *callerstr;
+   char **labelPtr;
+
+   if (_mesa_is_desktop_gl(ctx))
+      callerstr = "glObjectPtrLabel";
+   else
+      callerstr = "glObjectPtrLabelKHR";
 
    if (!_mesa_validate_sync(ctx, syncObj)) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glObjectPtrLabel (not a valid sync object)");
+      _mesa_error(ctx, GL_INVALID_VALUE, "%s (not a valid sync object)",
+                  callerstr);
       return;
    }
 
    labelPtr = &syncObj->Label;
 
-   set_label(ctx, labelPtr, label, length, "glObjectPtrLabel");
+   set_label(ctx, labelPtr, label, length, callerstr);
 }
 
 void GLAPIENTRY
@@ -294,17 +313,24 @@ _mesa_GetObjectPtrLabel(const void *ptr, GLsizei bufSize, GLsizei *length,
                         GLchar *label)
 {
    GET_CURRENT_CONTEXT(ctx);
-   char **labelPtr;
    struct gl_sync_object *const syncObj = (struct gl_sync_object *) ptr;
+   const char *callerstr;
+   char **labelPtr;
+
+   if (_mesa_is_desktop_gl(ctx))
+      callerstr = "glGetObjectPtrLabel";
+   else
+      callerstr = "glGetObjectPtrLabelKHR";
 
    if (bufSize < 0) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glGetObjectPtrLabel(bufSize = %d)",
+      _mesa_error(ctx, GL_INVALID_VALUE, "%s(bufSize = %d)", callerstr,
                   bufSize);
       return;
    }
 
    if (!_mesa_validate_sync(ctx, syncObj)) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glGetObjectPtrLabel (not a valid sync object)");
+      _mesa_error(ctx, GL_INVALID_VALUE, "%s (not a valid sync object)",
+                  callerstr);
       return;
    }
 

From ab7294668cb245fbe8c925be6fd6f0242bdd6a0f Mon Sep 17 00:00:00 2001
From: Boyan Ding <boyan.j.ding@gmail.com>
Date: Sun, 8 Nov 2015 17:56:40 +0800
Subject: [PATCH 159/335] main: Don't restrict several KHR_debug enums to
 desktop GL

In preparation for supporting GL_KHR_debug in OpenGL ES.

v2: add a missing hunk in _mesa_IsEnabled (Emil)

Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/mesa/main/enable.c    | 10 ++--------
 src/mesa/main/getstring.c |  5 +----
 2 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
index 42f67990784..a8a667e3c12 100644
--- a/src/mesa/main/enable.c
+++ b/src/mesa/main/enable.c
@@ -369,10 +369,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state)
          break;
       case GL_DEBUG_OUTPUT:
       case GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB:
-         if (!_mesa_is_desktop_gl(ctx))
-            goto invalid_enum_error;
-         else
-            _mesa_set_debug_state_int(ctx, cap, state);
+         _mesa_set_debug_state_int(ctx, cap, state);
          break;
       case GL_DITHER:
          if (ctx->Color.DitherFlag == state)
@@ -1225,10 +1222,7 @@ _mesa_IsEnabled( GLenum cap )
          return ctx->Polygon.CullFlag;
       case GL_DEBUG_OUTPUT:
       case GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB:
-         if (!_mesa_is_desktop_gl(ctx))
-            goto invalid_enum_error;
-         else
-            return (GLboolean) _mesa_get_debug_state_int(ctx, cap);
+         return (GLboolean) _mesa_get_debug_state_int(ctx, cap);
       case GL_DEPTH_TEST:
          return ctx->Depth.Test;
       case GL_DITHER:
diff --git a/src/mesa/main/getstring.c b/src/mesa/main/getstring.c
index 2e339c85e28..06ba17c92d6 100644
--- a/src/mesa/main/getstring.c
+++ b/src/mesa/main/getstring.c
@@ -274,10 +274,7 @@ _mesa_GetPointerv( GLenum pname, GLvoid **params )
          break;
       case GL_DEBUG_CALLBACK_FUNCTION_ARB:
       case GL_DEBUG_CALLBACK_USER_PARAM_ARB:
-         if (!_mesa_is_desktop_gl(ctx))
-            goto invalid_pname;
-         else
-            *params = _mesa_get_debug_state_ptr(ctx, pname);
+         *params = _mesa_get_debug_state_ptr(ctx, pname);
          break;
       default:
          goto invalid_pname;

From b8547a50631649bf19fc29cb339bdb3992537607 Mon Sep 17 00:00:00 2001
From: Boyan Ding <boyan.j.ding@gmail.com>
Date: Fri, 20 Nov 2015 11:11:19 +0000
Subject: [PATCH 160/335] mesa: re-enable KHR_debug for ES contexts

With the earlier issues resolved we can expose the extension.

Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 docs/relnotes/11.1.0.html        | 1 +
 src/mesa/main/extensions_table.h | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 2e20bb3444e..2f462f7b28a 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -63,6 +63,7 @@ Note: some of the new features are only available with certain drivers.
 <li>GL_EXT_buffer_storage implemented for when ES 3.1 support is gained</li>
 <li>GL_EXT_draw_elements_base_vertex on all drivers</li>
 <li>GL_EXT_texture_compression_rgtc / latc on freedreno (a3xx & a4xx)</li>
+<li>GL_KHR_debug (GLES)</li>
 <li>GL_NV_conditional_render on freedreno</li>
 <li>GL_OES_draw_elements_base_vertex on all drivers</li>
 <li>EGL_KHR_create_context on softpipe, llvmpipe</li>
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index 7facc7ff3ca..051d69a3613 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -258,7 +258,7 @@ EXT(INGR_blend_func_separate                , EXT_blend_func_separate
 EXT(INTEL_performance_query                 , INTEL_performance_query                , GLL, GLC,  x , ES2, 2013)
 
 EXT(KHR_context_flush_control               , dummy_true                             , GLL, GLC,  x , ES2, 2014)
-EXT(KHR_debug                               , dummy_true                             , GLL, GLC,  x ,  x , 2012)
+EXT(KHR_debug                               , dummy_true                             , GLL, GLC, ES1, ES2, 2012)
 EXT(KHR_texture_compression_astc_hdr        , KHR_texture_compression_astc_hdr       , GLL, GLC,  x , ES2, 2012)
 EXT(KHR_texture_compression_astc_ldr        , KHR_texture_compression_astc_ldr       , GLL, GLC,  x , ES2, 2012)
 

From 480ba46bcb44169cefb46a978c24e3f77d144df2 Mon Sep 17 00:00:00 2001
From: Mauro Rossi <issor.oruam@gmail.com>
Date: Sat, 7 Nov 2015 01:23:46 +0100
Subject: [PATCH 161/335] android: export the path of glsl nir headers

This change is necessary to avoid build errors in the glsl and i965
modules caused by the missing glsl_types.h header.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/glsl/Android.gen.mk | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/glsl/Android.gen.mk b/src/glsl/Android.gen.mk
index 6898fb0d492..59cc8577a6e 100644
--- a/src/glsl/Android.gen.mk
+++ b/src/glsl/Android.gen.mk
@@ -38,7 +38,8 @@ LOCAL_C_INCLUDES += \
 	$(MESA_TOP)/src/glsl/nir
 
 LOCAL_EXPORT_C_INCLUDE_DIRS += \
-	$(intermediates)/nir
+	$(intermediates)/nir \
+	$(MESA_TOP)/src/glsl/nir
 
 LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \
 	$(LIBGLCPP_GENERATED_FILES) \

From 8a94ba5e0cb738c6b1a817ea54afc5816d2bb596 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Tigeot?= <ftigeot@wolfpond.org>
Date: Tue, 17 Nov 2015 18:54:01 +0100
Subject: [PATCH 162/335] xmlconfig: Add support for DragonFly

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/mesa/drivers/dri/common/xmlconfig.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c
index b8ab480ddfe..a8f7c9b854b 100644
--- a/src/mesa/drivers/dri/common/xmlconfig.c
+++ b/src/mesa/drivers/dri/common/xmlconfig.c
@@ -59,6 +59,9 @@ extern char *program_invocation_name, *program_invocation_short_name;
 #elif defined(__NetBSD__) && defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 106000100)
 #    include <stdlib.h>
 #    define GET_PROGRAM_NAME() getprogname()
+#elif defined(__DragonFly__)
+#    include <stdlib.h>
+#    define GET_PROGRAM_NAME() getprogname()
 #elif defined(__APPLE__)
 #    include <stdlib.h>
 #    define GET_PROGRAM_NAME() getprogname()

From 9a93da4e83ee332367a0d6aa601cbcd7f59321b2 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 19 Nov 2015 19:17:04 -0500
Subject: [PATCH 163/335] freedreno/a4xx: add BPTC support

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h   | 3 +++
 src/gallium/drivers/freedreno/a4xx/fd4_format.c | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index a3fb570597f..82c19b90b7d 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -195,6 +195,9 @@ enum a4xx_tex_fmt {
 	TFMT4_DXT1 = 86,
 	TFMT4_DXT3 = 87,
 	TFMT4_DXT5 = 88,
+	TFMT4_BPTC_UFLOAT = 97,
+	TFMT4_BPTC_FLOAT = 98,
+	TFMT4_BPTC = 99,
 	TFMT4_ATC_RGB = 100,
 	TFMT4_ATC_RGBA_EXPLICIT = 101,
 	TFMT4_ATC_RGBA_INTERPOLATED = 102,
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
index 087d04f3c74..d109f36b53c 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -275,6 +275,11 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
 	_T(DXT5_RGBA,  DXT5, NONE, WZYX),
 	_T(DXT5_SRGBA, DXT5, NONE, WZYX),
 
+	_T(BPTC_RGBA_UNORM, BPTC,        NONE, WZYX),
+	_T(BPTC_SRGBA,      BPTC,        NONE, WZYX),
+	_T(BPTC_RGB_FLOAT,  BPTC_FLOAT,  NONE, WZYX),
+	_T(BPTC_RGB_UFLOAT, BPTC_UFLOAT, NONE, WZYX),
+
 	/* faked */
 	_T(RGTC1_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
 	_T(RGTC1_SNORM, 8_8_8_8_SNORM, NONE, WZYX),

From 24dc0316b4d7b29e055f220b23cab7daf4698c0c Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Fri, 20 Nov 2015 04:49:23 +0100
Subject: [PATCH 164/335] gallivm: use sampler index 0 for texel fetches

Texel fetches don't use any samplers. Previously we just set the same
number for both the texture and the sampler unit (as per "ordinary" GL-style
sampling, where the numbers are always the same). However, with d3d10 this
would trigger assertions elsewhere that check that the sampler index does not
exceed the PIPE_MAX_SAMPLERS limit, so just set it to 0.
(Fixing the assertion instead isn't really an option: the sampler isn't
really used, but an out-of-bounds pointer might still be passed around and
some things might even be copied from it.)

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 7d2cd9a9e73..28c7a86316e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -2608,7 +2608,12 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
    params.type = bld->bld_base.base.type;
    params.sample_key = sample_key;
    params.texture_index = unit;
-   params.sampler_index = unit;
+   /*
+    * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS
+    * and trigger some assertions with d3d10 where the sampler view number
+    * can exceed this.
+    */
+   params.sampler_index = 0;
    params.context_ptr = bld->context_ptr;
    params.thread_data_ptr = bld->thread_data_ptr;
    params.coords = coords;

From ddf27a3dd062c78ff49a69a1396be4de9c1b5d37 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Tue, 10 Nov 2015 13:35:01 +0100
Subject: [PATCH 165/335] gallium: remove pipe_driver_query_group_info field
 type

This was only used to implement an unnecessarily restrictive interpretation
of the spec of AMD_performance_monitor. The spec says

  A performance monitor consists of a number of hardware and software
  counters that can be sampled by the GPU and reported back to the
  application.

I guess one could take this as a requirement that counters _must_ be sampled
by the GPU, but then why are they called _software_ counters? Besides,
there's not much reason _not_ to expose all counters that are available,
and this simplifies the code.

v3: add a missing change in the nouveau driver (thanks Samuel Pitoiset)

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tested-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  4 ---
 src/gallium/include/pipe/p_defines.h          |  7 -----
 src/mesa/state_tracker/st_cb_perfmon.c        | 30 -------------------
 3 files changed, 41 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index edde57eb8e2..1f1270e441d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -200,7 +200,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
    if (id == NVC0_HW_SM_QUERY_GROUP) {
       if (screen->compute) {
          info->name = "MP counters";
-         info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
 
          /* Because we can't expose the number of hardware counters needed for
           * each different query, we don't want to allow more than one active
@@ -224,7 +223,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
       if (screen->compute) {
          if (screen->base.class_3d < NVE4_3D_CLASS) {
             info->name = "Performance metrics";
-            info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
             info->max_active_queries = 1;
             info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
             return 1;
@@ -234,7 +232,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
    else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
       info->name = "Driver statistics";
-      info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
       info->max_active_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
       info->num_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
       return 1;
@@ -245,7 +242,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
    info->name = "this_is_not_the_query_group_you_are_looking_for";
    info->max_active_queries = 0;
    info->num_queries = 0;
-   info->type = 0;
    return 0;
 }
 
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 7240154727e..7f241c8cad4 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -829,12 +829,6 @@ enum pipe_driver_query_type
    PIPE_DRIVER_QUERY_TYPE_HZ           = 6,
 };
 
-enum pipe_driver_query_group_type
-{
-   PIPE_DRIVER_QUERY_GROUP_TYPE_CPU = 0,
-   PIPE_DRIVER_QUERY_GROUP_TYPE_GPU = 1,
-};
-
 /* Whether an average value per frame or a cumulative value should be
  * displayed.
  */
@@ -864,7 +858,6 @@ struct pipe_driver_query_info
 struct pipe_driver_query_group_info
 {
    const char *name;
-   enum pipe_driver_query_group_type type;
    unsigned max_active_queries;
    unsigned num_queries;
 };
diff --git a/src/mesa/state_tracker/st_cb_perfmon.c b/src/mesa/state_tracker/st_cb_perfmon.c
index 1bb5be397ae..4ec6d86d6ba 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -65,27 +65,6 @@ find_query_type(struct pipe_screen *screen, const char *name)
    return type;
 }
 
-/**
- * Return TRUE if the underlying driver expose GPU counters.
- */
-static bool
-has_gpu_counters(struct pipe_screen *screen)
-{
-   int num_groups, gid;
-
-   num_groups = screen->get_driver_query_group_info(screen, 0, NULL);
-   for (gid = 0; gid < num_groups; gid++) {
-      struct pipe_driver_query_group_info group_info;
-
-      if (!screen->get_driver_query_group_info(screen, gid, &group_info))
-         continue;
-
-      if (group_info.type == PIPE_DRIVER_QUERY_GROUP_TYPE_GPU)
-         return true;
-   }
-   return false;
-}
-
 static bool
 init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
 {
@@ -313,12 +292,6 @@ st_init_perfmon(struct st_context *st)
    if (!screen->get_driver_query_info || !screen->get_driver_query_group_info)
       return false;
 
-   if (!has_gpu_counters(screen)) {
-      /* According to the spec, GL_AMD_performance_monitor must only
-       * expose GPU counters. */
-      return false;
-   }
-
    /* Get the number of available queries. */
    num_counters = screen->get_driver_query_info(screen, 0, NULL);
    if (!num_counters)
@@ -339,9 +312,6 @@ st_init_perfmon(struct st_context *st)
       if (!screen->get_driver_query_group_info(screen, gid, &group_info))
          continue;
 
-      if (group_info.type != PIPE_DRIVER_QUERY_GROUP_TYPE_GPU)
-         continue;
-
       g->Name = group_info.name;
       g->MaxActiveCounters = group_info.max_active_queries;
       g->NumCounters = 0;

From 050db20d37307b30b05abca368e865f15daf728c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Tue, 10 Nov 2015 14:41:52 +0100
Subject: [PATCH 166/335] gallium/hud: remove unused field in query_info

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tested-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/gallium/auxiliary/hud/hud_driver_query.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c b/src/gallium/auxiliary/hud/hud_driver_query.c
index f14305ea835..3198ab31140 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -48,7 +48,6 @@ struct query_info {
    /* Ring of queries. If a query is busy, we use another slot. */
    struct pipe_query *query[NUM_QUERIES];
    unsigned head, tail;
-   unsigned num_queries;
 
    uint64_t last_time;
    uint64_t results_cumulative;

From 4e1339691d3e04eb7e90d33ab5900ce1a40e628f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Fri, 6 Nov 2015 14:19:54 +0100
Subject: [PATCH 167/335] st/mesa: map semantic driver query types to
 underlying type

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tested-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/gallium/include/pipe/p_defines.h   | 2 ++
 src/mesa/state_tracker/st_cb_perfmon.c | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 7f241c8cad4..7ed9f6df3f7 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -791,6 +791,8 @@ union pipe_query_result
    /* PIPE_QUERY_PRIMITIVES_GENERATED */
    /* PIPE_QUERY_PRIMITIVES_EMITTED */
    /* PIPE_DRIVER_QUERY_TYPE_UINT64 */
+   /* PIPE_DRIVER_QUERY_TYPE_BYTES */
+   /* PIPE_DRIVER_QUERY_TYPE_MICROSECONDS */
    /* PIPE_DRIVER_QUERY_TYPE_HZ */
    uint64_t u64;
 
diff --git a/src/mesa/state_tracker/st_cb_perfmon.c b/src/mesa/state_tracker/st_cb_perfmon.c
index 4ec6d86d6ba..dedb8f520f4 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -334,6 +334,9 @@ st_init_perfmon(struct st_context *st)
          c->Name = info.name;
          switch (info.type) {
             case PIPE_DRIVER_QUERY_TYPE_UINT64:
+            case PIPE_DRIVER_QUERY_TYPE_BYTES:
+            case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS:
+            case PIPE_DRIVER_QUERY_TYPE_HZ:
                c->Minimum.u64 = 0;
                c->Maximum.u64 = info.max_value.u64 ? info.max_value.u64 : -1;
                c->Type = GL_UNSIGNED_INT64_AMD;

From 0aea83dc4ad8826648be7b400553083e0aeac004 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Thu, 12 Nov 2015 11:53:22 +0100
Subject: [PATCH 168/335] st/mesa: store mapping from perfmon counter to query
 type

Previously, initializing a performance monitor ran, for each enabled
counter, an inner loop over all driver queries with string comparisons.
This hurts when a driver exposes lots of queries. Store the query type
for each counter once, in st_init_perfmon, and look it up directly instead.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tested-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/mesa/state_tracker/st_cb_perfmon.c | 74 +++++++++++---------------
 src/mesa/state_tracker/st_cb_perfmon.h | 14 +++++
 src/mesa/state_tracker/st_context.h    |  3 ++
 3 files changed, 49 insertions(+), 42 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c b/src/mesa/state_tracker/st_cb_perfmon.c
index dedb8f520f4..80ff1706966 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -36,48 +36,20 @@
 #include "pipe/p_screen.h"
 #include "util/u_memory.h"
 
-/**
- * Return a PIPE_QUERY_x type >= PIPE_QUERY_DRIVER_SPECIFIC, or -1 if
- * the driver-specific query doesn't exist.
- */
-static int
-find_query_type(struct pipe_screen *screen, const char *name)
-{
-   int num_queries;
-   int type = -1;
-   int i;
-
-   num_queries = screen->get_driver_query_info(screen, 0, NULL);
-   if (!num_queries)
-      return type;
-
-   for (i = 0; i < num_queries; i++) {
-      struct pipe_driver_query_info info;
-
-      if (!screen->get_driver_query_info(screen, i, &info))
-         continue;
-
-      if (!strncmp(info.name, name, strlen(name))) {
-         type = info.query_type;
-         break;
-      }
-   }
-   return type;
-}
-
 static bool
 init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
 {
+   struct st_context *st = st_context(ctx);
    struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
-   struct pipe_screen *screen = st_context(ctx)->pipe->screen;
-   struct pipe_context *pipe = st_context(ctx)->pipe;
+   struct pipe_context *pipe = st->pipe;
    int gid, cid;
 
-   st_flush_bitmap_cache(st_context(ctx));
+   st_flush_bitmap_cache(st);
 
    /* Create a query for each active counter. */
    for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
       const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+      const struct st_perf_monitor_group *stg = &st->perfmon[gid];
 
       if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
          /* Maximum number of counters reached. Cannot start the session. */
@@ -90,20 +62,17 @@ init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
 
       for (cid = 0; cid < g->NumCounters; cid++) {
          const struct gl_perf_monitor_counter *c = &g->Counters[cid];
+         const struct st_perf_monitor_counter *stc = &stg->counters[cid];
          struct st_perf_counter_object *cntr;
-         int query_type;
 
          if (!BITSET_TEST(m->ActiveCounters[gid], cid))
             continue;
 
-         query_type = find_query_type(screen, c->Name);
-         assert(query_type != -1);
-
          cntr = CALLOC_STRUCT(st_perf_counter_object);
          if (!cntr)
             return false;
 
-         cntr->query    = pipe->create_query(pipe, query_type, 0);
+         cntr->query    = pipe->create_query(pipe, stc->query_type, 0);
          cntr->id       = cid;
          cntr->group_id = gid;
 
@@ -286,6 +255,7 @@ st_init_perfmon(struct st_context *st)
    struct gl_perf_monitor_state *perfmon = &st->ctx->PerfMonitor;
    struct pipe_screen *screen = st->pipe->screen;
    struct gl_perf_monitor_group *groups = NULL;
+   struct st_perf_monitor_group *stgroups = NULL;
    int num_counters, num_groups;
    int gid, cid;
 
@@ -304,26 +274,36 @@ st_init_perfmon(struct st_context *st)
    if (!groups)
       return false;
 
+   stgroups = CALLOC(num_groups, sizeof(*stgroups));
+   if (!stgroups)
+      goto fail_only_groups;
+
    for (gid = 0; gid < num_groups; gid++) {
       struct gl_perf_monitor_group *g = &groups[perfmon->NumGroups];
       struct pipe_driver_query_group_info group_info;
       struct gl_perf_monitor_counter *counters = NULL;
+      struct st_perf_monitor_counter *stcounters = NULL;
 
       if (!screen->get_driver_query_group_info(screen, gid, &group_info))
          continue;
 
       g->Name = group_info.name;
       g->MaxActiveCounters = group_info.max_active_queries;
-      g->NumCounters = 0;
-      g->Counters = NULL;
 
       if (group_info.num_queries)
          counters = CALLOC(group_info.num_queries, sizeof(*counters));
       if (!counters)
          goto fail;
+      g->Counters = counters;
+
+      stcounters = CALLOC(group_info.num_queries, sizeof(*stcounters));
+      if (!stcounters)
+         goto fail;
+      stgroups[perfmon->NumGroups].counters = stcounters;
 
       for (cid = 0; cid < num_counters; cid++) {
          struct gl_perf_monitor_counter *c = &counters[g->NumCounters];
+         struct st_perf_monitor_counter *stc = &stcounters[g->NumCounters];
          struct pipe_driver_query_info info;
 
          if (!screen->get_driver_query_info(screen, cid, &info))
@@ -359,18 +339,25 @@ st_init_perfmon(struct st_context *st)
             default:
                unreachable("Invalid driver query type!");
          }
+
+         stc->query_type = info.query_type;
+
          g->NumCounters++;
       }
-      g->Counters = counters;
       perfmon->NumGroups++;
    }
    perfmon->Groups = groups;
+   st->perfmon = stgroups;
 
    return true;
 
 fail:
-   for (gid = 0; gid < num_groups; gid++)
+   for (gid = 0; gid < num_groups; gid++) {
+      FREE(stgroups[gid].counters);
       FREE((void *)groups[gid].Counters);
+   }
+   FREE(stgroups);
+fail_only_groups:
    FREE(groups);
    return false;
 }
@@ -381,8 +368,11 @@ st_destroy_perfmon(struct st_context *st)
    struct gl_perf_monitor_state *perfmon = &st->ctx->PerfMonitor;
    int gid;
 
-   for (gid = 0; gid < perfmon->NumGroups; gid++)
+   for (gid = 0; gid < perfmon->NumGroups; gid++) {
+      FREE(st->perfmon[gid].counters);
       FREE((void *)perfmon->Groups[gid].Counters);
+   }
+   FREE(st->perfmon);
    FREE((void *)perfmon->Groups);
 }
 
diff --git a/src/mesa/state_tracker/st_cb_perfmon.h b/src/mesa/state_tracker/st_cb_perfmon.h
index 0b195de47fe..9864b0a15d2 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.h
+++ b/src/mesa/state_tracker/st_cb_perfmon.h
@@ -43,6 +43,20 @@ struct st_perf_counter_object
    int group_id;
 };
 
+/**
+ * Extra data per counter, supplementing gl_perf_monitor_counter with
+ * driver-specific information.
+ */
+struct st_perf_monitor_counter
+{
+   unsigned query_type;
+};
+
+struct st_perf_monitor_group
+{
+   struct st_perf_monitor_counter *counters;
+};
+
 /**
  * Cast wrapper
  */
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index c243f5cd966..60a9a4bb0d5 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -46,6 +46,7 @@ struct draw_stage;
 struct gen_mipmap_state;
 struct st_context;
 struct st_fragment_program;
+struct st_perf_monitor_group;
 struct u_upload_mgr;
 
 
@@ -217,6 +218,8 @@ struct st_context
    int32_t read_stamp;
 
    struct st_config_options options;
+
+   struct st_perf_monitor_group *perfmon;
 };
 
 

From afa6121b4ef2d249004cff0bb40237c71c61946d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Thu, 12 Nov 2015 12:02:44 +0100
Subject: [PATCH 169/335] st/mesa: use BITSET_FOREACH_SET to loop through
 active perfmon counters

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tested-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/mesa/state_tracker/st_cb_perfmon.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c b/src/mesa/state_tracker/st_cb_perfmon.c
index 80ff1706966..ec12eb22206 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -50,6 +50,7 @@ init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
    for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
       const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
       const struct st_perf_monitor_group *stg = &st->perfmon[gid];
+      BITSET_WORD tmp;
 
       if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
          /* Maximum number of counters reached. Cannot start the session. */
@@ -60,14 +61,10 @@ init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
          return false;
       }
 
-      for (cid = 0; cid < g->NumCounters; cid++) {
-         const struct gl_perf_monitor_counter *c = &g->Counters[cid];
+      BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
          const struct st_perf_monitor_counter *stc = &stg->counters[cid];
          struct st_perf_counter_object *cntr;
 
-         if (!BITSET_TEST(m->ActiveCounters[gid], cid))
-            continue;
-
          cntr = CALLOC_STRUCT(st_perf_counter_object);
          if (!cntr)
             return false;

From c235300bfc3547d418f9a66555a5ee93a15666f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Thu, 12 Nov 2015 12:30:23 +0100
Subject: [PATCH 170/335] st/mesa: maintain active perfmon counters in an array

It is easy enough to pre-determine the required size, and arrays are
generally better behaved, especially when they get large.

v2: make sure init_perf_monitor returns true when no counters are active
(spotted by Samuel Pitoiset)

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tested-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/mesa/state_tracker/st_cb_perfmon.c | 81 ++++++++++++++++----------
 src/mesa/state_tracker/st_cb_perfmon.h | 18 +++---
 2 files changed, 58 insertions(+), 41 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c b/src/mesa/state_tracker/st_cb_perfmon.c
index ec12eb22206..8628e2301ff 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -42,15 +42,14 @@ init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
    struct st_context *st = st_context(ctx);
    struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
    struct pipe_context *pipe = st->pipe;
+   unsigned num_active_counters = 0;
    int gid, cid;
 
    st_flush_bitmap_cache(st);
 
-   /* Create a query for each active counter. */
+   /* Determine the number of active counters. */
    for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
       const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
-      const struct st_perf_monitor_group *stg = &st->perfmon[gid];
-      BITSET_WORD tmp;
 
       if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
          /* Maximum number of counters reached. Cannot start the session. */
@@ -61,19 +60,32 @@ init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
          return false;
       }
 
+      num_active_counters += m->ActiveGroups[gid];
+   }
+
+   if (!num_active_counters)
+      return true;
+
+   stm->active_counters = CALLOC(num_active_counters,
+                                 sizeof(*stm->active_counters));
+   if (!stm->active_counters)
+      return false;
+
+   /* Create a query for each active counter. */
+   for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
+      const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+      const struct st_perf_monitor_group *stg = &st->perfmon[gid];
+      BITSET_WORD tmp;
+
       BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
          const struct st_perf_monitor_counter *stc = &stg->counters[cid];
-         struct st_perf_counter_object *cntr;
-
-         cntr = CALLOC_STRUCT(st_perf_counter_object);
-         if (!cntr)
-            return false;
+         struct st_perf_counter_object *cntr =
+            &stm->active_counters[stm->num_active_counters];
 
          cntr->query    = pipe->create_query(pipe, stc->query_type, 0);
          cntr->id       = cid;
          cntr->group_id = gid;
-
-         list_addtail(&cntr->list, &stm->active_counters);
+         ++stm->num_active_counters;
       }
    }
    return true;
@@ -83,24 +95,24 @@ static void
 reset_perf_monitor(struct st_perf_monitor_object *stm,
                    struct pipe_context *pipe)
 {
-   struct st_perf_counter_object *cntr, *tmp;
+   unsigned i;
 
-   LIST_FOR_EACH_ENTRY_SAFE(cntr, tmp, &stm->active_counters, list) {
-      if (cntr->query)
-         pipe->destroy_query(pipe, cntr->query);
-      list_del(&cntr->list);
-      free(cntr);
+   for (i = 0; i < stm->num_active_counters; ++i) {
+      struct pipe_query *query = stm->active_counters[i].query;
+      if (query)
+         pipe->destroy_query(pipe, query);
    }
+   FREE(stm->active_counters);
+   stm->active_counters = NULL;
+   stm->num_active_counters = 0;
 }
 
 static struct gl_perf_monitor_object *
 st_NewPerfMonitor(struct gl_context *ctx)
 {
    struct st_perf_monitor_object *stq = ST_CALLOC_STRUCT(st_perf_monitor_object);
-   if (stq) {
-      list_inithead(&stq->active_counters);
+   if (stq)
       return &stq->base;
-   }
    return NULL;
 }
 
@@ -119,9 +131,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
 {
    struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
    struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct st_perf_counter_object *cntr;
+   unsigned i;
 
-   if (LIST_IS_EMPTY(&stm->active_counters)) {
+   if (!stm->num_active_counters) {
       /* Create a query for each active counter before starting
        * a new monitoring session. */
       if (!init_perf_monitor(ctx, m))
@@ -129,8 +141,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
    }
 
    /* Start the query for each active counter. */
-   LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list) {
-      if (!pipe->begin_query(pipe, cntr->query))
+   for (i = 0; i < stm->num_active_counters; ++i) {
+      struct pipe_query *query = stm->active_counters[i].query;
+      if (!pipe->begin_query(pipe, query))
           goto fail;
    }
    return true;
@@ -146,11 +159,13 @@ st_EndPerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
 {
    struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
    struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct st_perf_counter_object *cntr;
+   unsigned i;
 
    /* Stop the query for each active counter. */
-   LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list)
-      pipe->end_query(pipe, cntr->query);
+   for (i = 0; i < stm->num_active_counters; ++i) {
+      struct pipe_query *query = stm->active_counters[i].query;
+      pipe->end_query(pipe, query);
+   }
 }
 
 static void
@@ -174,16 +189,17 @@ st_IsPerfMonitorResultAvailable(struct gl_context *ctx,
 {
    struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
    struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct st_perf_counter_object *cntr;
+   unsigned i;
 
-   if (LIST_IS_EMPTY(&stm->active_counters))
+   if (!stm->num_active_counters)
       return false;
 
    /* The result of a monitoring session is only available if the query of
     * each active counter is idle. */
-   LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list) {
+   for (i = 0; i < stm->num_active_counters; ++i) {
+      struct pipe_query *query = stm->active_counters[i].query;
       union pipe_query_result result;
-      if (!pipe->get_query_result(pipe, cntr->query, FALSE, &result)) {
+      if (!pipe->get_query_result(pipe, query, FALSE, &result)) {
          /* The query is busy. */
          return false;
       }
@@ -200,7 +216,7 @@ st_GetPerfMonitorResult(struct gl_context *ctx,
 {
    struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
    struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct st_perf_counter_object *cntr;
+   unsigned i;
 
    /* Copy data to the supplied array (data).
     *
@@ -210,7 +226,8 @@ st_GetPerfMonitorResult(struct gl_context *ctx,
    GLsizei offset = 0;
 
    /* Read query results for each active counter. */
-   LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list) {
+   for (i = 0; i < stm->num_active_counters; ++i) {
+      struct st_perf_counter_object *cntr = &stm->active_counters[i];
       union pipe_query_result result = { 0 };
       int gid, cid;
       GLenum type;
diff --git a/src/mesa/state_tracker/st_cb_perfmon.h b/src/mesa/state_tracker/st_cb_perfmon.h
index 9864b0a15d2..79e0421dba2 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.h
+++ b/src/mesa/state_tracker/st_cb_perfmon.h
@@ -26,21 +26,21 @@
 
 #include "util/list.h"
 
+struct st_perf_counter_object
+{
+   struct pipe_query *query;
+   int id;
+   int group_id;
+};
+
 /**
  * Subclass of gl_perf_monitor_object
  */
 struct st_perf_monitor_object
 {
    struct gl_perf_monitor_object base;
-   struct list_head active_counters;
-};
-
-struct st_perf_counter_object
-{
-   struct list_head list;
-   struct pipe_query *query;
-   int id;
-   int group_id;
+   unsigned num_active_counters;
+   struct st_perf_counter_object *active_counters;
 };
 
 /**

From d61d4df02e568d314c4e763ba9b5bdd57aef98c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Tue, 10 Nov 2015 14:06:59 +0100
Subject: [PATCH 171/335] gallium: add the concept of batch queries

Some drivers (in particular radeon[si], but also freedreno judging from
a quick grep) may want to expose performance counters that cannot be
individually enabled or disabled.

Allow such drivers to mark driver-specific queries as requiring a new
type of batch query object that is used to start and stop a list of queries
simultaneously.

v3: adjust recently added nv50 queries

v2: documentation for create_batch_query

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tested-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/gallium/drivers/nouveau/nv50/nv50_query.c |  1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  1 +
 src/gallium/include/pipe/p_context.h          | 19 +++++++++++++
 src/gallium/include/pipe/p_defines.h          | 27 ++++++++++++++-----
 4 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 643d430f1bc..6b3e49a25a9 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -174,6 +174,7 @@ nv50_screen_get_driver_query_info(struct pipe_screen *pscreen,
    info->max_value.u64 = 0;
    info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
    info->group_id = -1;
+   info->flags = 0;
 
    return nv50_hw_get_driver_query_info(screen, id, info);
 }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index 1f1270e441d..d992b10a23c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -162,6 +162,7 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
    info->max_value.u64 = 0;
    info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
    info->group_id = -1;
+   info->flags = 0;
 
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
    if (id < num_sw_queries)
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index 27f358f8fb9..be7447de67d 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -116,6 +116,25 @@ struct pipe_context {
                                        unsigned query_type,
                                        unsigned index );
 
+   /**
+    * Create a query object that queries all given query types simultaneously.
+    *
+    * This can only be used for those query types for which
+    * get_driver_query_info indicates that it must be used. Only one batch
+    * query object may be active at a time.
+    *
+    * There may be additional constraints on which query types can be used
+    * together, in particular those that are implied by
+    * get_driver_query_group_info.
+    *
+    * \param num_queries the number of query types
+    * \param query_types array of \p num_queries query types
+    * \return a query object, or NULL on error.
+    */
+   struct pipe_query *(*create_batch_query)( struct pipe_context *pipe,
+                                             unsigned num_queries,
+                                             unsigned *query_types );
+
    void (*destroy_query)(struct pipe_context *pipe,
                          struct pipe_query *q);
 
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 7ed9f6df3f7..b3c8b9f7360 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -775,6 +775,16 @@ struct pipe_query_data_pipeline_statistics
    uint64_t cs_invocations; /**< Num compute shader invocations. */
 };
 
+/**
+ * For batch queries.
+ */
+union pipe_numeric_type_union
+{
+   uint64_t u64;
+   uint32_t u32;
+   float f;
+};
+
 /**
  * Query result (returned by pipe_context::get_query_result).
  */
@@ -811,6 +821,9 @@ union pipe_query_result
 
    /* PIPE_QUERY_PIPELINE_STATISTICS */
    struct pipe_query_data_pipeline_statistics pipeline_statistics;
+
+   /* batch queries */
+   union pipe_numeric_type_union batch[0];
 };
 
 union pipe_color_union
@@ -840,12 +853,13 @@ enum pipe_driver_query_result_type
    PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE = 1,
 };
 
-union pipe_numeric_type_union
-{
-   uint64_t u64;
-   uint32_t u32;
-   float f;
-};
+/**
+ * Some hardware requires some hardware-specific queries to be submitted
+ * as batched queries. The corresponding query objects are created using
+ * create_batch_query, and at most one such query may be active at
+ * any time.
+ */
+#define PIPE_DRIVER_QUERY_FLAG_BATCH     (1 << 0)
 
 struct pipe_driver_query_info
 {
@@ -855,6 +869,7 @@ struct pipe_driver_query_info
    enum pipe_driver_query_type type;
    enum pipe_driver_query_result_type result_type;
    unsigned group_id;
+   unsigned flags;
 };
 
 struct pipe_driver_query_group_info

From 424a614ff1105dcb5195178cb8f04ac46b8c0d8a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Tue, 10 Nov 2015 17:04:32 +0100
Subject: [PATCH 172/335] gallium/hud: add support for batch queries

v2 + v3: be more defensive about allocations

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tested-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/gallium/auxiliary/hud/hud_context.c      |  24 +-
 src/gallium/auxiliary/hud/hud_driver_query.c | 270 ++++++++++++++++---
 src/gallium/auxiliary/hud/hud_private.h      |  13 +-
 3 files changed, 261 insertions(+), 46 deletions(-)

diff --git a/src/gallium/auxiliary/hud/hud_context.c b/src/gallium/auxiliary/hud/hud_context.c
index 24a68dd2574..efceb85e38d 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -60,6 +60,7 @@ struct hud_context {
    struct cso_context *cso;
    struct u_upload_mgr *uploader;
 
+   struct hud_batch_query_context *batch_query;
    struct list_head pane_list;
 
    /* states */
@@ -523,6 +524,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
    hud_alloc_vertices(hud, &hud->text, 4 * 512, 4 * sizeof(float));
 
    /* prepare all graphs */
+   hud_batch_query_update(hud->batch_query);
+
    LIST_FOR_EACH_ENTRY(pane, &hud->pane_list, head) {
       LIST_FOR_EACH_ENTRY(gr, &pane->graph_list, head) {
          gr->query_new_value(gr);
@@ -916,17 +919,21 @@ hud_parse_env_var(struct hud_context *hud, const char *env)
       }
       else if (strcmp(name, "samples-passed") == 0 &&
                has_occlusion_query(hud->pipe->screen)) {
-         hud_pipe_query_install(pane, hud->pipe, "samples-passed",
+         hud_pipe_query_install(&hud->batch_query, pane, hud->pipe,
+                                "samples-passed",
                                 PIPE_QUERY_OCCLUSION_COUNTER, 0, 0,
                                 PIPE_DRIVER_QUERY_TYPE_UINT64,
-                                PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+                                PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+                                0);
       }
       else if (strcmp(name, "primitives-generated") == 0 &&
                has_streamout(hud->pipe->screen)) {
-         hud_pipe_query_install(pane, hud->pipe, "primitives-generated",
+         hud_pipe_query_install(&hud->batch_query, pane, hud->pipe,
+                                "primitives-generated",
                                 PIPE_QUERY_PRIMITIVES_GENERATED, 0, 0,
                                 PIPE_DRIVER_QUERY_TYPE_UINT64,
-                                PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+                                PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+                                0);
       }
       else {
          boolean processed = FALSE;
@@ -951,17 +958,19 @@ hud_parse_env_var(struct hud_context *hud, const char *env)
                if (strcmp(name, pipeline_statistics_names[i]) == 0)
                   break;
             if (i < Elements(pipeline_statistics_names)) {
-               hud_pipe_query_install(pane, hud->pipe, name,
+               hud_pipe_query_install(&hud->batch_query, pane, hud->pipe, name,
                                       PIPE_QUERY_PIPELINE_STATISTICS, i,
                                       0, PIPE_DRIVER_QUERY_TYPE_UINT64,
-                                      PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+                                      PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+                                      0);
                processed = TRUE;
             }
          }
 
          /* driver queries */
          if (!processed) {
-            if (!hud_driver_query_install(pane, hud->pipe, name)){
+            if (!hud_driver_query_install(&hud->batch_query, pane, hud->pipe,
+                                          name)) {
                fprintf(stderr, "gallium_hud: unknown driver query '%s'\n", name);
             }
          }
@@ -1322,6 +1331,7 @@ hud_destroy(struct hud_context *hud)
       FREE(pane);
    }
 
+   hud_batch_query_cleanup(&hud->batch_query);
    pipe->delete_fs_state(pipe, hud->fs_color);
    pipe->delete_fs_state(pipe, hud->fs_text);
    pipe->delete_vs_state(pipe, hud->vs);
diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c b/src/gallium/auxiliary/hud/hud_driver_query.c
index 3198ab31140..d7b1f11ed56 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -34,13 +34,164 @@
 #include "hud/hud_private.h"
 #include "pipe/p_screen.h"
 #include "os/os_time.h"
+#include "util/u_math.h"
 #include "util/u_memory.h"
 #include <stdio.h>
 
+// Must be a power of two (ring indices wrap via unsigned % NUM_QUERIES)
 #define NUM_QUERIES 8
 
+struct hud_batch_query_context {
+   struct pipe_context *pipe;
+   unsigned num_query_types;
+   unsigned allocated_query_types;
+   unsigned *query_types;
+
+   boolean failed;
+   struct pipe_query *query[NUM_QUERIES];
+   union pipe_query_result *result[NUM_QUERIES];
+   unsigned head, pending, results;
+};
+
+void
+hud_batch_query_update(struct hud_batch_query_context *bq)
+{
+   struct pipe_context *pipe;
+
+   if (!bq || bq->failed)
+      return;
+
+   pipe = bq->pipe;
+
+   if (bq->query[bq->head])
+      pipe->end_query(pipe, bq->query[bq->head]);
+
+   bq->results = 0;
+
+   while (bq->pending) {
+      unsigned idx = (bq->head - bq->pending + 1) % NUM_QUERIES;
+      struct pipe_query *query = bq->query[idx];
+
+      if (!bq->result[idx])
+         bq->result[idx] = MALLOC(sizeof(bq->result[idx]->batch[0]) *
+                                  bq->num_query_types);
+      if (!bq->result[idx]) {
+         fprintf(stderr, "gallium_hud: out of memory.\n");
+         bq->failed = TRUE;
+         return;
+      }
+
+      if (!pipe->get_query_result(pipe, query, FALSE, bq->result[idx]))
+         break;
+
+      ++bq->results;
+      --bq->pending;
+   }
+
+   bq->head = (bq->head + 1) % NUM_QUERIES;
+
+   if (bq->pending == NUM_QUERIES) {
+      fprintf(stderr,
+              "gallium_hud: all queries busy after %i frames, dropping data.\n",
+              NUM_QUERIES);
+
+      assert(bq->query[bq->head]);
+
+      pipe->destroy_query(bq->pipe, bq->query[bq->head]);
+      bq->query[bq->head] = NULL;
+   }
+
+   ++bq->pending;
+
+   if (!bq->query[bq->head]) {
+      bq->query[bq->head] = pipe->create_batch_query(pipe,
+                                                     bq->num_query_types,
+                                                     bq->query_types);
+
+      if (!bq->query[bq->head]) {
+         fprintf(stderr,
+                 "gallium_hud: create_batch_query failed. You may have "
+                 "selected too many or incompatible queries.\n");
+         bq->failed = TRUE;
+         return;
+      }
+   }
+
+   if (!pipe->begin_query(pipe, bq->query[bq->head])) {
+      fprintf(stderr,
+              "gallium_hud: could not begin batch query. You may have "
+              "selected too many or incompatible queries.\n");
+      bq->failed = TRUE;
+   }
+}
+
+static boolean
+batch_query_add(struct hud_batch_query_context **pbq,
+                struct pipe_context *pipe, unsigned query_type,
+                unsigned *result_index)
+{
+   struct hud_batch_query_context *bq = *pbq;
+   unsigned i;
+
+   if (!bq) {
+      bq = CALLOC_STRUCT(hud_batch_query_context);
+      if (!bq)
+         return false;
+      bq->pipe = pipe;
+      *pbq = bq;
+   }
+
+   for (i = 0; i < bq->num_query_types; ++i) {
+      if (bq->query_types[i] == query_type) {
+         *result_index = i;
+         return true;
+      }
+   }
+
+   if (bq->num_query_types == bq->allocated_query_types) {
+      unsigned new_alloc = MAX2(16, bq->allocated_query_types * 2);
+      unsigned *new_query_types
+         = REALLOC(bq->query_types,
+                   bq->allocated_query_types * sizeof(unsigned),
+                   new_alloc * sizeof(unsigned));
+      if (!new_query_types)
+         return false;
+      bq->query_types = new_query_types;
+      bq->allocated_query_types = new_alloc;
+   }
+
+   bq->query_types[bq->num_query_types] = query_type;
+   *result_index = bq->num_query_types++;
+   return true;
+}
+
+void
+hud_batch_query_cleanup(struct hud_batch_query_context **pbq)
+{
+   struct hud_batch_query_context *bq = *pbq;
+   unsigned idx;
+
+   if (!bq)
+      return;
+
+   *pbq = NULL;
+
+   if (bq->query[bq->head] && !bq->failed)
+      bq->pipe->end_query(bq->pipe, bq->query[bq->head]);
+
+   for (idx = 0; idx < NUM_QUERIES; ++idx) {
+      if (bq->query[idx])
+         bq->pipe->destroy_query(bq->pipe, bq->query[idx]);
+      FREE(bq->result[idx]);
+   }
+
+   FREE(bq->query_types);
+   FREE(bq);
+}
+
 struct query_info {
    struct pipe_context *pipe;
+   struct hud_batch_query_context *batch;
    unsigned query_type;
    unsigned result_index; /* unit depends on query_type */
    enum pipe_driver_query_result_type result_type;
@@ -55,11 +206,26 @@ struct query_info {
 };
 
 static void
-query_new_value(struct hud_graph *gr)
+query_new_value_batch(struct query_info *info)
+{
+   struct hud_batch_query_context *bq = info->batch;
+   unsigned result_index = info->result_index;
+   unsigned idx = (bq->head - bq->pending) % NUM_QUERIES;
+   unsigned results = bq->results;
+
+   while (results) {
+      info->results_cumulative += bq->result[idx]->batch[result_index].u64;
+      ++info->num_results;
+
+      --results;
+      idx = (idx - 1) % NUM_QUERIES;
+   }
+}
+
+static void
+query_new_value_normal(struct query_info *info)
 {
-   struct query_info *info = gr->query_data;
    struct pipe_context *pipe = info->pipe;
-   uint64_t now = os_time_get();
 
    if (info->last_time) {
       if (info->query[info->head])
@@ -106,30 +272,9 @@ query_new_value(struct hud_graph *gr)
             break;
          }
       }
-
-      if (info->num_results && info->last_time + gr->pane->period <= now) {
-         uint64_t value;
-
-         switch (info->result_type) {
-         default:
-         case PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE:
-            value = info->results_cumulative / info->num_results;
-            break;
-         case PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE:
-            value = info->results_cumulative;
-            break;
-         }
-
-         hud_graph_add_value(gr, value);
-
-         info->last_time = now;
-         info->results_cumulative = 0;
-         info->num_results = 0;
-      }
    }
    else {
       /* initialize */
-      info->last_time = now;
       info->query[info->head] = pipe->create_query(pipe, info->query_type, 0);
    }
 
@@ -137,12 +282,50 @@ query_new_value(struct hud_graph *gr)
       pipe->begin_query(pipe, info->query[info->head]);
 }
 
+static void
+query_new_value(struct hud_graph *gr)
+{
+   struct query_info *info = gr->query_data;
+   uint64_t now = os_time_get();
+
+   if (info->batch) {
+      query_new_value_batch(info);
+   } else {
+      query_new_value_normal(info);
+   }
+
+   if (!info->last_time) {
+      info->last_time = now;
+      return;
+   }
+
+   if (info->num_results && info->last_time + gr->pane->period <= now) {
+      uint64_t value;
+
+      switch (info->result_type) {
+      default:
+      case PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE:
+         value = info->results_cumulative / info->num_results;
+         break;
+      case PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE:
+         value = info->results_cumulative;
+         break;
+      }
+
+      hud_graph_add_value(gr, value);
+
+      info->last_time = now;
+      info->results_cumulative = 0;
+      info->num_results = 0;
+   }
+}
+
 static void
 free_query_info(void *ptr)
 {
    struct query_info *info = ptr;
 
-   if (info->last_time) {
+   if (!info->batch && info->last_time) {
       struct pipe_context *pipe = info->pipe;
       int i;
 
@@ -158,11 +341,13 @@ free_query_info(void *ptr)
 }
 
 void
-hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe,
+hud_pipe_query_install(struct hud_batch_query_context **pbq,
+                       struct hud_pane *pane, struct pipe_context *pipe,
                        const char *name, unsigned query_type,
                        unsigned result_index,
                        uint64_t max_value, enum pipe_driver_query_type type,
-                       enum pipe_driver_query_result_type result_type)
+                       enum pipe_driver_query_result_type result_type,
+                       unsigned flags)
 {
    struct hud_graph *gr;
    struct query_info *info;
@@ -174,28 +359,40 @@ hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe,
    strncpy(gr->name, name, sizeof(gr->name));
    gr->name[sizeof(gr->name) - 1] = '\0';
    gr->query_data = CALLOC_STRUCT(query_info);
-   if (!gr->query_data) {
-      FREE(gr);
-      return;
-   }
+   if (!gr->query_data)
+      goto fail_gr;
 
    gr->query_new_value = query_new_value;
    gr->free_query_data = free_query_info;
 
    info = gr->query_data;
    info->pipe = pipe;
-   info->query_type = query_type;
-   info->result_index = result_index;
    info->result_type = result_type;
 
+   if (flags & PIPE_DRIVER_QUERY_FLAG_BATCH) {
+      if (!batch_query_add(pbq, pipe, query_type, &info->result_index))
+         goto fail_info;
+      info->batch = *pbq;
+   } else {
+      info->query_type = query_type;
+      info->result_index = result_index;
+   }
+
    hud_pane_add_graph(pane, gr);
    if (pane->max_value < max_value)
       hud_pane_set_max_value(pane, max_value);
    pane->type = type;
+   return;
+
+fail_info:
+   FREE(info);
+fail_gr:
+   FREE(gr);
 }
 
 boolean
-hud_driver_query_install(struct hud_pane *pane, struct pipe_context *pipe,
+hud_driver_query_install(struct hud_batch_query_context **pbq,
+                         struct hud_pane *pane, struct pipe_context *pipe,
                          const char *name)
 {
    struct pipe_screen *screen = pipe->screen;
@@ -219,8 +416,9 @@ hud_driver_query_install(struct hud_pane *pane, struct pipe_context *pipe,
    if (!found)
       return FALSE;
 
-   hud_pipe_query_install(pane, pipe, query.name, query.query_type, 0,
-                          query.max_value.u64, query.type, query.result_type);
+   hud_pipe_query_install(pbq, pane, pipe, query.name, query.query_type, 0,
+                          query.max_value.u64, query.type, query.result_type,
+                          query.flags);
 
    return TRUE;
 }
diff --git a/src/gallium/auxiliary/hud/hud_private.h b/src/gallium/auxiliary/hud/hud_private.h
index 01caf7b8b2c..4a788bba456 100644
--- a/src/gallium/auxiliary/hud/hud_private.h
+++ b/src/gallium/auxiliary/hud/hud_private.h
@@ -80,19 +80,26 @@ void hud_pane_set_max_value(struct hud_pane *pane, uint64_t value);
 void hud_graph_add_value(struct hud_graph *gr, uint64_t value);
 
 /* graphs/queries */
+struct hud_batch_query_context;
+
 #define ALL_CPUS ~0 /* optionally set as cpu_index */
 
 int hud_get_num_cpus(void);
 
 void hud_fps_graph_install(struct hud_pane *pane);
 void hud_cpu_graph_install(struct hud_pane *pane, unsigned cpu_index);
-void hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe,
+void hud_pipe_query_install(struct hud_batch_query_context **pbq,
+                            struct hud_pane *pane, struct pipe_context *pipe,
                             const char *name, unsigned query_type,
                             unsigned result_index,
                             uint64_t max_value,
                             enum pipe_driver_query_type type,
-                            enum pipe_driver_query_result_type result_type);
-boolean hud_driver_query_install(struct hud_pane *pane,
+                            enum pipe_driver_query_result_type result_type,
+                            unsigned flags);
+boolean hud_driver_query_install(struct hud_batch_query_context **pbq,
+                                 struct hud_pane *pane,
                                  struct pipe_context *pipe, const char *name);
+void hud_batch_query_update(struct hud_batch_query_context *bq);
+void hud_batch_query_cleanup(struct hud_batch_query_context **pbq);
 
 #endif

From 6a14a39fab805b1471101afc6d6d1d843b5f16ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Thu, 12 Nov 2015 15:09:21 +0100
Subject: [PATCH 173/335] st/mesa: add support for batch driver queries to
 perfmon

v2 + v3: forgot null-pointer checks (spotted by Samuel Pitoiset)

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tested-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 src/mesa/state_tracker/st_cb_perfmon.c | 83 +++++++++++++++++++++++---
 src/mesa/state_tracker/st_cb_perfmon.h |  6 ++
 2 files changed, 82 insertions(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c b/src/mesa/state_tracker/st_cb_perfmon.c
index 8628e2301ff..8fdf0e8497f 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -42,7 +42,10 @@ init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
    struct st_context *st = st_context(ctx);
    struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
    struct pipe_context *pipe = st->pipe;
+   unsigned *batch = NULL;
    unsigned num_active_counters = 0;
+   unsigned max_batch_counters = 0;
+   unsigned num_batch_counters = 0;
    int gid, cid;
 
    st_flush_bitmap_cache(st);
@@ -50,6 +53,7 @@ init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
    /* Determine the number of active counters. */
    for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
       const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+      const struct st_perf_monitor_group *stg = &st->perfmon[gid];
 
       if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
          /* Maximum number of counters reached. Cannot start the session. */
@@ -61,6 +65,8 @@ init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
       }
 
       num_active_counters += m->ActiveGroups[gid];
+      if (stg->has_batch)
+         max_batch_counters += m->ActiveGroups[gid];
    }
 
    if (!num_active_counters)
@@ -71,6 +77,12 @@ init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
    if (!stm->active_counters)
       return false;
 
+   if (max_batch_counters) {
+      batch = CALLOC(max_batch_counters, sizeof(*batch));
+      if (!batch)
+         return false;
+   }
+
    /* Create a query for each active counter. */
    for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
       const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
@@ -82,13 +94,35 @@ init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
          struct st_perf_counter_object *cntr =
             &stm->active_counters[stm->num_active_counters];
 
-         cntr->query    = pipe->create_query(pipe, stc->query_type, 0);
          cntr->id       = cid;
          cntr->group_id = gid;
+         if (stc->flags & PIPE_DRIVER_QUERY_FLAG_BATCH) {
+            cntr->batch_index = num_batch_counters;
+            batch[num_batch_counters++] = stc->query_type;
+         } else {
+            cntr->query = pipe->create_query(pipe, stc->query_type, 0);
+            if (!cntr->query)
+               goto fail;
+         }
          ++stm->num_active_counters;
       }
    }
+
+   /* Create the batch query. */
+   if (num_batch_counters) {
+      stm->batch_query = pipe->create_batch_query(pipe, num_batch_counters,
+                                                  batch);
+      stm->batch_result = CALLOC(num_batch_counters, sizeof(stm->batch_result->batch[0]));
+      if (!stm->batch_query || !stm->batch_result)
+         goto fail;
+   }
+
+   FREE(batch);
    return true;
+
+fail:
+   FREE(batch);
+   return false;
 }
 
 static void
@@ -105,6 +139,13 @@ reset_perf_monitor(struct st_perf_monitor_object *stm,
    FREE(stm->active_counters);
    stm->active_counters = NULL;
    stm->num_active_counters = 0;
+
+   if (stm->batch_query) {
+      pipe->destroy_query(pipe, stm->batch_query);
+      stm->batch_query = NULL;
+   }
+   FREE(stm->batch_result);
+   stm->batch_result = NULL;
 }
 
 static struct gl_perf_monitor_object *
@@ -143,9 +184,13 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
    /* Start the query for each active counter. */
    for (i = 0; i < stm->num_active_counters; ++i) {
       struct pipe_query *query = stm->active_counters[i].query;
-      if (!pipe->begin_query(pipe, query))
+      if (query && !pipe->begin_query(pipe, query))
           goto fail;
    }
+
+   if (stm->batch_query && !pipe->begin_query(pipe, stm->batch_query))
+      goto fail;
+
    return true;
 
 fail:
@@ -164,8 +209,12 @@ st_EndPerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
    /* Stop the query for each active counter. */
    for (i = 0; i < stm->num_active_counters; ++i) {
       struct pipe_query *query = stm->active_counters[i].query;
-      pipe->end_query(pipe, query);
+      if (query)
+         pipe->end_query(pipe, query);
    }
+
+   if (stm->batch_query)
+      pipe->end_query(pipe, stm->batch_query);
 }
 
 static void
@@ -199,11 +248,16 @@ st_IsPerfMonitorResultAvailable(struct gl_context *ctx,
    for (i = 0; i < stm->num_active_counters; ++i) {
       struct pipe_query *query = stm->active_counters[i].query;
       union pipe_query_result result;
-      if (!pipe->get_query_result(pipe, query, FALSE, &result)) {
+      if (query && !pipe->get_query_result(pipe, query, FALSE, &result)) {
          /* The query is busy. */
          return false;
       }
    }
+
+   if (stm->batch_query &&
+       !pipe->get_query_result(pipe, stm->batch_query, FALSE, stm->batch_result))
+      return false;
+
    return true;
 }
 
@@ -224,6 +278,11 @@ st_GetPerfMonitorResult(struct gl_context *ctx,
     * active counter. The API allows counters to appear in any order.
     */
    GLsizei offset = 0;
+   bool have_batch_query = false;
+
+   if (stm->batch_query)
+      have_batch_query = pipe->get_query_result(pipe, stm->batch_query, TRUE,
+                                                stm->batch_result);
 
    /* Read query results for each active counter. */
    for (i = 0; i < stm->num_active_counters; ++i) {
@@ -236,8 +295,14 @@ st_GetPerfMonitorResult(struct gl_context *ctx,
       gid  = cntr->group_id;
       type = ctx->PerfMonitor.Groups[gid].Counters[cid].Type;
 
-      if (!pipe->get_query_result(pipe, cntr->query, TRUE, &result))
-         continue;
+      if (cntr->query) {
+         if (!pipe->get_query_result(pipe, cntr->query, TRUE, &result))
+            continue;
+      } else {
+         if (!have_batch_query)
+            continue;
+         result.batch[0] = stm->batch_result->batch[cntr->batch_index];
+      }
 
       data[offset++] = gid;
       data[offset++] = cid;
@@ -294,6 +359,7 @@ st_init_perfmon(struct st_context *st)
 
    for (gid = 0; gid < num_groups; gid++) {
       struct gl_perf_monitor_group *g = &groups[perfmon->NumGroups];
+      struct st_perf_monitor_group *stg = &stgroups[perfmon->NumGroups];
       struct pipe_driver_query_group_info group_info;
       struct gl_perf_monitor_counter *counters = NULL;
       struct st_perf_monitor_counter *stcounters = NULL;
@@ -313,7 +379,7 @@ st_init_perfmon(struct st_context *st)
       stcounters = CALLOC(group_info.num_queries, sizeof(*stcounters));
       if (!stcounters)
          goto fail;
-      stgroups[perfmon->NumGroups].counters = stcounters;
+      stg->counters = stcounters;
 
       for (cid = 0; cid < num_counters; cid++) {
          struct gl_perf_monitor_counter *c = &counters[g->NumCounters];
@@ -355,6 +421,9 @@ st_init_perfmon(struct st_context *st)
          }
 
          stc->query_type = info.query_type;
+         stc->flags = info.flags;
+         if (stc->flags & PIPE_DRIVER_QUERY_FLAG_BATCH)
+            stg->has_batch = true;
 
          g->NumCounters++;
       }
diff --git a/src/mesa/state_tracker/st_cb_perfmon.h b/src/mesa/state_tracker/st_cb_perfmon.h
index 79e0421dba2..29732866bf8 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.h
+++ b/src/mesa/state_tracker/st_cb_perfmon.h
@@ -31,6 +31,7 @@ struct st_perf_counter_object
    struct pipe_query *query;
    int id;
    int group_id;
+   unsigned batch_index;
 };
 
 /**
@@ -41,6 +42,9 @@ struct st_perf_monitor_object
    struct gl_perf_monitor_object base;
    unsigned num_active_counters;
    struct st_perf_counter_object *active_counters;
+
+   struct pipe_query *batch_query;
+   union pipe_query_result *batch_result;
 };
 
 /**
@@ -50,11 +54,13 @@ struct st_perf_monitor_object
 struct st_perf_monitor_counter
 {
    unsigned query_type;
+   unsigned flags;
 };
 
 struct st_perf_monitor_group
 {
    struct st_perf_monitor_counter *counters;
+   bool has_batch;
 };
 
 /**

From 8a125afa6e88a3eeddba8c7fdc1a75c9b99d5489 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Wed, 18 Nov 2015 18:40:22 +0100
Subject: [PATCH 174/335] radeon: ensure that timing/profiling queries are
 suspended on flush
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The queries_suspended_for_flush flag is redundant because suspended queries
are not removed from their respective linked list.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 13 ++++++-------
 src/gallium/drivers/radeon/r600_pipe_common.h |  2 --
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 9cb30c753cc..7464f677398 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -27,6 +27,7 @@
 #include "r600_pipe_common.h"
 #include "r600_cs.h"
 #include "tgsi/tgsi_parse.h"
+#include "util/list.h"
 #include "util/u_draw_quad.h"
 #include "util/u_memory.h"
 #include "util/u_format_s3tc.h"
@@ -135,12 +136,10 @@ static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
 void r600_preflush_suspend_features(struct r600_common_context *ctx)
 {
 	/* suspend queries */
-	ctx->queries_suspended_for_flush = false;
-	if (ctx->num_cs_dw_nontimer_queries_suspend) {
+	if (!LIST_IS_EMPTY(&ctx->active_nontimer_queries))
 		r600_suspend_nontimer_queries(ctx);
+	if (!LIST_IS_EMPTY(&ctx->active_timer_queries))
 		r600_suspend_timer_queries(ctx);
-		ctx->queries_suspended_for_flush = true;
-	}
 
 	ctx->streamout.suspended = false;
 	if (ctx->streamout.begin_emitted) {
@@ -157,10 +156,10 @@ void r600_postflush_resume_features(struct r600_common_context *ctx)
 	}
 
 	/* resume queries */
-	if (ctx->queries_suspended_for_flush) {
-		r600_resume_nontimer_queries(ctx);
+	if (!LIST_IS_EMPTY(&ctx->active_timer_queries))
 		r600_resume_timer_queries(ctx);
-	}
+	if (!LIST_IS_EMPTY(&ctx->active_nontimer_queries))
+		r600_resume_nontimer_queries(ctx);
 }
 
 static void r600_flush_from_st(struct pipe_context *ctx,
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index aa047119cb7..fbdc5c410ae 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -393,8 +393,6 @@ struct r600_common_context {
 	struct list_head		active_timer_queries;
 	unsigned			num_cs_dw_nontimer_queries_suspend;
 	unsigned			num_cs_dw_timer_queries_suspend;
-	/* If queries have been suspended. */
-	bool				queries_suspended_for_flush;
 	/* Additional hardware info. */
 	unsigned			backend_mask;
 	unsigned			max_db; /* for OQ */

From 108013b8e5c593f9039335010672466ef6ac6010 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 19 Nov 2015 12:41:28 -0700
Subject: [PATCH 175/335] mesa: whitespaces fixes in
 _mesa_one_time_init_extension_overrides()

Trivial.
---
 src/mesa/main/extensions.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index e94d2b74749..01cfdf1a4ec 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -300,12 +300,13 @@ _mesa_one_time_init_extension_overrides(void)
    /* Copy env_const because strtok() is destructive. */
    env = strdup(env_const);
 
-   if (env == NULL || extra_extensions == NULL ||
-           cant_disable_extensions == NULL) {
-       free(env);
-       free(extra_extensions);
-       free(cant_disable_extensions);
-       return;
+   if (env == NULL ||
+       extra_extensions == NULL ||
+       cant_disable_extensions == NULL) {
+      free(env);
+      free(extra_extensions);
+      free(cant_disable_extensions);
+      return;
    }
 
    for (ext = strtok(env, " "); ext != NULL; ext = strtok(NULL, " ")) {

From 0743e14aeef8f91bdd1fbd05f615458cf2b01d41 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 20 Nov 2015 09:34:11 -0700
Subject: [PATCH 176/335] mesa: remove unused var in _mesa_PushDebugGroup()

Trivial.
---
 src/mesa/main/errors.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mesa/main/errors.c b/src/mesa/main/errors.c
index fe628c39ac5..366b119aba3 100644
--- a/src/mesa/main/errors.c
+++ b/src/mesa/main/errors.c
@@ -1142,7 +1142,6 @@ _mesa_PushDebugGroup(GLenum source, GLuint id, GLsizei length,
    const char *callerstr;
    struct gl_debug_state *debug;
    struct gl_debug_message *emptySlot;
-   GLuint ret;
 
    if (_mesa_is_desktop_gl(ctx))
       callerstr = "glPushDebugGroup";

From c45b4257c26b93043508e55c6a1aeb3a8b14eee9 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Fri, 20 Nov 2015 15:15:18 +0000
Subject: [PATCH 177/335] automake: use static llvm for make distcheck

With llvm 3.7 semi-dropping the autoconf build, we rely on their cmake
build, which annoyingly uses another (busted?) SONAME.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 Makefile.am | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Makefile.am b/Makefile.am
index 149610c7c69..a9ed31ee123 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -32,6 +32,7 @@ AM_DISTCHECK_CONFIGURE_FLAGS = \
 	--enable-vdpau \
 	--enable-xa \
 	--enable-xvmc \
+	--disable-llvm-shared-libs \
 	--with-egl-platforms=x11,wayland,drm \
 	--with-dri-drivers=i915,i965,nouveau,radeon,r200,swrast \
 	--with-gallium-drivers=i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast

From ae6d6941f6656494adafb025475f83e02c6dd684 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Fri, 20 Nov 2015 14:59:27 +0000
Subject: [PATCH 178/335] glsl: move builtin_type_macros.h into the correct
 list

Commit b9b40ef9b76 moved the file, but forgot to update the reference in
the makefile. Thus the out of tree build was busted :\

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/glsl/Makefile.sources | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index d9db5f61e04..133d06afa3b 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -22,6 +22,7 @@ NIR_FILES = \
 	nir/glsl_to_nir.h \
 	nir/glsl_types.cpp \
 	nir/glsl_types.h \
+	nir/builtin_type_macros.h \
 	nir/nir.c \
 	nir/nir.h \
 	nir/nir_array.h \
@@ -99,7 +100,6 @@ LIBGLSL_FILES = \
 	blob.c \
 	blob.h \
 	builtin_functions.cpp \
-	builtin_type_macros.h \
 	builtin_types.cpp \
 	builtin_variables.cpp \
 	glsl_parser_extras.cpp \

From 8fdb54879963892cd9c3d0ca09172f2b3533043f Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Fri, 20 Nov 2015 15:12:56 +0000
Subject: [PATCH 179/335] egl: don't forget to ship platform_x11_dri3.h into
 the tarball

Should have been a part of f35198badeb

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/egl/Makefile.am | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/egl/Makefile.am b/src/egl/Makefile.am
index 0b463c8deb0..6953d44e607 100644
--- a/src/egl/Makefile.am
+++ b/src/egl/Makefile.am
@@ -57,7 +57,9 @@ dri2_backend_FILES += drivers/dri2/platform_x11.c
 
 if HAVE_DRI3
 dri3_backend_FILES += \
-	drivers/dri2/platform_x11_dri3.c
+	drivers/dri2/platform_x11_dri3.c \
+	drivers/dri2/platform_x11_dri3.h
+
 libEGL_la_LIBADD += $(top_builddir)/src/loader/libloader_dri3_helper.la
 endif
 endif

From 99d92de5d0af8b926db20d4b2aecbe37b58c758c Mon Sep 17 00:00:00 2001
From: Leo Liu <leo.liu@amd.com>
Date: Tue, 3 Nov 2015 13:33:03 -0500
Subject: [PATCH 180/335] radeon/vce: add new firmware interface support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add new interface to create and encode

Signed-off-by: Leo Liu <leo.liu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeon/Makefile.sources |   1 +
 src/gallium/drivers/radeon/radeon_vce.c     |  21 +-
 src/gallium/drivers/radeon/radeon_vce.h     |   3 +
 src/gallium/drivers/radeon/radeon_vce_52.c  | 242 ++++++++++++++++++++
 4 files changed, 262 insertions(+), 5 deletions(-)
 create mode 100644 src/gallium/drivers/radeon/radeon_vce_52.c

diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources
index d840ff8ca54..1dbad2f39e3 100644
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -14,6 +14,7 @@ C_SOURCES := \
 	radeon_uvd.h \
 	radeon_vce_40_2_2.c \
 	radeon_vce_50.c \
+	radeon_vce_52.c \
 	radeon_vce.c \
 	radeon_vce.h \
 	radeon_video.c \
diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c
index 0dac6fbbdce..b2b084e0f2e 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -49,6 +49,7 @@
 #define FW_50_1_2 ((50 << 24) | (1 << 16) | (2 << 8))
 #define FW_50_10_2 ((50 << 24) | (10 << 16) | (2 << 8))
 #define FW_50_17_3 ((50 << 24) | (17 << 16) | (3 << 8))
+#define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8))
 
 /**
  * flush commands to the hardware
@@ -478,6 +479,10 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
 		radeon_vce_50_init(enc);
 		break;
 
+	case FW_52_0_3:
+		radeon_vce_52_init(enc);
+		break;
+
 	default:
 		goto error;
 	}
@@ -500,11 +505,17 @@ error:
  */
 bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
 {
-	return rscreen->info.vce_fw_version == FW_40_2_2 ||
-		rscreen->info.vce_fw_version == FW_50_0_1 ||
-		rscreen->info.vce_fw_version == FW_50_1_2 ||
-		rscreen->info.vce_fw_version == FW_50_10_2 ||
-		rscreen->info.vce_fw_version == FW_50_17_3;
+	switch (rscreen->info.vce_fw_version) {
+	case FW_40_2_2:
+	case FW_50_0_1:
+	case FW_50_1_2:
+	case FW_50_10_2:
+	case FW_50_17_3:
+	case FW_52_0_3:
+		return true;
+	default:
+		return false;
+	}
 }
 
 /**
diff --git a/src/gallium/drivers/radeon/radeon_vce.h b/src/gallium/drivers/radeon/radeon_vce.h
index 624bda479f8..25e2133521f 100644
--- a/src/gallium/drivers/radeon/radeon_vce.h
+++ b/src/gallium/drivers/radeon/radeon_vce.h
@@ -140,4 +140,7 @@ void radeon_vce_40_2_2_init(struct rvce_encoder *enc);
 /* init vce fw 50 specific callbacks */
 void radeon_vce_50_init(struct rvce_encoder *enc);
 
+/* init vce fw 52 specific callbacks */
+void radeon_vce_52_init(struct rvce_encoder *enc);
+
 #endif
diff --git a/src/gallium/drivers/radeon/radeon_vce_52.c b/src/gallium/drivers/radeon/radeon_vce_52.c
new file mode 100644
index 00000000000..fbae1f97f41
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_vce_52.c
@@ -0,0 +1,242 @@
+/**************************************************************************
+ *
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+
+#include "r600_pipe_common.h"
+#include "radeon_video.h"
+#include "radeon_vce.h"
+
+static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 };
+
+static void create(struct rvce_encoder *enc)
+{
+	enc->task_info(enc, 0x00000000, 0, 0, 0);
+
+	RVCE_BEGIN(0x01000001); // create cmd
+	RVCE_CS(0x00000000); // encUseCircularBuffer
+	RVCE_CS(profiles[enc->base.profile -
+		PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE]); // encProfile
+	RVCE_CS(enc->base.level); // encLevel
+	RVCE_CS(0x00000000); // encPicStructRestriction
+	RVCE_CS(enc->base.width); // encImageWidth
+	RVCE_CS(enc->base.height); // encImageHeight
+	RVCE_CS(enc->luma->level[0].pitch_bytes); // encRefPicLumaPitch
+	RVCE_CS(enc->chroma->level[0].pitch_bytes); // encRefPicChromaPitch
+	RVCE_CS(align(enc->luma->npix_y, 16) / 8); // encRefYHeightInQw
+	RVCE_CS(0x00000000); // encRefPic(Addr|Array)Mode, encPicStructRestriction, disableRDO
+
+	RVCE_CS(0x00000000); // encPreEncodeContextBufferOffset
+	RVCE_CS(0x00000000); // encPreEncodeInputLumaBufferOffset
+	RVCE_CS(0x00000000); // encPreEncodeInputChromaBufferOffs
+	RVCE_CS(0x00000000); // encPreEncodeMode|ChromaFlag|VBAQMode|SceneChangeSensitivity
+	RVCE_END();
+}
+
+static void encode(struct rvce_encoder *enc)
+{
+	signed luma_offset, chroma_offset, bs_offset;
+	unsigned dep, bs_idx = enc->bs_idx++;
+	int i;
+
+	if (enc->dual_inst) {
+		if (bs_idx == 0)
+			dep = 1;
+		else if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR)
+			dep = 0;
+		else
+			dep = 2;
+	} else
+		dep = 0;
+
+	enc->task_info(enc, 0x00000003, dep, 0, bs_idx);
+
+	RVCE_BEGIN(0x05000001); // context buffer
+	RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains, 0); // encodeContextAddressHi/Lo
+	RVCE_END();
+
+	bs_offset = -(signed)(bs_idx * enc->bs_size);
+
+	RVCE_BEGIN(0x05000004); // video bitstream buffer
+	RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, bs_offset); // videoBitstreamRingAddressHi/Lo
+	RVCE_CS(enc->bs_size); // videoBitstreamRingSize
+	RVCE_END();
+
+	if (enc->dual_pipe) {
+		unsigned aux_offset = enc->cpb.res->buf->size -
+			RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
+		RVCE_BEGIN(0x05000002); // auxiliary buffer
+		for (i = 0; i < 8; ++i) {
+			RVCE_CS(aux_offset);
+			aux_offset += RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE;
+		}
+		for (i = 0; i < 8; ++i)
+			RVCE_CS(RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE);
+		RVCE_END();
+	}
+
+	RVCE_BEGIN(0x03000001); // encode
+	RVCE_CS(enc->pic.frame_num ? 0x0 : 0x11); // insertHeaders
+	RVCE_CS(0x00000000); // pictureStructure
+	RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize
+	RVCE_CS(0x00000000); // forceRefreshMap
+	RVCE_CS(0x00000000); // insertAUD
+	RVCE_CS(0x00000000); // endOfSequence
+	RVCE_CS(0x00000000); // endOfStream
+	RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+		enc->luma->level[0].offset); // inputPictureLumaAddressHi/Lo
+	RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+		enc->chroma->level[0].offset); // inputPictureChromaAddressHi/Lo
+	RVCE_CS(align(enc->luma->npix_y, 16)); // encInputFrameYPitch
+	RVCE_CS(enc->luma->level[0].pitch_bytes); // encInputPicLumaPitch
+	RVCE_CS(enc->chroma->level[0].pitch_bytes); // encInputPicChromaPitch
+	if (enc->dual_pipe)
+		RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading)
+	else
+		RVCE_CS(0x00010000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading)
+	RVCE_CS(0x00000000); // encInputPicTileConfig
+	RVCE_CS(enc->pic.picture_type); // encPicType
+	RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag
+	RVCE_CS(0x00000000); // encIdrPicId
+	RVCE_CS(0x00000000); // encMGSKeyPic
+	RVCE_CS(!enc->pic.not_referenced); // encReferenceFlag
+	RVCE_CS(0x00000000); // encTemporalLayerIndex
+	RVCE_CS(0x00000000); // num_ref_idx_active_override_flag
+	RVCE_CS(0x00000000); // num_ref_idx_l0_active_minus1
+	RVCE_CS(0x00000000); // num_ref_idx_l1_active_minus1
+
+	i = enc->pic.frame_num - enc->pic.ref_idx_l0;
+	if (i > 1 && enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) {
+		RVCE_CS(0x00000001); // encRefListModificationOp
+		RVCE_CS(i - 1);      // encRefListModificationNum
+	} else {
+		RVCE_CS(0x00000000); // encRefListModificationOp
+		RVCE_CS(0x00000000); // encRefListModificationNum
+	}
+
+	for (i = 0; i < 3; ++i) {
+		RVCE_CS(0x00000000); // encRefListModificationOp
+		RVCE_CS(0x00000000); // encRefListModificationNum
+	}
+	for (i = 0; i < 4; ++i) {
+		RVCE_CS(0x00000000); // encDecodedPictureMarkingOp
+		RVCE_CS(0x00000000); // encDecodedPictureMarkingNum
+		RVCE_CS(0x00000000); // encDecodedPictureMarkingIdx
+		RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingOp
+		RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingNum
+	}
+
+	// encReferencePictureL0[0]
+	RVCE_CS(0x00000000); // pictureStructure
+	if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
+	   enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
+		struct rvce_cpb_slot *l0 = l0_slot(enc);
+		rvce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
+		RVCE_CS(l0->picture_type); // encPicType
+		RVCE_CS(l0->frame_num); // frameNumber
+		RVCE_CS(l0->pic_order_cnt); // pictureOrderCount
+		RVCE_CS(luma_offset); // lumaOffset
+		RVCE_CS(chroma_offset); // chromaOffset
+	} else {
+		RVCE_CS(0x00000000); // encPicType
+		RVCE_CS(0x00000000); // frameNumber
+		RVCE_CS(0x00000000); // pictureOrderCount
+		RVCE_CS(0xffffffff); // lumaOffset
+		RVCE_CS(0xffffffff); // chromaOffset
+	}
+
+	// encReferencePictureL0[1]
+	RVCE_CS(0x00000000); // pictureStructure
+	RVCE_CS(0x00000000); // encPicType
+	RVCE_CS(0x00000000); // frameNumber
+	RVCE_CS(0x00000000); // pictureOrderCount
+	RVCE_CS(0xffffffff); // lumaOffset
+	RVCE_CS(0xffffffff); // chromaOffset
+
+	// encReferencePictureL1[0]
+	RVCE_CS(0x00000000); // pictureStructure
+	if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
+		struct rvce_cpb_slot *l1 = l1_slot(enc);
+		rvce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
+		RVCE_CS(l1->picture_type); // encPicType
+		RVCE_CS(l1->frame_num); // frameNumber
+		RVCE_CS(l1->pic_order_cnt); // pictureOrderCount
+		RVCE_CS(luma_offset); // lumaOffset
+		RVCE_CS(chroma_offset); // chromaOffset
+	} else {
+		RVCE_CS(0x00000000); // encPicType
+		RVCE_CS(0x00000000); // frameNumber
+		RVCE_CS(0x00000000); // pictureOrderCount
+		RVCE_CS(0xffffffff); // lumaOffset
+		RVCE_CS(0xffffffff); // chromaOffset
+	}
+
+	rvce_frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset);
+	RVCE_CS(luma_offset); // encReconstructedLumaOffset
+	RVCE_CS(chroma_offset); // encReconstructedChromaOffset
+	RVCE_CS(0x00000000); // encColocBufferOffset
+	RVCE_CS(0x00000000); // encReconstructedRefBasePictureLumaOffset
+	RVCE_CS(0x00000000); // encReconstructedRefBasePictureChromaOffset
+	RVCE_CS(0x00000000); // encReferenceRefBasePictureLumaOffset
+	RVCE_CS(0x00000000); // encReferenceRefBasePictureChromaOffset
+	RVCE_CS(0x00000000); // pictureCount
+	RVCE_CS(enc->pic.frame_num); // frameNumber
+	RVCE_CS(enc->pic.pic_order_cnt); // pictureOrderCount
+	RVCE_CS(0x00000000); // numIPicRemainInRCGOP
+	RVCE_CS(0x00000000); // numPPicRemainInRCGOP
+	RVCE_CS(0x00000000); // numBPicRemainInRCGOP
+	RVCE_CS(0x00000000); // numIRPicRemainInRCGOP
+	RVCE_CS(0x00000000); // enableIntraRefresh
+
+	RVCE_CS(0x00000000); // aq_variance_en
+	RVCE_CS(0x00000000); // aq_block_size
+	RVCE_CS(0x00000000); // aq_mb_variance_sel
+	RVCE_CS(0x00000000); // aq_frame_variance_sel
+	RVCE_CS(0x00000000); // aq_param_a
+	RVCE_CS(0x00000000); // aq_param_b
+	RVCE_CS(0x00000000); // aq_param_c
+	RVCE_CS(0x00000000); // aq_param_d
+	RVCE_CS(0x00000000); // aq_param_e
+
+	RVCE_CS(0x00000000); // contextInSFB
+	RVCE_END();
+}
+
+void radeon_vce_52_init(struct rvce_encoder *enc)
+{
+	radeon_vce_50_init(enc);
+
+	enc->create = create;
+	enc->encode = encode;
+}

From 8762570cc5382730afda9954c53dead5a56f398a Mon Sep 17 00:00:00 2001
From: Leo Liu <leo.liu@amd.com>
Date: Tue, 10 Nov 2015 13:34:17 -0500
Subject: [PATCH 181/335] radeon/vce: disable two pipe mode for stoney
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Only one encoding pipe available for Stoney

Signed-off-by: Leo Liu <leo.liu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/drivers/radeon/radeon_vce.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c
index b2b084e0f2e..8a60441c056 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -406,7 +406,8 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
 		enc->use_vm = true;
 	if ((rscreen->info.drm_major > 2) || (rscreen->info.drm_minor >= 42))
 		enc->use_vui = true;
-	if (rscreen->info.family >= CHIP_TONGA)
+	if (rscreen->info.family >= CHIP_TONGA &&
+             rscreen->info.family != CHIP_STONEY)
 		enc->dual_pipe = true;
 	/* TODO enable B frame with dual instance */
 	if ((rscreen->info.family >= CHIP_TONGA) &&

From 2f7d2fd9979ce111af9c3a79b967d4efc029ab60 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Fri, 20 Nov 2015 11:36:41 -0800
Subject: [PATCH 182/335] docs: Add GL_EXT_shader_samples_identical to the
 release notes

Trivial

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
---
 docs/relnotes/11.1.0.html | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 2f462f7b28a..d317bfbbddb 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -70,6 +70,7 @@ Note: some of the new features are only available with certain drivers.
 <li>EGL_KHR_gl_colorspace on softpipe, llvmpipe</li>
 <li>new virgl gallium driver for qemu virtio-gpu</li>
 <li>16x multisampling on i965 (gen9+)</li>
+<li>GL_EXT_shader_samples_identical on i965.</li>
 </ul>
 
 <h2>Bug fixes</h2>

From d23aa634e0d45bbeda0f48033cc42656259ce0ef Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Tue, 14 Apr 2015 14:57:51 -0700
Subject: [PATCH 183/335] i965/skl: Add fast color clear infrastructure

Patch was originally called:
i965/skl: Enable fast color clears on SKL

Skylake introduces some differences in the way that fast clears are programmed
and in the restrictions for using fast clears. Since some of these are
non-obvious, and fast clears are currently disabled globally, we can enable the
simple stuff here and leave the weirder stuff as separately reviewable work.

Based on a patch originally from Kristian.

Note that within this patch the change in scaling factors could be achieved with
this hunk instead. I've opted to keep things more like how the docs describe it
however.
   --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
   +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
   @@ -150,9 +150,13 @@ intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
          /* In release builds, fall through */
       case I915_TILING_Y:
          *width_px = 32 / mt->cpp;
   -      *height = 4;
   +      if (brw->gen >= 9)
   +         *height = 2;
   +      else
   +         *height = 4;

v2: Add braces for the multiline (Matt + Chad)
Comment updates (requested by Chad)
Modified commit message
Commit message from Chad explaining the MCS height change (Chad)

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Neil Roberts <neil@linux.intel.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 .../drivers/dri/i965/brw_meta_fast_clear.c    | 55 +++++++++++++------
 .../drivers/dri/i965/gen8_surface_state.c     | 16 +++++-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 17 ++++++
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 13 ++++-
 4 files changed, 81 insertions(+), 20 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index 211c0a44162..938e028f58d 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -204,7 +204,7 @@ brw_draw_rectlist(struct gl_context *ctx, struct rect *rect, int num_instances)
 }
 
 static void
-get_fast_clear_rect(struct gl_framebuffer *fb,
+get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb,
                     struct intel_renderbuffer *irb, struct rect *rect)
 {
    unsigned int x_align, y_align;
@@ -228,7 +228,14 @@ get_fast_clear_rect(struct gl_framebuffer *fb,
        */
       intel_get_non_msrt_mcs_alignment(irb->mt, &x_align, &y_align);
       x_align *= 16;
-      y_align *= 32;
+
+      /* SKL+ line alignment requirement for Y-tiled are half those of the prior
+       * generations.
+       */
+      if (brw->gen >= 9)
+         y_align *= 16;
+      else
+         y_align *= 32;
 
       /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
        * Target(s)", beneath the "Fast Color Clear" bullet (p327):
@@ -265,8 +272,10 @@ get_fast_clear_rect(struct gl_framebuffer *fb,
        *     terms of (width,height) of the RT.
        *
        *     MSAA  Width of Clear Rect  Height of Clear Rect
+       *      2X     Ceil(1/8*width)      Ceil(1/2*height)
        *      4X     Ceil(1/8*width)      Ceil(1/2*height)
        *      8X     Ceil(1/2*width)      Ceil(1/2*height)
+       *     16X         width            Ceil(1/2*height)
        *
        * The text "with upper left co-ordinate to coincide with actual
        * rectangle being cleared" is a little confusing--it seems to imply
@@ -289,6 +298,9 @@ get_fast_clear_rect(struct gl_framebuffer *fb,
       case 8:
          x_scaledown = 2;
          break;
+      case 16:
+         x_scaledown = 1;
+         break;
       default:
          unreachable("Unexpected sample count for fast clear");
       }
@@ -357,18 +369,25 @@ is_color_fast_clear_compatible(struct brw_context *brw,
 
 /**
  * Convert the given color to a bitfield suitable for ORing into DWORD 7 of
- * SURFACE_STATE.
+ * SURFACE_STATE (DWORD 12-15 on SKL+).
  */
-static uint32_t
-compute_fast_clear_color_bits(const union gl_color_union *color)
+static void
+set_fast_clear_color(struct brw_context *brw,
+                     struct intel_mipmap_tree *mt,
+                     const union gl_color_union *color)
 {
-   uint32_t bits = 0;
-   for (int i = 0; i < 4; i++) {
-      /* Testing for non-0 works for integer and float colors */
-      if (color->f[i] != 0.0f)
-         bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
+   if (brw->gen >= 9) {
+      mt->gen9_fast_clear_color = *color;
+   } else {
+      mt->fast_clear_color_value = 0;
+      for (int i = 0; i < 4; i++) {
+         /* Testing for non-0 works for integer and float colors */
+         if (color->f[i] != 0.0f) {
+             mt->fast_clear_color_value |=
+                1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
+         }
+      }
    }
-   return bits;
 }
 
 static const uint32_t fast_clear_color[4] = { ~0, ~0, ~0, ~0 };
@@ -510,8 +529,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
 
       switch (clear_type) {
       case FAST_CLEAR:
-         irb->mt->fast_clear_color_value =
-            compute_fast_clear_color_bits(&ctx->Color.ClearColor);
+         set_fast_clear_color(brw, irb->mt, &ctx->Color.ClearColor);
          irb->need_downsample = true;
 
          /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the
@@ -527,7 +545,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
          irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
          irb->need_downsample = true;
          fast_clear_buffers |= 1 << index;
-         get_fast_clear_rect(fb, irb, &fast_clear_rect);
+         get_fast_clear_rect(brw, fb, irb, &fast_clear_rect);
          break;
 
       case REP_CLEAR:
@@ -662,8 +680,9 @@ get_resolve_rect(struct brw_context *brw,
     *
     * The scaledown factors in the table that follows are related to the
     * alignment size returned by intel_get_non_msrt_mcs_alignment() by a
-    * multiplier.  For IVB and HSW, we divide by two, for BDW we multiply
-    * by 8 and 16 and 8 and 8 for SKL.
+    * multiplier. For IVB and HSW, we divide by two, for BDW we multiply
+    * by 8 and 16. Similar to the fast clear, SKL eases the BDW vertical scaling
+    * by a factor of 2.
     */
 
    intel_get_non_msrt_mcs_alignment(mt, &x_align, &y_align);
@@ -709,6 +728,10 @@ brw_meta_resolve_color(struct brw_context *brw,
 
    brw_bind_rep_write_shader(brw, (float *) fast_clear_color);
 
+   /* SKL+ also has a resolve mode for compressed render targets and thus more
+    * bits to let us select the type of resolve.  For fast clear resolves, it
+    * turns out we can use the same value as pre-SKL though.
+    */
    set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE);
 
    mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c
index 140a6544983..69098583357 100644
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -187,7 +187,21 @@ gen8_emit_fast_clear_color(struct brw_context *brw,
                            struct intel_mipmap_tree *mt,
                            uint32_t *surf)
 {
-   surf[7] |= mt->fast_clear_color_value;
+   if (brw->gen >= 9) {
+#define check_fast_clear_val(x) \
+      assert(mt->gen9_fast_clear_color.f[x] == 0.0 || \
+             mt->gen9_fast_clear_color.f[x] == 1.0)
+      check_fast_clear_val(0);
+      check_fast_clear_val(1);
+      check_fast_clear_val(2);
+      check_fast_clear_val(3);
+#undef check_fast_clear_val
+      surf[12] = mt->gen9_fast_clear_color.ui[0];
+      surf[13] = mt->gen9_fast_clear_color.ui[1];
+      surf[14] = mt->gen9_fast_clear_color.ui[2];
+      surf[15] = mt->gen9_fast_clear_color.ui[3];
+   } else
+      surf[7] |= mt->fast_clear_color_value;
 }
 
 static void
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index b1a7632d82f..4c3f2c00d6f 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -192,6 +192,12 @@ intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling)
  *
  *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
  *       64bpp, and 128bpp.
+ *
+ * From the Skylake documentation, it is made clear that X-tiling is no longer
+ * supported:
+ *
+ *     - MCS and Lossless compression is supported for TiledY/TileYs/TileYf
+ *     non-MSRTs only.
  */
 static bool
 intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
@@ -1495,6 +1501,17 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
    intel_get_non_msrt_mcs_alignment(mt, &block_width_px, &block_height);
    unsigned width_divisor = block_width_px * 4;
    unsigned height_divisor = block_height * 8;
+
+   /* The Skylake MCS is twice as tall as the Broadwell MCS.
+    *
+    * In pre-Skylake, each bit in the MCS contained the state of 2 cachelines
+    * in the main surface. In Skylake, it's two bits.  The extra bit
+    * doubles the MCS height, not width, because in Skylake the MCS is always
+    * Y-tiled.
+    */
+   if (brw->gen >= 9)
+      height_divisor /= 2;
+
    unsigned mcs_width =
       ALIGN(mt->logical_width0, width_divisor) / width_divisor;
    unsigned mcs_height =
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index 805cd714d88..64f73ea9ae5 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -633,15 +633,22 @@ struct intel_mipmap_tree
     * The SURFACE_STATE bits associated with the last fast color clear to this
     * color mipmap tree, if any.
     *
-    * This value will only ever contain ones in bits 28-31, so it is safe to
-    * OR into dword 7 of SURFACE_STATE.
+    * Prior to GEN9 there is a single bit for RGBA clear values which gives you
+    * the option of 2^4 clear colors. Each bit determines if the color channel
+    * is fully saturated or unsaturated (Cherryview does add a 32b value per
+    * channel, but it is globally applied instead of being part of the render
+    * surface state). Starting with GEN9, the surface state accepts a 32b value
+    * for each color channel.
     *
     * @see RENDER_SURFACE_STATE.RedClearColor
     * @see RENDER_SURFACE_STATE.GreenClearColor
     * @see RENDER_SURFACE_STATE.BlueClearColor
     * @see RENDER_SURFACE_STATE.AlphaClearColor
     */
-   uint32_t fast_clear_color_value;
+   union {
+      uint32_t fast_clear_color_value;
+      union gl_color_union gen9_fast_clear_color;
+   };
 
    /**
     * Disable allocation of auxiliary buffers, such as the HiZ buffer and MCS

From 9d94eeb8a42bc78ebd9bb249eff61618d54f92e4 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Tue, 10 Nov 2015 12:16:54 -0800
Subject: [PATCH 184/335] i965: Add lossless compression to surface format
 table

Background: Since Ivybridge and prior to Skylake, Intel hardware has had the
ability to use an MCS (Multisample Control Surface) as auxiliary data in
"compression" operations on the surface. This reduces memory bandwidth.  This
hardware was used either for MSAA compression or for fast clear operations. On
Gen8, a similar mechanism exists to allow the hiz buffer to be sampled from, and
therefore this feature is sometimes referred to more generally as "AUX buffers".

Skylake adds the ability to have the display engine directly source compressed
surfaces on top of the ability to sample from them. Inference dictates that
enabling this display feature adds a restriction to the formats which could
actually be compressed. This is backed up by a blurb in the AUX_CCS_D section
from the RENDER_SURFACE_STATE: "In addition, if the surface is bound to the
sampling engine, Surface Format must be supported for Render Target Compression
for surfaces bound to the sampling engine." The current set of surfaces seems
to be a subset as compared to previous gens (see the next patch). Also, if I had
to guess I would guess that future gens add support for more surface formats. To
make handling this a bit easier to read, and more future proof, the support for
this is moved into the surface formats table.

Along with the modifications to the table, a helper function is also provided to
determine if a surface is CCS_E compatible. Because fast clears are currently
disabled on SKL, we can plumb the helper all the way through here, and not
actually have anything break.

v2:
- rename ccs to ccs_e; Requested-by: Chad
- rename lossless_compression to lossless_compression Requested-by: Chad
- change meaning of brw_losslessly_compressible_format Requested-by: Chad
  - related changes to the code to reflect this.
- remove excess ccs (Chad)

v3:
- Commit message changes (Topi)
- Const some things which could be const (Topi)

Requested-by: Chad Versace <chad.versace@intel.com>
Requested-by: Neil Roberts <neil@linux.intel.com>
Signed-off-by: Ben Widawsky <benjamin.widawsky@intel.com>
Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/drivers/dri/i965/brw_context.h       |   2 +
 .../drivers/dri/i965/brw_surface_formats.c    | 525 +++++++++---------
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c |   7 +-
 3 files changed, 282 insertions(+), 252 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 8d6bc196401..fe45edb89ff 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1467,6 +1467,8 @@ void brw_upload_image_surfaces(struct brw_context *brw,
 /* brw_surface_formats.c */
 bool brw_render_target_supported(struct brw_context *brw,
                                  struct gl_renderbuffer *rb);
+bool brw_losslessly_compressible_format(struct brw_context *brw,
+                                        uint32_t brw_format);
 uint32_t brw_depth_format(struct brw_context *brw, mesa_format format);
 mesa_format brw_lower_mesa_image_format(const struct brw_device_info *devinfo,
                                         mesa_format format);
diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c
index 97fff60f3e5..944074d1de3 100644
--- a/src/mesa/drivers/dri/i965/brw_surface_formats.c
+++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c
@@ -39,14 +39,15 @@ struct surface_format_info {
    int input_vb;
    int streamed_output_vb;
    int color_processing;
+   int lossless_compression;
    const char *name;
 };
 
 /* This macro allows us to write the table almost as it appears in the PRM,
  * while restructuring it to turn it into the C code we want.
  */
-#define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, sf) \
-   [BRW_SURFACEFORMAT_##sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color, #sf},
+#define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, ccs_e, sf) \
+   [BRW_SURFACEFORMAT_##sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color, ccs_e, #sf},
 
 #define Y 0
 #define x 999
@@ -74,6 +75,7 @@ struct surface_format_info {
  * VB    - Input Vertex Buffer
  * SO    - Steamed Output Vertex Buffers (transform feedback)
  * color - Color Processing
+ * ccs_e - Lossless Compression Support (gen9+ only)
  * sf    - Surface Format
  *
  * See page 88 of the Sandybridge PRM VOL4_Part1 PDF.
@@ -84,257 +86,258 @@ struct surface_format_info {
  * - VOL2_Part1 section 2.5.11 Format Conversion (vertex fetch).
  * - VOL4_Part1 section 2.12.2.1.2 Sampler Output Channel Mapping.
  * - VOL4_Part1 section 3.9.11 Render Target Write.
+ * - Render Target Surface Types [SKL+]
  */
 const struct surface_format_info surface_formats[] = {
-/* smpl filt shad CK  RT  AB  VB  SO  color */
-   SF( Y, 50,  x,  x,  Y,  Y,  Y,  Y,  x, R32G32B32A32_FLOAT)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x, R32G32B32A32_SINT)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x, R32G32B32A32_UINT)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R32G32B32A32_UNORM)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R32G32B32A32_SNORM)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R64G64_FLOAT)
-   SF( Y, 50,  x,  x,  x,  x,  x,  x,  x, R32G32B32X32_FLOAT)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R32G32B32A32_SSCALED)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R32G32B32A32_USCALED)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, R32G32B32A32_SFIXED)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, R64G64_PASSTHRU)
-   SF( Y, 50,  x,  x,  x,  x,  Y,  Y,  x, R32G32B32_FLOAT)
-   SF( Y,  x,  x,  x,  x,  x,  Y,  Y,  x, R32G32B32_SINT)
-   SF( Y,  x,  x,  x,  x,  x,  Y,  Y,  x, R32G32B32_UINT)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R32G32B32_UNORM)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R32G32B32_SNORM)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R32G32B32_SSCALED)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R32G32B32_USCALED)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, R32G32B32_SFIXED)
-   SF( Y,  Y,  x,  x,  Y, 45,  Y,  x, 60, R16G16B16A16_UNORM)
-   SF( Y,  Y,  x,  x,  Y, 60,  Y,  x,  x, R16G16B16A16_SNORM)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x, R16G16B16A16_SINT)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x, R16G16B16A16_UINT)
-   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x,  x, R16G16B16A16_FLOAT)
-   SF( Y, 50,  x,  x,  Y,  Y,  Y,  Y,  x, R32G32_FLOAT)
-   SF( Y, 70,  x,  x,  Y,  Y,  Y,  Y,  x, R32G32_FLOAT_LD)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x, R32G32_SINT)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x, R32G32_UINT)
-   SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x, R32_FLOAT_X8X24_TYPELESS)
-   SF( Y,  x,  x,  x,  x,  x,  x,  x,  x, X32_TYPELESS_G8X24_UINT)
-   SF( Y, 50,  x,  x,  x,  x,  x,  x,  x, L32A32_FLOAT)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R32G32_UNORM)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R32G32_SNORM)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R64_FLOAT)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, R16G16B16X16_UNORM)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, R16G16B16X16_FLOAT)
-   SF( Y, 50,  x,  x,  x,  x,  x,  x,  x, A32X32_FLOAT)
-   SF( Y, 50,  x,  x,  x,  x,  x,  x,  x, L32X32_FLOAT)
-   SF( Y, 50,  x,  x,  x,  x,  x,  x,  x, I32X32_FLOAT)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R16G16B16A16_SSCALED)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R16G16B16A16_USCALED)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R32G32_SSCALED)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R32G32_USCALED)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, R32G32_SFIXED)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, R64_PASSTHRU)
-   SF( Y,  Y,  x,  Y,  Y,  Y,  Y,  x, 60, B8G8R8A8_UNORM)
-   SF( Y,  Y,  x,  x,  Y,  Y,  x,  x,  x, B8G8R8A8_UNORM_SRGB)
-/* smpl filt shad CK  RT  AB  VB  SO  color */
-   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x, 60, R10G10B10A2_UNORM)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x, 60, R10G10B10A2_UNORM_SRGB)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x, R10G10B10A2_UINT)
-   SF( Y,  Y,  x,  x,  x,  Y,  Y,  x,  x, R10G10B10_SNORM_A2_UNORM)
-   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x, 60, R8G8B8A8_UNORM)
-   SF( Y,  Y,  x,  x,  Y,  Y,  x,  x, 60, R8G8B8A8_UNORM_SRGB)
-   SF( Y,  Y,  x,  x,  Y, 60,  Y,  x,  x, R8G8B8A8_SNORM)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x, R8G8B8A8_SINT)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x, R8G8B8A8_UINT)
-   SF( Y,  Y,  x,  x,  Y, 45,  Y,  x,  x, R16G16_UNORM)
-   SF( Y,  Y,  x,  x,  Y, 60,  Y,  x,  x, R16G16_SNORM)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x, R16G16_SINT)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x, R16G16_UINT)
-   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x,  x, R16G16_FLOAT)
-   SF( Y,  Y,  x,  x,  Y,  Y,  x,  x, 60, B10G10R10A2_UNORM)
-   SF( Y,  Y,  x,  x,  Y,  Y,  x,  x, 60, B10G10R10A2_UNORM_SRGB)
-   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x,  x, R11G11B10_FLOAT)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x, R32_SINT)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x, R32_UINT)
-   SF( Y, 50,  Y,  x,  Y,  Y,  Y,  Y,  x, R32_FLOAT)
-   SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x, R24_UNORM_X8_TYPELESS)
-   SF( Y,  x,  x,  x,  x,  x,  x,  x,  x, X24_TYPELESS_G8_UINT)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, L16A16_UNORM)
-   SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x, I24X8_UNORM)
-   SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x, L24X8_UNORM)
-   SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x, A24X8_UNORM)
-   SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x, I32_FLOAT)
-   SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x, L32_FLOAT)
-   SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x, A32_FLOAT)
-   SF( Y,  Y,  x,  Y,  x,  x,  x,  x, 60, B8G8R8X8_UNORM)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, B8G8R8X8_UNORM_SRGB)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, R8G8B8X8_UNORM)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, R8G8B8X8_UNORM_SRGB)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, R9G9B9E5_SHAREDEXP)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, B10G10R10X2_UNORM)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, L16A16_FLOAT)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R32_UNORM)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R32_SNORM)
-/* smpl filt shad CK  RT  AB  VB  SO  color */
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R10G10B10X2_USCALED)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R8G8B8A8_SSCALED)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R8G8B8A8_USCALED)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R16G16_SSCALED)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R16G16_USCALED)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R32_SSCALED)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R32_USCALED)
-   SF( Y,  Y,  x,  Y,  Y,  Y,  x,  x,  x, B5G6R5_UNORM)
-   SF( Y,  Y,  x,  x,  Y,  Y,  x,  x,  x, B5G6R5_UNORM_SRGB)
-   SF( Y,  Y,  x,  Y,  Y,  Y,  x,  x,  x, B5G5R5A1_UNORM)
-   SF( Y,  Y,  x,  x,  Y,  Y,  x,  x,  x, B5G5R5A1_UNORM_SRGB)
-   SF( Y,  Y,  x,  Y,  Y,  Y,  x,  x,  x, B4G4R4A4_UNORM)
-   SF( Y,  Y,  x,  x,  Y,  Y,  x,  x,  x, B4G4R4A4_UNORM_SRGB)
-   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x,  x, R8G8_UNORM)
-   SF( Y,  Y,  x,  Y,  Y, 60,  Y,  x,  x, R8G8_SNORM)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x, R8G8_SINT)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x, R8G8_UINT)
-   SF( Y,  Y,  Y,  x,  Y, 45,  Y,  x, 70, R16_UNORM)
-   SF( Y,  Y,  x,  x,  Y, 60,  Y,  x,  x, R16_SNORM)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x, R16_SINT)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x, R16_UINT)
-   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x,  x, R16_FLOAT)
-   SF(50, 50,  x,  x,  x,  x,  x,  x,  x, A8P8_UNORM_PALETTE0)
-   SF(50, 50,  x,  x,  x,  x,  x,  x,  x, A8P8_UNORM_PALETTE1)
-   SF( Y,  Y,  Y,  x,  x,  x,  x,  x,  x, I16_UNORM)
-   SF( Y,  Y,  Y,  x,  x,  x,  x,  x,  x, L16_UNORM)
-   SF( Y,  Y,  Y,  x,  x,  x,  x,  x,  x, A16_UNORM)
-   SF( Y,  Y,  x,  Y,  x,  x,  x,  x,  x, L8A8_UNORM)
-   SF( Y,  Y,  Y,  x,  x,  x,  x,  x,  x, I16_FLOAT)
-   SF( Y,  Y,  Y,  x,  x,  x,  x,  x,  x, L16_FLOAT)
-   SF( Y,  Y,  Y,  x,  x,  x,  x,  x,  x, A16_FLOAT)
-   SF(45, 45,  x,  x,  x,  x,  x,  x,  x, L8A8_UNORM_SRGB)
-   SF( Y,  Y,  x,  Y,  x,  x,  x,  x,  x, R5G5_SNORM_B6_UNORM)
-   SF( x,  x,  x,  x,  Y,  Y,  x,  x,  x, B5G5R5X1_UNORM)
-   SF( x,  x,  x,  x,  Y,  Y,  x,  x,  x, B5G5R5X1_UNORM_SRGB)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R8G8_SSCALED)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R8G8_USCALED)
-/* smpl filt shad CK  RT  AB  VB  SO  color */
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R16_SSCALED)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R16_USCALED)
-   SF(50, 50,  x,  x,  x,  x,  x,  x,  x, P8A8_UNORM_PALETTE0)
-   SF(50, 50,  x,  x,  x,  x,  x,  x,  x, P8A8_UNORM_PALETTE1)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, A1B5G5R5_UNORM)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, A4B4G4R4_UNORM)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, L8A8_UINT)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, L8A8_SINT)
-   SF( Y,  Y,  x, 45,  Y,  Y,  Y,  x,  x, R8_UNORM)
-   SF( Y,  Y,  x,  x,  Y, 60,  Y,  x,  x, R8_SNORM)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x, R8_SINT)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x, R8_UINT)
-   SF( Y,  Y,  x,  Y,  Y,  Y,  x,  x,  x, A8_UNORM)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, I8_UNORM)
-   SF( Y,  Y,  x,  Y,  x,  x,  x,  x,  x, L8_UNORM)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, P4A4_UNORM)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, A4P4_UNORM)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R8_SSCALED)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R8_USCALED)
-   SF(45, 45,  x,  x,  x,  x,  x,  x,  x, P8_UNORM_PALETTE0)
-   SF(45, 45,  x,  x,  x,  x,  x,  x,  x, L8_UNORM_SRGB)
-   SF(45, 45,  x,  x,  x,  x,  x,  x,  x, P8_UNORM_PALETTE1)
-   SF(45, 45,  x,  x,  x,  x,  x,  x,  x, P4A4_UNORM_PALETTE1)
-   SF(45, 45,  x,  x,  x,  x,  x,  x,  x, A4P4_UNORM_PALETTE1)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, Y8_SNORM)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, L8_UINT)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, L8_SINT)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, I8_UINT)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, I8_SINT)
-   SF(45, 45,  x,  x,  x,  x,  x,  x,  x, DXT1_RGB_SRGB)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, R1_UINT)
-   SF( Y,  Y,  x,  Y,  Y,  x,  x,  x, 60, YCRCB_NORMAL)
-   SF( Y,  Y,  x,  Y,  Y,  x,  x,  x, 60, YCRCB_SWAPUVY)
-   SF(45, 45,  x,  x,  x,  x,  x,  x,  x, P2_UNORM_PALETTE0)
-   SF(45, 45,  x,  x,  x,  x,  x,  x,  x, P2_UNORM_PALETTE1)
-   SF( Y,  Y,  x,  Y,  x,  x,  x,  x,  x, BC1_UNORM)
-   SF( Y,  Y,  x,  Y,  x,  x,  x,  x,  x, BC2_UNORM)
-   SF( Y,  Y,  x,  Y,  x,  x,  x,  x,  x, BC3_UNORM)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, BC4_UNORM)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, BC5_UNORM)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, BC1_UNORM_SRGB)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, BC2_UNORM_SRGB)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, BC3_UNORM_SRGB)
-   SF( Y,  x,  x,  x,  x,  x,  x,  x,  x, MONO8)
-   SF( Y,  Y,  x,  x,  Y,  x,  x,  x, 60, YCRCB_SWAPUV)
-   SF( Y,  Y,  x,  x,  Y,  x,  x,  x, 60, YCRCB_SWAPY)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, DXT1_RGB)
-/* smpl filt shad CK  RT  AB  VB  SO  color */
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, FXT1)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R8G8B8_UNORM)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R8G8B8_SNORM)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R8G8B8_SSCALED)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R8G8B8_USCALED)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R64G64B64A64_FLOAT)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R64G64B64_FLOAT)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, BC4_SNORM)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, BC5_SNORM)
-   SF(50, 50,  x,  x,  x,  x, 60,  x,  x, R16G16B16_FLOAT)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R16G16B16_UNORM)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R16G16B16_SNORM)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R16G16B16_SSCALED)
-   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, R16G16B16_USCALED)
-   SF(70, 70,  x,  x,  x,  x,  x,  x,  x, BC6H_SF16)
-   SF(70, 70,  x,  x,  x,  x,  x,  x,  x, BC7_UNORM)
-   SF(70, 70,  x,  x,  x,  x,  x,  x,  x, BC7_UNORM_SRGB)
-   SF(70, 70,  x,  x,  x,  x,  x,  x,  x, BC6H_UF16)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, PLANAR_420_8)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, R8G8B8_UNORM_SRGB)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, ETC1_RGB8)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, ETC2_RGB8)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, EAC_R11)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, EAC_RG11)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, EAC_SIGNED_R11)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, EAC_SIGNED_RG11)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, ETC2_SRGB8)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, R16G16B16_UINT)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, R16G16B16_SINT)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, R32_SFIXED)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, R10G10B10A2_SNORM)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, R10G10B10A2_USCALED)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, R10G10B10A2_SSCALED)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, R10G10B10A2_SINT)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, B10G10R10A2_SNORM)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, B10G10R10A2_USCALED)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, B10G10R10A2_SSCALED)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, B10G10R10A2_UINT)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, B10G10R10A2_SINT)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, R64G64B64A64_PASSTHRU)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, R64G64B64_PASSTHRU)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, ETC2_RGB8_PTA)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, ETC2_SRGB8_PTA)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, ETC2_EAC_RGBA8)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, ETC2_EAC_SRGB8_A8)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, R8G8B8_UINT)
-   SF( x,  x,  x,  x,  x,  x,  x,  x,  x, R8G8B8_SINT)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_4x4_FLT16)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_5x4_FLT16)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_5x5_FLT16)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_6x5_FLT16)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_6x6_FLT16)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_8x5_FLT16)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_8x6_FLT16)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_8x8_FLT16)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_10x5_FLT16)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_10x6_FLT16)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_10x8_FLT16)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_10x10_FLT16)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_12x10_FLT16)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_12x12_FLT16)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_4x4_U8sRGB)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_5x4_U8sRGB)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_5x5_U8sRGB)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_6x5_U8sRGB)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_6x6_U8sRGB)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_8x5_U8sRGB)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_8x6_U8sRGB)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_8x8_U8sRGB)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_10x5_U8sRGB)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_10x6_U8sRGB)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_10x8_U8sRGB)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_10x10_U8sRGB)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_12x10_U8sRGB)
-   SF(80, 80,  x,  x,  x,  x,  x,  x,  x, ASTC_LDR_2D_12x12_U8sRGB)
+/* smpl filt shad CK  RT  AB  VB  SO  color ccs_e */
+   SF( Y, 50,  x,  x,  Y,  Y,  Y,  Y,  x,    x,   R32G32B32A32_FLOAT)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,    x,   R32G32B32A32_SINT)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,    x,   R32G32B32A32_UINT)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32G32B32A32_UNORM)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32G32B32A32_SNORM)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R64G64_FLOAT)
+   SF( Y, 50,  x,  x,  x,  x,  x,  x,  x,    x,   R32G32B32X32_FLOAT)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32G32B32A32_SSCALED)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32G32B32A32_USCALED)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   R32G32B32A32_SFIXED)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   R64G64_PASSTHRU)
+   SF( Y, 50,  x,  x,  x,  x,  Y,  Y,  x,    x,   R32G32B32_FLOAT)
+   SF( Y,  x,  x,  x,  x,  x,  Y,  Y,  x,    x,   R32G32B32_SINT)
+   SF( Y,  x,  x,  x,  x,  x,  Y,  Y,  x,    x,   R32G32B32_UINT)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32G32B32_UNORM)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32G32B32_SNORM)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32G32B32_SSCALED)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32G32B32_USCALED)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   R32G32B32_SFIXED)
+   SF( Y,  Y,  x,  x,  Y, 45,  Y,  x, 60,    x,   R16G16B16A16_UNORM)
+   SF( Y,  Y,  x,  x,  Y, 60,  Y,  x,  x,    x,   R16G16B16A16_SNORM)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,    x,   R16G16B16A16_SINT)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,    x,   R16G16B16A16_UINT)
+   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x,  x,    x,   R16G16B16A16_FLOAT)
+   SF( Y, 50,  x,  x,  Y,  Y,  Y,  Y,  x,    x,   R32G32_FLOAT)
+   SF( Y, 70,  x,  x,  Y,  Y,  Y,  Y,  x,    x,   R32G32_FLOAT_LD)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,    x,   R32G32_SINT)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,    x,   R32G32_UINT)
+   SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x,    x,   R32_FLOAT_X8X24_TYPELESS)
+   SF( Y,  x,  x,  x,  x,  x,  x,  x,  x,    x,   X32_TYPELESS_G8X24_UINT)
+   SF( Y, 50,  x,  x,  x,  x,  x,  x,  x,    x,   L32A32_FLOAT)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32G32_UNORM)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32G32_SNORM)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R64_FLOAT)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   R16G16B16X16_UNORM)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   R16G16B16X16_FLOAT)
+   SF( Y, 50,  x,  x,  x,  x,  x,  x,  x,    x,   A32X32_FLOAT)
+   SF( Y, 50,  x,  x,  x,  x,  x,  x,  x,    x,   L32X32_FLOAT)
+   SF( Y, 50,  x,  x,  x,  x,  x,  x,  x,    x,   I32X32_FLOAT)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R16G16B16A16_SSCALED)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R16G16B16A16_USCALED)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32G32_SSCALED)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32G32_USCALED)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   R32G32_SFIXED)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   R64_PASSTHRU)
+   SF( Y,  Y,  x,  Y,  Y,  Y,  Y,  x, 60,    x,   B8G8R8A8_UNORM)
+   SF( Y,  Y,  x,  x,  Y,  Y,  x,  x,  x,    x,   B8G8R8A8_UNORM_SRGB)
+/* smpl filt shad CK  RT  AB  VB  SO  color ccs_e */
+   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x, 60,    x,   R10G10B10A2_UNORM)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x, 60,    x,   R10G10B10A2_UNORM_SRGB)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,    x,   R10G10B10A2_UINT)
+   SF( Y,  Y,  x,  x,  x,  Y,  Y,  x,  x,    x,   R10G10B10_SNORM_A2_UNORM)
+   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x, 60,    x,   R8G8B8A8_UNORM)
+   SF( Y,  Y,  x,  x,  Y,  Y,  x,  x, 60,    x,   R8G8B8A8_UNORM_SRGB)
+   SF( Y,  Y,  x,  x,  Y, 60,  Y,  x,  x,    x,   R8G8B8A8_SNORM)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,    x,   R8G8B8A8_SINT)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,    x,   R8G8B8A8_UINT)
+   SF( Y,  Y,  x,  x,  Y, 45,  Y,  x,  x,    x,   R16G16_UNORM)
+   SF( Y,  Y,  x,  x,  Y, 60,  Y,  x,  x,    x,   R16G16_SNORM)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,    x,   R16G16_SINT)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,    x,   R16G16_UINT)
+   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x,  x,    x,   R16G16_FLOAT)
+   SF( Y,  Y,  x,  x,  Y,  Y,  x,  x, 60,    x,   B10G10R10A2_UNORM)
+   SF( Y,  Y,  x,  x,  Y,  Y,  x,  x, 60,    x,   B10G10R10A2_UNORM_SRGB)
+   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x,  x,    x,   R11G11B10_FLOAT)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,    x,   R32_SINT)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,    x,   R32_UINT)
+   SF( Y, 50,  Y,  x,  Y,  Y,  Y,  Y,  x,    x,   R32_FLOAT)
+   SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x,    x,   R24_UNORM_X8_TYPELESS)
+   SF( Y,  x,  x,  x,  x,  x,  x,  x,  x,    x,   X24_TYPELESS_G8_UINT)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   L16A16_UNORM)
+   SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x,    x,   I24X8_UNORM)
+   SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x,    x,   L24X8_UNORM)
+   SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x,    x,   A24X8_UNORM)
+   SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x,    x,   I32_FLOAT)
+   SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x,    x,   L32_FLOAT)
+   SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x,    x,   A32_FLOAT)
+   SF( Y,  Y,  x,  Y,  x,  x,  x,  x, 60,    x,   B8G8R8X8_UNORM)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   B8G8R8X8_UNORM_SRGB)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   R8G8B8X8_UNORM)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   R8G8B8X8_UNORM_SRGB)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   R9G9B9E5_SHAREDEXP)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   B10G10R10X2_UNORM)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   L16A16_FLOAT)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32_UNORM)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32_SNORM)
+/* smpl filt shad CK  RT  AB  VB  SO  color ccs_e */
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R10G10B10X2_USCALED)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R8G8B8A8_SSCALED)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R8G8B8A8_USCALED)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R16G16_SSCALED)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R16G16_USCALED)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32_SSCALED)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32_USCALED)
+   SF( Y,  Y,  x,  Y,  Y,  Y,  x,  x,  x,    x,   B5G6R5_UNORM)
+   SF( Y,  Y,  x,  x,  Y,  Y,  x,  x,  x,    x,   B5G6R5_UNORM_SRGB)
+   SF( Y,  Y,  x,  Y,  Y,  Y,  x,  x,  x,    x,   B5G5R5A1_UNORM)
+   SF( Y,  Y,  x,  x,  Y,  Y,  x,  x,  x,    x,   B5G5R5A1_UNORM_SRGB)
+   SF( Y,  Y,  x,  Y,  Y,  Y,  x,  x,  x,    x,   B4G4R4A4_UNORM)
+   SF( Y,  Y,  x,  x,  Y,  Y,  x,  x,  x,    x,   B4G4R4A4_UNORM_SRGB)
+   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x,  x,    x,   R8G8_UNORM)
+   SF( Y,  Y,  x,  Y,  Y, 60,  Y,  x,  x,    x,   R8G8_SNORM)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,    x,   R8G8_SINT)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,    x,   R8G8_UINT)
+   SF( Y,  Y,  Y,  x,  Y, 45,  Y,  x, 70,    x,   R16_UNORM)
+   SF( Y,  Y,  x,  x,  Y, 60,  Y,  x,  x,    x,   R16_SNORM)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,    x,   R16_SINT)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,    x,   R16_UINT)
+   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x,  x,    x,   R16_FLOAT)
+   SF(50, 50,  x,  x,  x,  x,  x,  x,  x,    x,   A8P8_UNORM_PALETTE0)
+   SF(50, 50,  x,  x,  x,  x,  x,  x,  x,    x,   A8P8_UNORM_PALETTE1)
+   SF( Y,  Y,  Y,  x,  x,  x,  x,  x,  x,    x,   I16_UNORM)
+   SF( Y,  Y,  Y,  x,  x,  x,  x,  x,  x,    x,   L16_UNORM)
+   SF( Y,  Y,  Y,  x,  x,  x,  x,  x,  x,    x,   A16_UNORM)
+   SF( Y,  Y,  x,  Y,  x,  x,  x,  x,  x,    x,   L8A8_UNORM)
+   SF( Y,  Y,  Y,  x,  x,  x,  x,  x,  x,    x,   I16_FLOAT)
+   SF( Y,  Y,  Y,  x,  x,  x,  x,  x,  x,    x,   L16_FLOAT)
+   SF( Y,  Y,  Y,  x,  x,  x,  x,  x,  x,    x,   A16_FLOAT)
+   SF(45, 45,  x,  x,  x,  x,  x,  x,  x,    x,   L8A8_UNORM_SRGB)
+   SF( Y,  Y,  x,  Y,  x,  x,  x,  x,  x,    x,   R5G5_SNORM_B6_UNORM)
+   SF( x,  x,  x,  x,  Y,  Y,  x,  x,  x,    x,   B5G5R5X1_UNORM)
+   SF( x,  x,  x,  x,  Y,  Y,  x,  x,  x,    x,   B5G5R5X1_UNORM_SRGB)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R8G8_SSCALED)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R8G8_USCALED)
+/* smpl filt shad CK  RT  AB  VB  SO  color ccs_e */
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R16_SSCALED)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R16_USCALED)
+   SF(50, 50,  x,  x,  x,  x,  x,  x,  x,    x,   P8A8_UNORM_PALETTE0)
+   SF(50, 50,  x,  x,  x,  x,  x,  x,  x,    x,   P8A8_UNORM_PALETTE1)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   A1B5G5R5_UNORM)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   A4B4G4R4_UNORM)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   L8A8_UINT)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   L8A8_SINT)
+   SF( Y,  Y,  x, 45,  Y,  Y,  Y,  x,  x,    x,   R8_UNORM)
+   SF( Y,  Y,  x,  x,  Y, 60,  Y,  x,  x,    x,   R8_SNORM)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,    x,   R8_SINT)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,    x,   R8_UINT)
+   SF( Y,  Y,  x,  Y,  Y,  Y,  x,  x,  x,    x,   A8_UNORM)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   I8_UNORM)
+   SF( Y,  Y,  x,  Y,  x,  x,  x,  x,  x,    x,   L8_UNORM)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   P4A4_UNORM)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   A4P4_UNORM)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R8_SSCALED)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R8_USCALED)
+   SF(45, 45,  x,  x,  x,  x,  x,  x,  x,    x,   P8_UNORM_PALETTE0)
+   SF(45, 45,  x,  x,  x,  x,  x,  x,  x,    x,   L8_UNORM_SRGB)
+   SF(45, 45,  x,  x,  x,  x,  x,  x,  x,    x,   P8_UNORM_PALETTE1)
+   SF(45, 45,  x,  x,  x,  x,  x,  x,  x,    x,   P4A4_UNORM_PALETTE1)
+   SF(45, 45,  x,  x,  x,  x,  x,  x,  x,    x,   A4P4_UNORM_PALETTE1)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   Y8_SNORM)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   L8_UINT)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   L8_SINT)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   I8_UINT)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   I8_SINT)
+   SF(45, 45,  x,  x,  x,  x,  x,  x,  x,    x,   DXT1_RGB_SRGB)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   R1_UINT)
+   SF( Y,  Y,  x,  Y,  Y,  x,  x,  x, 60,    x,   YCRCB_NORMAL)
+   SF( Y,  Y,  x,  Y,  Y,  x,  x,  x, 60,    x,   YCRCB_SWAPUVY)
+   SF(45, 45,  x,  x,  x,  x,  x,  x,  x,    x,   P2_UNORM_PALETTE0)
+   SF(45, 45,  x,  x,  x,  x,  x,  x,  x,    x,   P2_UNORM_PALETTE1)
+   SF( Y,  Y,  x,  Y,  x,  x,  x,  x,  x,    x,   BC1_UNORM)
+   SF( Y,  Y,  x,  Y,  x,  x,  x,  x,  x,    x,   BC2_UNORM)
+   SF( Y,  Y,  x,  Y,  x,  x,  x,  x,  x,    x,   BC3_UNORM)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   BC4_UNORM)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   BC5_UNORM)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   BC1_UNORM_SRGB)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   BC2_UNORM_SRGB)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   BC3_UNORM_SRGB)
+   SF( Y,  x,  x,  x,  x,  x,  x,  x,  x,    x,   MONO8)
+   SF( Y,  Y,  x,  x,  Y,  x,  x,  x, 60,    x,   YCRCB_SWAPUV)
+   SF( Y,  Y,  x,  x,  Y,  x,  x,  x, 60,    x,   YCRCB_SWAPY)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   DXT1_RGB)
+/* smpl filt shad CK  RT  AB  VB  SO  color ccs_e */
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   FXT1)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R8G8B8_UNORM)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R8G8B8_SNORM)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R8G8B8_SSCALED)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R8G8B8_USCALED)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R64G64B64A64_FLOAT)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R64G64B64_FLOAT)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   BC4_SNORM)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   BC5_SNORM)
+   SF(50, 50,  x,  x,  x,  x, 60,  x,  x,    x,   R16G16B16_FLOAT)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R16G16B16_UNORM)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R16G16B16_SNORM)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R16G16B16_SSCALED)
+   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R16G16B16_USCALED)
+   SF(70, 70,  x,  x,  x,  x,  x,  x,  x,    x,   BC6H_SF16)
+   SF(70, 70,  x,  x,  x,  x,  x,  x,  x,    x,   BC7_UNORM)
+   SF(70, 70,  x,  x,  x,  x,  x,  x,  x,    x,   BC7_UNORM_SRGB)
+   SF(70, 70,  x,  x,  x,  x,  x,  x,  x,    x,   BC6H_UF16)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   PLANAR_420_8)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   R8G8B8_UNORM_SRGB)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   ETC1_RGB8)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   ETC2_RGB8)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   EAC_R11)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   EAC_RG11)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   EAC_SIGNED_R11)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   EAC_SIGNED_RG11)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   ETC2_SRGB8)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   R16G16B16_UINT)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   R16G16B16_SINT)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   R32_SFIXED)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   R10G10B10A2_SNORM)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   R10G10B10A2_USCALED)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   R10G10B10A2_SSCALED)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   R10G10B10A2_SINT)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   B10G10R10A2_SNORM)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   B10G10R10A2_USCALED)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   B10G10R10A2_SSCALED)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   B10G10R10A2_UINT)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   B10G10R10A2_SINT)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   R64G64B64A64_PASSTHRU)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   R64G64B64_PASSTHRU)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   ETC2_RGB8_PTA)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   ETC2_SRGB8_PTA)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   ETC2_EAC_RGBA8)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   ETC2_EAC_SRGB8_A8)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   R8G8B8_UINT)
+   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   R8G8B8_SINT)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_4x4_FLT16)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_5x4_FLT16)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_5x5_FLT16)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_6x5_FLT16)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_6x6_FLT16)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_8x5_FLT16)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_8x6_FLT16)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_8x8_FLT16)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_10x5_FLT16)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_10x6_FLT16)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_10x8_FLT16)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_10x10_FLT16)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_12x10_FLT16)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_12x12_FLT16)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_4x4_U8sRGB)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_5x4_U8sRGB)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_5x5_U8sRGB)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_6x5_U8sRGB)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_6x6_U8sRGB)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_8x5_U8sRGB)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_8x6_U8sRGB)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_8x8_U8sRGB)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_10x5_U8sRGB)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_10x6_U8sRGB)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_10x8_U8sRGB)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_10x10_U8sRGB)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_12x10_U8sRGB)
+   SF(80, 80,  x,  x,  x,  x,  x,  x,  x,    x,   ASTC_LDR_2D_12x12_U8sRGB)
 };
 #undef x
 #undef Y
@@ -784,6 +787,26 @@ brw_render_target_supported(struct brw_context *brw,
    return brw->format_supported_as_render_target[format];
 }
 
+/*
+ * True if the underlying hardware format can support lossless color
+ * compression.
+ */
+bool
+brw_losslessly_compressible_format(struct brw_context *brw,
+                                   uint32_t brw_format)
+{
+   const struct surface_format_info * const sinfo =
+      &surface_formats[brw_format];
+   const int gen = brw->gen * 10;
+
+   assert(brw->gen >= 9);
+
+   if (gen >= sinfo->lossless_compression)
+      return true;
+
+   return false;
+}
+
 GLuint
 translate_tex_format(struct brw_context *brw,
                      mesa_format mesa_format,
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 4c3f2c00d6f..056cdb68b32 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -35,6 +35,7 @@
 
 #include "brw_blorp.h"
 #include "brw_context.h"
+#include "brw_state.h"
 
 #include "main/enums.h"
 #include "main/fbobject.h"
@@ -265,7 +266,11 @@ intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
    if (!brw->format_supported_as_render_target[mt->format])
       return false;
 
-   return true;
+   if (brw->gen >= 9) {
+      const uint32_t brw_format = brw_format_for_mesa_format(mt->format);
+      return brw_losslessly_compressible_format(brw, brw_format);
+   } else
+      return true;
 }
 
 

From 6fa1130cd21926cdd4ae86aa12ee3f5c0bb5ba33 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Tue, 13 Oct 2015 20:50:21 -0700
Subject: [PATCH 185/335] i965/skl: skip fast clears for certain surface
 formats

Some of the information originally in this commit message is now in the patch
before this.

SKL adds compressible render targets and as a result mutates some of the
programming for fast clears and resolves. There is a new internal surface type
called the CCS. The old AUX_MCS bit becomes AUX_CCS_D. "Auxiliary Surfaces For
Sampled Tiled Resource".

The formats which are supported are defined in the table titled "Render Target
Surface Types [SKL+]". There is no PRM yet to reference. The previously
implemented helper function already does the right thing provided the table is
correct.

v2: Use better English in commit message (Matt)
s/compressable/compressible/ (Matt)
Don't compare bools to true (Matt)
Use the helper function and don't increase the context size - this is mostly
implemented in the patch just before this (Chad, Neil)
Remove an "invalid" assert (Chad)
Fix assertion to check num_samples > 1, instead of num_samples (Chad)

v3:
Use Matt's code as Requested-by: Chad. I didn't even look at it since Chad said
he was fine with that, and presumably Matt is fine with it.

v4: Use better quote from spec (Topi)

Cc: Chad Versace <chad.versace@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
---
 .../drivers/dri/i965/brw_surface_formats.c    | 52 +++++++++----------
 .../drivers/dri/i965/gen8_surface_state.c     |  8 ++-
 2 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c
index 944074d1de3..55e7e649620 100644
--- a/src/mesa/drivers/dri/i965/brw_surface_formats.c
+++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c
@@ -90,9 +90,9 @@ struct surface_format_info {
  */
 const struct surface_format_info surface_formats[] = {
 /* smpl filt shad CK  RT  AB  VB  SO  color ccs_e */
-   SF( Y, 50,  x,  x,  Y,  Y,  Y,  Y,  x,    x,   R32G32B32A32_FLOAT)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,    x,   R32G32B32A32_SINT)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,    x,   R32G32B32A32_UINT)
+   SF( Y, 50,  x,  x,  Y,  Y,  Y,  Y,  x,   90,   R32G32B32A32_FLOAT)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,   90,   R32G32B32A32_SINT)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,   90,   R32G32B32A32_UINT)
    SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32G32B32A32_UNORM)
    SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32G32B32A32_SNORM)
    SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R64G64_FLOAT)
@@ -109,15 +109,15 @@ const struct surface_format_info surface_formats[] = {
    SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32G32B32_SSCALED)
    SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32G32B32_USCALED)
    SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   R32G32B32_SFIXED)
-   SF( Y,  Y,  x,  x,  Y, 45,  Y,  x, 60,    x,   R16G16B16A16_UNORM)
-   SF( Y,  Y,  x,  x,  Y, 60,  Y,  x,  x,    x,   R16G16B16A16_SNORM)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,    x,   R16G16B16A16_SINT)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,    x,   R16G16B16A16_UINT)
-   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x,  x,    x,   R16G16B16A16_FLOAT)
-   SF( Y, 50,  x,  x,  Y,  Y,  Y,  Y,  x,    x,   R32G32_FLOAT)
+   SF( Y,  Y,  x,  x,  Y, 45,  Y,  x, 60,   90,   R16G16B16A16_UNORM)
+   SF( Y,  Y,  x,  x,  Y, 60,  Y,  x,  x,   90,   R16G16B16A16_SNORM)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,   90,   R16G16B16A16_SINT)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,   90,   R16G16B16A16_UINT)
+   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x,  x,   90,   R16G16B16A16_FLOAT)
+   SF( Y, 50,  x,  x,  Y,  Y,  Y,  Y,  x,   90,   R32G32_FLOAT)
    SF( Y, 70,  x,  x,  Y,  Y,  Y,  Y,  x,    x,   R32G32_FLOAT_LD)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,    x,   R32G32_SINT)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,    x,   R32G32_UINT)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,   90,   R32G32_SINT)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,   90,   R32G32_UINT)
    SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x,    x,   R32_FLOAT_X8X24_TYPELESS)
    SF( Y,  x,  x,  x,  x,  x,  x,  x,  x,    x,   X32_TYPELESS_G8X24_UINT)
    SF( Y, 50,  x,  x,  x,  x,  x,  x,  x,    x,   L32A32_FLOAT)
@@ -125,7 +125,7 @@ const struct surface_format_info surface_formats[] = {
    SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32G32_SNORM)
    SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R64_FLOAT)
    SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   R16G16B16X16_UNORM)
-   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   R16G16B16X16_FLOAT)
+   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,   90,   R16G16B16X16_FLOAT)
    SF( Y, 50,  x,  x,  x,  x,  x,  x,  x,    x,   A32X32_FLOAT)
    SF( Y, 50,  x,  x,  x,  x,  x,  x,  x,    x,   L32X32_FLOAT)
    SF( Y, 50,  x,  x,  x,  x,  x,  x,  x,    x,   I32X32_FLOAT)
@@ -135,29 +135,29 @@ const struct surface_format_info surface_formats[] = {
    SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,    x,   R32G32_USCALED)
    SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   R32G32_SFIXED)
    SF( x,  x,  x,  x,  x,  x,  x,  x,  x,    x,   R64_PASSTHRU)
-   SF( Y,  Y,  x,  Y,  Y,  Y,  Y,  x, 60,    x,   B8G8R8A8_UNORM)
+   SF( Y,  Y,  x,  Y,  Y,  Y,  Y,  x, 60,   90,   B8G8R8A8_UNORM)
    SF( Y,  Y,  x,  x,  Y,  Y,  x,  x,  x,    x,   B8G8R8A8_UNORM_SRGB)
 /* smpl filt shad CK  RT  AB  VB  SO  color ccs_e */
    SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x, 60,    x,   R10G10B10A2_UNORM)
    SF( Y,  Y,  x,  x,  x,  x,  x,  x, 60,    x,   R10G10B10A2_UNORM_SRGB)
    SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,    x,   R10G10B10A2_UINT)
    SF( Y,  Y,  x,  x,  x,  Y,  Y,  x,  x,    x,   R10G10B10_SNORM_A2_UNORM)
-   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x, 60,    x,   R8G8B8A8_UNORM)
+   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x, 60,   90,   R8G8B8A8_UNORM)
    SF( Y,  Y,  x,  x,  Y,  Y,  x,  x, 60,    x,   R8G8B8A8_UNORM_SRGB)
-   SF( Y,  Y,  x,  x,  Y, 60,  Y,  x,  x,    x,   R8G8B8A8_SNORM)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,    x,   R8G8B8A8_SINT)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,    x,   R8G8B8A8_UINT)
-   SF( Y,  Y,  x,  x,  Y, 45,  Y,  x,  x,    x,   R16G16_UNORM)
-   SF( Y,  Y,  x,  x,  Y, 60,  Y,  x,  x,    x,   R16G16_SNORM)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,    x,   R16G16_SINT)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,    x,   R16G16_UINT)
-   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x,  x,    x,   R16G16_FLOAT)
+   SF( Y,  Y,  x,  x,  Y, 60,  Y,  x,  x,   90,   R8G8B8A8_SNORM)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,   90,   R8G8B8A8_SINT)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,   90,   R8G8B8A8_UINT)
+   SF( Y,  Y,  x,  x,  Y, 45,  Y,  x,  x,   90,   R16G16_UNORM)
+   SF( Y,  Y,  x,  x,  Y, 60,  Y,  x,  x,   90,   R16G16_SNORM)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,   90,   R16G16_SINT)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,   90,   R16G16_UINT)
+   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x,  x,   90,   R16G16_FLOAT)
    SF( Y,  Y,  x,  x,  Y,  Y,  x,  x, 60,    x,   B10G10R10A2_UNORM)
    SF( Y,  Y,  x,  x,  Y,  Y,  x,  x, 60,    x,   B10G10R10A2_UNORM_SRGB)
    SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x,  x,    x,   R11G11B10_FLOAT)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,    x,   R32_SINT)
-   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,    x,   R32_UINT)
-   SF( Y, 50,  Y,  x,  Y,  Y,  Y,  Y,  x,    x,   R32_FLOAT)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,   90,   R32_SINT)
+   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,   90,   R32_UINT)
+   SF( Y, 50,  Y,  x,  Y,  Y,  Y,  Y,  x,   90,   R32_FLOAT)
    SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x,    x,   R24_UNORM_X8_TYPELESS)
    SF( Y,  x,  x,  x,  x,  x,  x,  x,  x,    x,   X24_TYPELESS_G8_UINT)
    SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   L16A16_UNORM)
@@ -167,7 +167,7 @@ const struct surface_format_info surface_formats[] = {
    SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x,    x,   I32_FLOAT)
    SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x,    x,   L32_FLOAT)
    SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x,    x,   A32_FLOAT)
-   SF( Y,  Y,  x,  Y,  x,  x,  x,  x, 60,    x,   B8G8R8X8_UNORM)
+   SF( Y,  Y,  x,  Y,  x,  x,  x,  x, 60,   90,   B8G8R8X8_UNORM)
    SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   B8G8R8X8_UNORM_SRGB)
    SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   R8G8B8X8_UNORM)
    SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x,    x,   R8G8B8X8_UNORM_SRGB)
diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c
index 69098583357..e81b64629f8 100644
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -222,6 +222,7 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
    int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
    unsigned tiling_mode, pitch;
    const unsigned tr_mode = surface_tiling_resource_mode(mt->tr_mode);
+   const uint32_t surf_type = translate_tex_target(target);
 
    if (mt->format == MESA_FORMAT_S_UINT8) {
       tiling_mode = GEN8_SURFACE_TILING_W;
@@ -245,9 +246,14 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
        */
       if (brw->gen >= 9 || mt->num_samples == 1)
          assert(mt->halign == 16);
+
+      if (brw->gen >= 9) {
+         assert(mt->num_samples > 1 ||
+                brw_losslessly_compressible_format(brw, surf_type));
+      }
+
    }
 
-   const uint32_t surf_type = translate_tex_target(target);
    uint32_t *surf = allocate_surface_state(brw, surf_offset, surf_index);
 
    surf[0] = SET_FIELD(surf_type, BRW_SURFACE_TYPE) |

From c4edc048c6f6877461a9d9dc07142640f380f340 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Tue, 13 Oct 2015 20:50:22 -0700
Subject: [PATCH 186/335] i965/meta/gen9: Individually fast clear color
 attachments

The impetus for this patch comes from a seemingly benign statement within the
spec (quoted within the patch).

It is very important for clearing multiple color buffer attachments and can be
observed in the following piglit tests:
spec/arb_framebuffer_object/fbo-drawbuffers-none glclear
spec/ext_framebuffer_multisample/blit-multiple-render-targets 0

v2: Do the framebuffer binding only once (Chad)
Directly use the renderbuffers from the mt (Chad)

v3: Patch from Neil whose feedback I originally missed.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Chad Versace <chad.versace@intel.com>
Reviewed-by: Neil Roberts <neil@linux.intel.com>
---
 .../drivers/dri/i965/brw_meta_fast_clear.c    | 78 +++++++++++++++----
 1 file changed, 65 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index 938e028f58d..7bf68194b71 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -427,6 +427,55 @@ use_rectlist(struct brw_context *brw, bool enable)
    brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
 }
 
+/**
+ * Individually fast clear each color buffer attachment. On previous gens this
+ * isn't required. The motivation for this comes from one line (which seems to
+ * be specific to SKL+). The list item is in section titled _MCS Buffer for
+ * Render Target(s)_
+ *
+ *   "Since only one RT is bound with a clear pass, only one RT can be cleared
+ *   at a time. To clear multiple RTs, multiple clear passes are required."
+ *
+ * The code follows the same idea as the resolve code which creates a fake FBO
+ * to avoid interfering with too much of the GL state.
+ */
+static void
+fast_clear_attachments(struct brw_context *brw,
+                       struct gl_framebuffer *fb,
+                       uint32_t fast_clear_buffers,
+                       struct rect fast_clear_rect)
+{
+   assert(brw->gen >= 9);
+   struct gl_context *ctx = &brw->ctx;
+
+   brw_bind_rep_write_shader(brw, (float *) fast_clear_color);
+
+   /* SKL+ also has a resolve mode for compressed render targets and thus more
+    * bits to let us select the type of resolve.  For fast clear resolves, it
+    * turns out we can use the same value as pre-SKL though.
+    */
+   set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE);
+
+   while (fast_clear_buffers) {
+      int index = ffs(fast_clear_buffers) - 1;
+
+      fast_clear_buffers &= ~(1 << index);
+
+      _mesa_meta_drawbuffers_from_bitfield(1 << index);
+
+      brw_draw_rectlist(ctx, &fast_clear_rect, MAX2(1, fb->MaxNumLayers));
+
+      /* Now set the mcs we cleared to INTEL_FAST_CLEAR_STATE_CLEAR so we'll
+       * resolve them eventually.
+       */
+      struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[0];
+      struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+      irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
+   }
+
+   set_fast_clear_op(brw, 0);
+}
+
 bool
 brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
                     GLbitfield buffers, bool partial_clear)
@@ -609,12 +658,27 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
    use_rectlist(brw, true);
 
    layers = MAX2(1, fb->MaxNumLayers);
-   if (fast_clear_buffers) {
+
+   if (brw->gen >= 9 && fast_clear_buffers) {
+      fast_clear_attachments(brw, fb, fast_clear_buffers, fast_clear_rect);
+   } else if (fast_clear_buffers) {
       _mesa_meta_drawbuffers_from_bitfield(fast_clear_buffers);
       brw_bind_rep_write_shader(brw, (float *) fast_clear_color);
       set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE);
       brw_draw_rectlist(ctx, &fast_clear_rect, layers);
       set_fast_clear_op(brw, 0);
+
+      /* Now set the mcs we cleared to INTEL_FAST_CLEAR_STATE_CLEAR so we'll
+       * resolve them eventually.
+       */
+      for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) {
+         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf];
+         struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+         int index = fb->_ColorDrawBufferIndexes[buf];
+
+         if ((1 << index) & fast_clear_buffers)
+            irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
+      }
    }
 
    if (rep_clear_buffers) {
@@ -623,18 +687,6 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
       brw_draw_rectlist(ctx, &clear_rect, layers);
    }
 
-   /* Now set the mts we cleared to INTEL_FAST_CLEAR_STATE_CLEAR so we'll
-    * resolve them eventually.
-    */
-   for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) {
-      struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf];
-      struct intel_renderbuffer *irb = intel_renderbuffer(rb);
-      int index = fb->_ColorDrawBufferIndexes[buf];
-
-      if ((1 << index) & fast_clear_buffers)
-         irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
-   }
-
  bail_to_meta:
    /* Dirty _NEW_BUFFERS so we reemit SURFACE_STATE which sets the fast clear
     * color before resolve and sets irb->mt->fast_clear_state to UNRESOLVED if

From f838e53c70e387232f89bd235bcee582894dc257 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Tue, 13 Oct 2015 20:50:23 -0700
Subject: [PATCH 187/335] Revert "i965/gen9: Disable MCS for 1x color surfaces"

This reverts commit dcd59a9e322edeea74187bcad65a8e56c0bfaaa2.

Reviewed-by: Neil Roberts <neil@linux.intel.com>
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 056cdb68b32..87e01366932 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -208,14 +208,6 @@ intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
    if (brw->gen < 7)
       return false;
 
-   if (brw->gen >= 9) {
-      /* FINISHME: Enable singlesample fast MCS clears on SKL after all GPU
-       * FINISHME: hangs are resolved.
-       */
-      perf_debug("singlesample fast MCS clears disabled on gen9");
-      return false;
-   }
-
    if (mt->disable_aux_buffers)
       return false;
 

From 7c690da29c9eb78f1f61c0aed6582065b8ff9775 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Tue, 13 Oct 2015 20:50:24 -0700
Subject: [PATCH 188/335] Revert "i965/gen9: Enable rep clears on gen9"

This reverts commit 8a0c85b25853decb4a110b6d36d79c4f095d437b.

It's not a strict revert because I don't want to bring back the gen < 9 check at
this point in time.

Reviewed-by: Neil Roberts <neil@linux.intel.com>
---
 src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index 7bf68194b71..f3c256d11db 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -525,11 +525,6 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
       if (irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_NO_MCS)
          clear_type = REP_CLEAR;
 
-      if (brw->gen >= 9 && clear_type == FAST_CLEAR) {
-         perf_debug("fast MCS clears are disabled on gen9");
-         clear_type = REP_CLEAR;
-      }
-
       /* We can't do scissored fast clears because of the restrictions on the
        * fast clear rectangle size.
        */

From 0288f92e7b0ce5f0d821f2d0ddef522a23776ecb Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Tue, 13 Oct 2015 20:50:27 -0700
Subject: [PATCH 189/335] i965/gen9: Support fast clears for 32b float

SKL supports the ability to do fast clears and resolves of 32b RGBA as both
integers and floats. This patch only enables float color clears because we
haven't yet enabled integer color clears (HW support for that was added in
BDW).

v2: Remove LUMINANCE16F and INTENSITY16F special cases since they are now
handled by Neil's patch to disable MSAA fast clears.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Neil Roberts <neil@linux.intel.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 8 ++++++--
 src/mesa/drivers/dri/i965/gen8_surface_state.c  | 8 --------
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index f3c256d11db..499daba3b00 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -359,8 +359,12 @@ is_color_fast_clear_compatible(struct brw_context *brw,
    }
 
    for (int i = 0; i < 4; i++) {
-      if (color->f[i] != 0.0f && color->f[i] != 1.0f &&
-          _mesa_format_has_color_component(format, i)) {
+      if (!_mesa_format_has_color_component(format, i)) {
+         continue;
+      }
+
+      if (brw->gen < 9 &&
+          color->f[i] != 0.0f && color->f[i] != 1.0f) {
          return false;
       }
    }
diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c
index e81b64629f8..9cdd1c71b4d 100644
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -188,14 +188,6 @@ gen8_emit_fast_clear_color(struct brw_context *brw,
                            uint32_t *surf)
 {
    if (brw->gen >= 9) {
-#define check_fast_clear_val(x) \
-      assert(mt->gen9_fast_clear_color.f[x] == 0.0 || \
-             mt->gen9_fast_clear_color.f[x] == 1.0)
-      check_fast_clear_val(0);
-      check_fast_clear_val(1);
-      check_fast_clear_val(2);
-      check_fast_clear_val(3);
-#undef check_fast_clear_val
       surf[12] = mt->gen9_fast_clear_color.ui[0];
       surf[13] = mt->gen9_fast_clear_color.ui[1];
       surf[14] = mt->gen9_fast_clear_color.ui[2];

From aede8ca9a79cafa7d019a16d38e6ee6bfc557100 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Tue, 10 Nov 2015 01:40:00 +0100
Subject: [PATCH 190/335] nv50: expose two groups of compute-related MP perf
 counters

This turns on GL_AMD_performance_monitor.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Acked-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/nv50/nv50_query.c | 52 +++++++++++++++++++
 src/gallium/drivers/nouveau/nv50/nv50_query.h |  6 +++
 .../nouveau/nv50/nv50_query_hw_metric.c       |  2 +-
 .../drivers/nouveau/nv50/nv50_query_hw_sm.c   |  2 +-
 .../drivers/nouveau/nv50/nv50_screen.c        |  1 +
 .../drivers/nouveau/nv50/nv50_screen.h        |  2 +
 6 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 6b3e49a25a9..4cd3b615606 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -27,6 +27,8 @@
 #include "nv50/nv50_context.h"
 #include "nv50/nv50_query.h"
 #include "nv50/nv50_query_hw.h"
+#include "nv50/nv50_query_hw_metric.h"
+#include "nv50/nv50_query_hw_sm.h"
 
 static struct pipe_query *
 nv50_create_query(struct pipe_context *pipe, unsigned type, unsigned index)
@@ -178,3 +180,53 @@ nv50_screen_get_driver_query_info(struct pipe_screen *pscreen,
 
    return nv50_hw_get_driver_query_info(screen, id, info);
 }
+
+int
+nv50_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
+                                        unsigned id,
+                                        struct pipe_driver_query_group_info *info)
+{
+   struct nv50_screen *screen = nv50_screen(pscreen);
+   int count = 0;
+
+   if (screen->compute)
+      if (screen->base.class_3d >= NV84_3D_CLASS)
+         count += 2;
+
+   if (!info)
+      return count;
+
+   if (id == NV50_HW_SM_QUERY_GROUP) {
+      if (screen->compute) {
+         if (screen->base.class_3d >= NV84_3D_CLASS) {
+            info->name = "MP counters";
+
+            /* Because we can't expose the number of hardware counters needed
+             * for each different query, we don't want to allow more than one
+             * active query simultaneously to avoid failure when the maximum
+             * number of counters is reached. Note that these groups of GPU
+             * counters are currently only used by AMD_performance_monitor.
+             */
+            info->max_active_queries = 1;
+            info->num_queries = NV50_HW_SM_QUERY_COUNT;
+            return 1;
+         }
+      }
+   } else
+   if (id == NV50_HW_METRIC_QUERY_GROUP) {
+      if (screen->compute) {
+         if (screen->base.class_3d >= NV84_3D_CLASS) {
+            info->name = "Performance metrics";
+            info->max_active_queries = 1;
+            info->num_queries = NV50_HW_METRIC_QUERY_COUNT;
+            return 1;
+         }
+      }
+   }
+
+   /* user asked for info about non-existing query group */
+   info->name = "this_is_not_the_query_group_you_are_looking_for";
+   info->max_active_queries = 0;
+   info->num_queries = 0;
+   return 0;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.h b/src/gallium/drivers/nouveau/nv50/nv50_query.h
index d990285c857..bd4c0a386f6 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.h
@@ -28,6 +28,12 @@ nv50_query(struct pipe_query *pipe)
    return (struct nv50_query *)pipe;
 }
 
+/*
+ * Driver queries groups:
+ */
+#define NV50_HW_SM_QUERY_GROUP       0
+#define NV50_HW_METRIC_QUERY_GROUP   1
+
 void nv50_init_query_functions(struct nv50_context *);
 
 #endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c
index 13dad30f113..d1bccb94193 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c
@@ -198,7 +198,7 @@ nv50_hw_metric_get_driver_query_info(struct nv50_screen *screen, unsigned id,
          if (screen->base.class_3d >= NV84_3D_CLASS) {
             info->name = nv50_hw_metric_names[id];
             info->query_type = NV50_HW_METRIC_QUERY(id);
-            info->group_id = -1;
+            info->group_id = NV50_HW_METRIC_QUERY_GROUP;
             return 1;
          }
       }
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
index e75b428fb12..8453ce76095 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
@@ -408,7 +408,7 @@ nv50_hw_sm_get_driver_query_info(struct nv50_screen *screen, unsigned id,
          if (screen->base.class_3d >= NV84_3D_CLASS) {
             info->name = nv50_hw_sm_query_names[id];
             info->query_type = NV50_HW_SM_QUERY(id);
-            info->group_id = -1;
+            info->group_id = NV50_HW_SM_QUERY_GROUP;
             return 1;
          }
       }
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index cc7984d307b..1e4b75f18e0 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -790,6 +790,7 @@ nv50_screen_create(struct nouveau_device *dev)
    pscreen->get_paramf = nv50_screen_get_paramf;
    pscreen->get_compute_param = nv50_screen_get_compute_param;
    pscreen->get_driver_query_info = nv50_screen_get_driver_query_info;
+   pscreen->get_driver_query_group_info = nv50_screen_get_driver_query_group_info;
 
    nv50_screen_init_resource_functions(pscreen);
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index c2a16d8bd1d..2a4983d1020 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -117,6 +117,8 @@ nv50_screen(struct pipe_screen *screen)
 
 int nv50_screen_get_driver_query_info(struct pipe_screen *, unsigned,
                                       struct pipe_driver_query_info *);
+int nv50_screen_get_driver_query_group_info(struct pipe_screen *, unsigned,
+                                            struct pipe_driver_query_group_info *);
 
 bool nv50_blitter_create(struct nv50_screen *);
 void nv50_blitter_destroy(struct nv50_screen *);

From f57285c8fc8c3128fc07a59bc0d56645f7e0ef18 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Tue, 10 Nov 2015 01:08:28 +0100
Subject: [PATCH 191/335] docs: mark GL_AMD_performance_monitor for nv50

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Acked-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 docs/relnotes/11.1.0.html | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index d317bfbbddb..04c7f7f9a8f 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -45,6 +45,7 @@ Note: some of the new features are only available with certain drivers.
 
 <ul>
 <li>OpenGL 3.1 support on freedreno (a3xx, a4xx)</li>
+<li>GL_AMD_performance_monitor on nv50</li>
 <li>GL_ARB_arrays_of_arrays on i965</li>
 <li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>
 <li>GL_ARB_clear_texture on nv50, nvc0</li>

From de8f0c9ab99ac6140f6560e776a42a22eeff6721 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Sat, 14 Nov 2015 12:22:49 +1100
Subject: [PATCH 192/335] glsl: add process_qualifier_constant() helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For now this just validates that a qualifier is inside its
minimum boundary; in a later patch we will expand it to
evaluate compile time constants.

Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
---
 src/glsl/ast_to_hir.cpp | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index a9b1c0ed34b..334561d4eb6 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2504,6 +2504,23 @@ validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state,
    }
 }
 
+static bool
+process_qualifier_constant(struct _mesa_glsl_parse_state *state,
+                           YYLTYPE *loc,
+                           const char *qual_indentifier,
+                           int qual_value,
+                           unsigned *value)
+{
+   if (qual_value < 0) {
+      _mesa_glsl_error(loc, state, "%s layout qualifier is invalid (%d < 0)",
+                       qual_indentifier, qual_value);
+      return false;
+   }
+
+   *value = (unsigned) qual_value;
+   return true;
+}
+
 static bool
 validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
                            YYLTYPE *loc,

From d1f23545a1416ffa476a3685e39380701cbfc4fd Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Fri, 13 Nov 2015 15:10:57 +1100
Subject: [PATCH 193/335] glsl: move location layout qualifier validation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We are moving this out of the parser in preparation for compile
time constant support.

Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
---
 src/glsl/ast_to_hir.cpp | 22 ++++++++++++++--------
 src/glsl/glsl_parser.yy |  8 +-------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 334561d4eb6..f0f2d52d811 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2674,13 +2674,19 @@ apply_explicit_location(const struct ast_type_qualifier *qual,
 {
    bool fail = false;
 
+   unsigned qual_location;
+   if (!process_qualifier_constant(state, loc, "location", qual->location,
+                                   &qual_location)) {
+      return;
+   }
+
    /* Checks for GL_ARB_explicit_uniform_location. */
    if (qual->flags.q.uniform) {
       if (!state->check_explicit_uniform_location_allowed(loc, var))
          return;
 
       const struct gl_context *const ctx = state->ctx;
-      unsigned max_loc = qual->location + var->type->uniform_locations() - 1;
+      unsigned max_loc = qual_location + var->type->uniform_locations() - 1;
 
       if (max_loc >= ctx->Const.MaxUserAssignableUniformLocations) {
          _mesa_glsl_error(loc, state, "location(s) consumed by uniform %s "
@@ -2690,7 +2696,7 @@ apply_explicit_location(const struct ast_type_qualifier *qual,
       }
 
       var->data.explicit_location = true;
-      var->data.location = qual->location;
+      var->data.location = qual_location;
       return;
    }
 
@@ -2775,23 +2781,23 @@ apply_explicit_location(const struct ast_type_qualifier *qual,
       switch (state->stage) {
       case MESA_SHADER_VERTEX:
          var->data.location = (var->data.mode == ir_var_shader_in)
-            ? (qual->location + VERT_ATTRIB_GENERIC0)
-            : (qual->location + VARYING_SLOT_VAR0);
+            ? (qual_location + VERT_ATTRIB_GENERIC0)
+            : (qual_location + VARYING_SLOT_VAR0);
          break;
 
       case MESA_SHADER_TESS_CTRL:
       case MESA_SHADER_TESS_EVAL:
       case MESA_SHADER_GEOMETRY:
          if (var->data.patch)
-            var->data.location = qual->location + VARYING_SLOT_PATCH0;
+            var->data.location = qual_location + VARYING_SLOT_PATCH0;
          else
-            var->data.location = qual->location + VARYING_SLOT_VAR0;
+            var->data.location = qual_location + VARYING_SLOT_VAR0;
          break;
 
       case MESA_SHADER_FRAGMENT:
          var->data.location = (var->data.mode == ir_var_shader_out)
-            ? (qual->location + FRAG_RESULT_DATA0)
-            : (qual->location + VARYING_SLOT_VAR0);
+            ? (qual_location + FRAG_RESULT_DATA0)
+            : (qual_location + VARYING_SLOT_VAR0);
          break;
       case MESA_SHADER_COMPUTE:
          assert(!"Unexpected shader type");
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 403cbd1564a..d2d5058befe 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -1466,13 +1466,7 @@ layout_qualifier_id:
                                "GL_ARB_explicit_attrib_location layout "
                                "identifier `%s' used", $1);
          }
-
-         if ($3 >= 0) {
-            $$.location = $3;
-         } else {
-             _mesa_glsl_error(& @3, state, "invalid location %d specified", $3);
-             YYERROR;
-         }
+         $$.location = $3;
       }
 
       if (match_layout_qualifier("index", $1, state) == 0) {

From 1d87d6f9ca543631b2bc30ac8d82b6a23159fb55 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Fri, 13 Nov 2015 15:16:20 +1100
Subject: [PATCH 194/335] glsl: remove duplicate validation for index layout
 qualifier
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The minimum value for index is validated in apply_explicit_location()
and we want to remove validation from the parser so we can add
compile time constant support.

Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
---
 src/glsl/glsl_parser.yy | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index d2d5058befe..a96b18087b8 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -1476,13 +1476,7 @@ layout_qualifier_id:
          }
 
          $$.flags.q.explicit_index = 1;
-
-         if ($3 >= 0) {
-            $$.index = $3;
-         } else {
-            _mesa_glsl_error(& @3, state, "invalid index %d specified", $3);
-            YYERROR;
-         }
+         $$.index = $3;
       }
 
       if ((state->has_420pack() ||

From efa34e4a1d09c6f140fba7ff339a989ea079e212 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Sat, 14 Nov 2015 13:09:46 +1100
Subject: [PATCH 195/335] glsl: replace index layout min boundary check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use new helper that will in a later patch allow for
compile time constants.

Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
---
 src/glsl/ast_to_hir.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index f0f2d52d811..fde3df5d9b6 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2804,7 +2804,10 @@ apply_explicit_location(const struct ast_type_qualifier *qual,
          break;
       }
 
-      if (qual->flags.q.explicit_index) {
+      unsigned qual_index;
+      if (qual->flags.q.explicit_index &&
+          process_qualifier_constant(state, loc, "index", qual->index,
+                                     &qual_index)) {
          /* From the GLSL 4.30 specification, section 4.4.2 (Output
           * Layout Qualifiers):
           *
@@ -2814,12 +2817,12 @@ apply_explicit_location(const struct ast_type_qualifier *qual,
           * Older specifications don't mandate a behavior; we take
           * this as a clarification and always generate the error.
           */
-         if (qual->index < 0 || qual->index > 1) {
+         if (qual_index > 1) {
             _mesa_glsl_error(loc, state,
                              "explicit index may only be 0 or 1");
          } else {
             var->data.explicit_index = true;
-            var->data.index = qual->index;
+            var->data.index = qual_index;
          }
       }
    }

From 17e224e8ec9c190fb856a60a22d8e19b8f20837e Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Fri, 13 Nov 2015 18:47:55 +1100
Subject: [PATCH 196/335] glsl: move stream layout qualifier validation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We are moving this out of the parser in preparation for compile
time constant support.

The reason a validation function is used rather than an apply
function like the one used with bindings is that GLSL allows
streams to be defined on members of blocks even though they must
match the stream that's associated with the current block. This
means we need access to the value after validation to do this
comparison.

V2: Fix typo in comment (Emil)

Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
---
 src/glsl/ast_to_hir.cpp | 44 ++++++++++++++++++++++++++++++-----------
 src/glsl/glsl_parser.yy | 11 ++---------
 2 files changed, 35 insertions(+), 20 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index fde3df5d9b6..7104aa0a633 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -3034,7 +3034,11 @@ apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
 
    if (state->stage == MESA_SHADER_GEOMETRY &&
        qual->flags.q.out && qual->flags.q.stream) {
-      var->data.stream = qual->stream;
+      unsigned qual_stream;
+      if (process_qualifier_constant(state, loc, "stream", qual->stream,
+                                     &qual_stream)) {
+         var->data.stream = qual_stream;
+      }
    }
 
    if (var->type->contains_atomic()) {
@@ -6080,7 +6084,8 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
                                           enum glsl_matrix_layout matrix_layout,
                                           bool allow_reserved_names,
                                           ir_variable_mode var_mode,
-                                          ast_type_qualifier *layout)
+                                          ast_type_qualifier *layout,
+                                          unsigned block_stream)
 {
    unsigned decl_count = 0;
 
@@ -6188,11 +6193,16 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
        *   the specified stream must match the stream associated with the
        *   containing block."
        */
-      if (qual->flags.q.explicit_stream &&
-          qual->stream != layout->stream) {
-         _mesa_glsl_error(&loc, state, "stream layout qualifier on interface "
-                          "block member does not match the interface block "
-                          "(%d vs %d)", qual->stream, layout->stream);
+      if (qual->flags.q.explicit_stream) {
+         unsigned qual_stream;
+         if (process_qualifier_constant(state, &loc, "stream",
+                                        qual->stream, &qual_stream) &&
+             qual_stream != block_stream) {
+            _mesa_glsl_error(&loc, state, "stream layout qualifier on "
+                             "interface block member does not match "
+                             "the interface block (%d vs %d)", qual->stream,
+                             block_stream);
+         }
       }
 
       if (qual->flags.q.uniform && qual->has_interpolation()) {
@@ -6350,7 +6360,8 @@ ast_struct_specifier::hir(exec_list *instructions,
                                                 GLSL_MATRIX_LAYOUT_INHERITED,
                                                 false /* allow_reserved_names */,
                                                 ir_var_auto,
-                                                NULL);
+                                                NULL,
+                                                0 /* for interface only */);
 
    validate_identifier(this->name, loc, state);
 
@@ -6504,6 +6515,16 @@ ast_interface_block::hir(exec_list *instructions,
                        "Interface block sets both readonly and writeonly");
    }
 
+   unsigned qual_stream;
+   if (!process_qualifier_constant(state, &loc, "stream", this->layout.stream,
+                                   &qual_stream)) {
+      /* If the stream qualifier is invalid it doesn't make sense to continue
+       * on and try to compare stream layouts on member variables against it
+       * so just return early.
+       */
+      return NULL;
+   }
+
    unsigned int num_variables =
       ast_process_struct_or_iface_block_members(&declared_variables,
                                                 state,
@@ -6513,7 +6534,8 @@ ast_interface_block::hir(exec_list *instructions,
                                                 matrix_layout,
                                                 redeclaring_per_vertex,
                                                 var_mode,
-                                                &this->layout);
+                                                &this->layout,
+                                                qual_stream);
 
    state->struct_specifier_depth--;
 
@@ -6859,7 +6881,7 @@ ast_interface_block::hir(exec_list *instructions,
          var->data.explicit_binding = this->layout.flags.q.explicit_binding;
          var->data.binding = this->layout.binding;
 
-         var->data.stream = this->layout.stream;
+         var->data.stream = qual_stream;
 
          state->symbols->add_variable(var);
          instructions->push_tail(var);
@@ -6879,7 +6901,7 @@ ast_interface_block::hir(exec_list *instructions,
          var->data.centroid = fields[i].centroid;
          var->data.sample = fields[i].sample;
          var->data.patch = fields[i].patch;
-         var->data.stream = this->layout.stream;
+         var->data.stream = qual_stream;
          var->init_interface_type(block_type);
 
          if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform)
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index a96b18087b8..b4a1652a14c 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -1514,15 +1514,8 @@ layout_qualifier_id:
          if (match_layout_qualifier("stream", $1, state) == 0 &&
              state->check_explicit_attrib_stream_allowed(& @3)) {
             $$.flags.q.stream = 1;
-
-            if ($3 < 0) {
-               _mesa_glsl_error(& @3, state,
-                                "invalid stream %d specified", $3);
-               YYERROR;
-            } else {
-               $$.flags.q.explicit_stream = 1;
-               $$.stream = $3;
-            }
+            $$.flags.q.explicit_stream = 1;
+            $$.stream = $3;
          }
       }
 

From db3c36aedfa2e92c2cf1c17a096c1b5e7cd51c42 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Sat, 14 Nov 2015 14:32:38 +1100
Subject: [PATCH 197/335] glsl: move stream layout max validation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This validation is moved later so we can validate the
max value when compile time constant support is added in a
later patch.

Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
---
 src/glsl/ast_to_hir.cpp | 21 +++++++++++++++++++--
 src/glsl/ast_type.cpp   | 14 --------------
 2 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 7104aa0a633..bb0db7992e5 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2521,6 +2521,21 @@ process_qualifier_constant(struct _mesa_glsl_parse_state *state,
    return true;
 }
 
+static bool
+validate_stream_qualifier(YYLTYPE *loc, struct _mesa_glsl_parse_state *state,
+                          unsigned stream)
+{
+   if (stream >= state->ctx->Const.MaxVertexStreams) {
+      _mesa_glsl_error(loc, state,
+                       "invalid stream specified %d is larger than "
+                       "MAX_VERTEX_STREAMS - 1 (%d).",
+                       stream, state->ctx->Const.MaxVertexStreams - 1);
+      return false;
+   }
+
+   return true;
+}
+
 static bool
 validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
                            YYLTYPE *loc,
@@ -3036,7 +3051,8 @@ apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
        qual->flags.q.out && qual->flags.q.stream) {
       unsigned qual_stream;
       if (process_qualifier_constant(state, loc, "stream", qual->stream,
-                                     &qual_stream)) {
+                                     &qual_stream) &&
+          validate_stream_qualifier(loc, state, qual_stream)) {
          var->data.stream = qual_stream;
       }
    }
@@ -6517,7 +6533,8 @@ ast_interface_block::hir(exec_list *instructions,
 
    unsigned qual_stream;
    if (!process_qualifier_constant(state, &loc, "stream", this->layout.stream,
-                                   &qual_stream)) {
+                                   &qual_stream) ||
+       !validate_stream_qualifier(&loc, state, qual_stream)) {
       /* If the stream qualifier is invalid it doesn't make sense to continue
        * on and try to compare stream layouts on member variables against it
        * so just return early.
diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp
index 79134c19893..b107051e32c 100644
--- a/src/glsl/ast_type.cpp
+++ b/src/glsl/ast_type.cpp
@@ -190,20 +190,6 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
 
    if (state->stage == MESA_SHADER_GEOMETRY &&
        state->has_explicit_attrib_stream()) {
-      if (q.flags.q.stream && q.stream >= state->ctx->Const.MaxVertexStreams) {
-         _mesa_glsl_error(loc, state,
-                          "`stream' value is larger than MAX_VERTEX_STREAMS - 1 "
-                          "(%d > %d)",
-                          q.stream, state->ctx->Const.MaxVertexStreams - 1);
-      }
-      if (this->flags.q.explicit_stream &&
-          this->stream >= state->ctx->Const.MaxVertexStreams) {
-         _mesa_glsl_error(loc, state,
-                          "`stream' value is larger than MAX_VERTEX_STREAMS - 1 "
-                          "(%d > %d)",
-                          this->stream, state->ctx->Const.MaxVertexStreams - 1);
-      }
-
       if (!this->flags.q.explicit_stream) {
          if (q.flags.q.stream) {
             this->flags.q.stream = 1;

From 64710db66461e5ccfaf7667971bc5e513b9ce547 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Sun, 15 Nov 2015 00:55:29 +1100
Subject: [PATCH 198/335] glsl: encapsulate binding validation and setting

This change moves the binding layout handling code into an apply
function to be consistent with other helper functions in the ast
code, and to encapsulate the code so that when we introduce
compile time constants the code will be much cleaner.

One small downside is that for unnamed interface blocks we will now
be revalidating the binding for each member it's applied to.
However, this seems a small sacrifice in order to have code which
is readable.

We also remove the incorrect comment in the named interface code
about propagating bindings to members which seems to have been
copied from the unnamed interface code.

Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
---
 src/glsl/ast_to_hir.cpp | 60 +++++++++++++++++++----------------------
 1 file changed, 28 insertions(+), 32 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index bb0db7992e5..75928b036c0 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2536,22 +2536,23 @@ validate_stream_qualifier(YYLTYPE *loc, struct _mesa_glsl_parse_state *state,
    return true;
 }
 
-static bool
-validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
-                           YYLTYPE *loc,
-                           const glsl_type *type,
-                           const ast_type_qualifier *qual)
+static void
+apply_explicit_binding(struct _mesa_glsl_parse_state *state,
+                       YYLTYPE *loc,
+                       ir_variable *var,
+                       const glsl_type *type,
+                       const ast_type_qualifier *qual)
 {
    if (!qual->flags.q.uniform && !qual->flags.q.buffer) {
       _mesa_glsl_error(loc, state,
                        "the \"binding\" qualifier only applies to uniforms and "
                        "shader storage buffer objects");
-      return false;
+      return;
    }
 
    if (qual->binding < 0) {
       _mesa_glsl_error(loc, state, "binding values must be >= 0");
-      return false;
+      return;
    }
 
    const struct gl_context *const ctx = state->ctx;
@@ -2576,7 +2577,7 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
                           "the maximum number of UBO binding points (%d)",
                           qual->binding, elements,
                           ctx->Const.MaxUniformBufferBindings);
-         return false;
+         return;
       }
 
       /* SSBOs. From page 67 of the GLSL 4.30 specification:
@@ -2594,7 +2595,7 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
                           "the maximum number of SSBO binding points (%d)",
                           qual->binding, elements,
                           ctx->Const.MaxShaderStorageBufferBindings);
-         return false;
+         return;
       }
    } else if (base_type->is_sampler()) {
       /* Samplers.  From page 63 of the GLSL 4.20 specification:
@@ -2611,7 +2612,7 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
                           "exceeds the maximum number of texture image units "
                           "(%d)", qual->binding, elements, limit);
 
-         return false;
+         return;
       }
    } else if (base_type->contains_atomic()) {
       assert(ctx->Const.MaxAtomicBufferBindings <= MAX_COMBINED_ATOMIC_BUFFERS);
@@ -2621,7 +2622,7 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
                           "(%d)", qual->binding,
                           ctx->Const.MaxAtomicBufferBindings);
 
-         return false;
+         return;
       }
    } else if (state->is_version(420, 310) && base_type->is_image()) {
       assert(ctx->Const.MaxImageUnits <= MAX_IMAGE_UNITS);
@@ -2629,17 +2630,20 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
          _mesa_glsl_error(loc, state, "Image binding %d exceeds the "
                           " maximum number of image units (%d)", max_index,
                           ctx->Const.MaxImageUnits);
-         return false;
+         return;
       }
 
    } else {
       _mesa_glsl_error(loc, state,
                        "the \"binding\" qualifier only applies to uniform "
                        "blocks, opaque variables, or arrays thereof");
-      return false;
+      return;
    }
 
-   return true;
+   var->data.explicit_binding = true;
+   var->data.binding = qual->binding;
+
+   return;
 }
 
 
@@ -3041,10 +3045,8 @@ apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
       _mesa_glsl_error(loc, state, "explicit index requires explicit location");
    }
 
-   if (qual->flags.q.explicit_binding &&
-       validate_binding_qualifier(state, loc, var->type, qual)) {
-      var->data.explicit_binding = true;
-      var->data.binding = qual->binding;
+   if (qual->flags.q.explicit_binding) {
+      apply_explicit_binding(state, loc, var, var->type, qual);
    }
 
    if (state->stage == MESA_SHADER_GEOMETRY &&
@@ -6694,8 +6696,6 @@ ast_interface_block::hir(exec_list *instructions,
                                         num_variables,
                                         packing,
                                         this->block_name);
-   if (this->layout.flags.q.explicit_binding)
-      validate_binding_qualifier(state, &loc, block_type, &this->layout);
 
    if (!state->symbols->add_interface(block_type->name, block_type, var_mode)) {
       YYLTYPE loc = this->get_location();
@@ -6826,10 +6826,6 @@ ast_interface_block::hir(exec_list *instructions,
                              "not allowed");
          }
 
-         if (this->layout.flags.q.explicit_binding)
-            validate_binding_qualifier(state, &loc, block_array_type,
-                                       &this->layout);
-
          var = new(state) ir_variable(block_array_type,
                                       this->instance_name,
                                       var_mode);
@@ -6891,12 +6887,10 @@ ast_interface_block::hir(exec_list *instructions,
          earlier->reinit_interface_type(block_type);
          delete var;
       } else {
-         /* Propagate the "binding" keyword into this UBO's fields;
-          * the UBO declaration itself doesn't get an ir_variable unless it
-          * has an instance name.  This is ugly.
-          */
-         var->data.explicit_binding = this->layout.flags.q.explicit_binding;
-         var->data.binding = this->layout.binding;
+         if (this->layout.flags.q.explicit_binding) {
+            apply_explicit_binding(state, &loc, var,
+                                   var->get_interface_type(), &this->layout);
+         }
 
          var->data.stream = qual_stream;
 
@@ -6975,8 +6969,10 @@ ast_interface_block::hir(exec_list *instructions,
           * The UBO declaration itself doesn't get an ir_variable unless it
           * has an instance name.  This is ugly.
           */
-         var->data.explicit_binding = this->layout.flags.q.explicit_binding;
-         var->data.binding = this->layout.binding;
+         if (this->layout.flags.q.explicit_binding) {
+            apply_explicit_binding(state, &loc, var,
+                                   var->get_interface_type(), &this->layout);
+         }
 
          if (var->type->is_unsized_array()) {
             if (var->is_in_shader_storage_block()) {

From e74fe2a844e301e8774f2c7e6b142956cc980d4a Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Sun, 15 Nov 2015 00:42:44 +1100
Subject: [PATCH 199/335] glsl: replace binding layout min boundary check

Use the new helper, which will allow for compile-time constants
in a later patch.

Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
---
 src/glsl/ast_to_hir.cpp | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 75928b036c0..5f70a184917 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2550,14 +2550,15 @@ apply_explicit_binding(struct _mesa_glsl_parse_state *state,
       return;
    }
 
-   if (qual->binding < 0) {
-      _mesa_glsl_error(loc, state, "binding values must be >= 0");
+   unsigned qual_binding;
+   if (!process_qualifier_constant(state, loc, "binding", qual->binding,
+                                   &qual_binding)) {
       return;
    }
 
    const struct gl_context *const ctx = state->ctx;
    unsigned elements = type->is_array() ? type->arrays_of_arrays_size() : 1;
-   unsigned max_index = qual->binding + elements - 1;
+   unsigned max_index = qual_binding + elements - 1;
    const glsl_type *base_type = type->without_array();
 
    if (base_type->is_interface()) {
@@ -2573,9 +2574,9 @@ apply_explicit_binding(struct _mesa_glsl_parse_state *state,
        */
       if (qual->flags.q.uniform &&
          max_index >= ctx->Const.MaxUniformBufferBindings) {
-         _mesa_glsl_error(loc, state, "layout(binding = %d) for %d UBOs exceeds "
+         _mesa_glsl_error(loc, state, "layout(binding = %u) for %d UBOs exceeds "
                           "the maximum number of UBO binding points (%d)",
-                          qual->binding, elements,
+                          qual_binding, elements,
                           ctx->Const.MaxUniformBufferBindings);
          return;
       }
@@ -2591,9 +2592,9 @@ apply_explicit_binding(struct _mesa_glsl_parse_state *state,
        */
       if (qual->flags.q.buffer &&
          max_index >= ctx->Const.MaxShaderStorageBufferBindings) {
-         _mesa_glsl_error(loc, state, "layout(binding = %d) for %d SSBOs exceeds "
+         _mesa_glsl_error(loc, state, "layout(binding = %u) for %d SSBOs exceeds "
                           "the maximum number of SSBO binding points (%d)",
-                          qual->binding, elements,
+                          qual_binding, elements,
                           ctx->Const.MaxShaderStorageBufferBindings);
          return;
       }
@@ -2610,16 +2611,16 @@ apply_explicit_binding(struct _mesa_glsl_parse_state *state,
       if (max_index >= limit) {
          _mesa_glsl_error(loc, state, "layout(binding = %d) for %d samplers "
                           "exceeds the maximum number of texture image units "
-                          "(%d)", qual->binding, elements, limit);
+                          "(%u)", qual_binding, elements, limit);
 
          return;
       }
    } else if (base_type->contains_atomic()) {
       assert(ctx->Const.MaxAtomicBufferBindings <= MAX_COMBINED_ATOMIC_BUFFERS);
-      if (unsigned(qual->binding) >= ctx->Const.MaxAtomicBufferBindings) {
+      if (qual_binding >= ctx->Const.MaxAtomicBufferBindings) {
          _mesa_glsl_error(loc, state, "layout(binding = %d) exceeds the "
                           " maximum number of atomic counter buffer bindings"
-                          "(%d)", qual->binding,
+                          "(%u)", qual_binding,
                           ctx->Const.MaxAtomicBufferBindings);
 
          return;
@@ -2641,7 +2642,7 @@ apply_explicit_binding(struct _mesa_glsl_parse_state *state,
    }
 
    var->data.explicit_binding = true;
-   var->data.binding = qual->binding;
+   var->data.binding = qual_binding;
 
    return;
 }

From 4196af4ce7cdb0217a7cc6e196b1a788d32c5b6f Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Sat, 14 Nov 2015 14:05:30 +1100
Subject: [PATCH 200/335] glsl: call set_shader_inout_layout() earlier

This will allow us to add error checking to this function
in a later patch; if we don't move it, the error messages
will go missing.

Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
---
 src/glsl/glsl_parser_extras.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index c54dcfdab7b..13a3c941e6a 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -1799,6 +1799,9 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
    if (shader->InfoLog)
       ralloc_free(shader->InfoLog);
 
+   if (!state->error)
+      set_shader_inout_layout(shader, state);
+
    shader->symbols = new(shader->ir) glsl_symbol_table;
    shader->CompileStatus = !state->error;
    shader->InfoLog = state->info_log;
@@ -1806,9 +1809,6 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
    shader->IsES = state->es_shader;
    shader->uses_builtin_functions = state->uses_builtin_functions;
 
-   if (!state->error)
-      set_shader_inout_layout(shader, state);
-
    /* Retain any live IR, but trash the rest. */
    reparent_ir(shader->ir, shader->ir);
 

From 0954b813a3a356b5836f4169783b8c8c58ff2158 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Mon, 9 Nov 2015 07:48:46 +1100
Subject: [PATCH 201/335] glsl: add new type for compile time constants

In this patch we introduce a new ast type for holding the new
compile-time constant expressions. The main reason for this is that
we can no longer do merging of layout qualifiers before they have been
converted into GLSL IR so we need to store them to be processed later.

The new type has two helper functions:

- process_qualifier_constant()

 Used to merge and then evaluate qualifier expressions

- merge_qualifier()

 Simply appends a qualifier to a list to be merged later by
 process_qualifier_constant()

In order to avoid cascading error messages, the process_qualifier_constant()
helpers return a bool.

Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
---
 src/glsl/ast.h        | 20 +++++++++++++++
 src/glsl/ast_type.cpp | 60 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+)

diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index ae763424f71..ca493401d7c 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -350,6 +350,26 @@ public:
    exec_list array_dimensions;
 };
 
+class ast_layout_expression : public ast_node {
+public:
+   ast_layout_expression(const struct YYLTYPE &locp, ast_expression *expr)
+   {
+      set_location(locp);
+      layout_const_expressions.push_tail(&expr->link);
+   }
+
+   bool process_qualifier_constant(struct _mesa_glsl_parse_state *state,
+                                   const char *qual_indentifier,
+                                   unsigned *value, bool can_be_zero);
+
+   void merge_qualifier(ast_layout_expression *l_expr)
+   {
+      layout_const_expressions.append_list(&l_expr->layout_const_expressions);
+   }
+
+   exec_list layout_const_expressions;
+};
+
 /**
  * C-style aggregate initialization class
  *
diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp
index b107051e32c..89ab8eaa7ff 100644
--- a/src/glsl/ast_type.cpp
+++ b/src/glsl/ast_type.cpp
@@ -467,3 +467,63 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc,
 
    return true;
 }
+
+bool
+ast_layout_expression::process_qualifier_constant(struct _mesa_glsl_parse_state *state,
+                                                  const char *qual_indentifier,
+                                                  unsigned *value,
+                                                  bool can_be_zero)
+{
+   int min_value = 0;
+   bool first_pass = true;
+   *value = 0;
+
+   if (!can_be_zero)
+      min_value = 1;
+
+   for (exec_node *node = layout_const_expressions.head;
+           !node->is_tail_sentinel(); node = node->next) {
+
+      exec_list dummy_instructions;
+      ast_node *const_expression = exec_node_data(ast_node, node, link);
+
+      ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state);
+
+      ir_constant *const const_int = ir->constant_expression_value();
+      if (const_int == NULL || !const_int->type->is_integer()) {
+         YYLTYPE loc = const_expression->get_location();
+         _mesa_glsl_error(&loc, state, "%s must be an integral constant "
+                          "expression", qual_indentifier);
+         return false;
+      }
+
+      if (const_int->value.i[0] < min_value) {
+         YYLTYPE loc = const_expression->get_location();
+         _mesa_glsl_error(&loc, state, "%s layout qualifier is invalid "
+                          "(%d < %d)", qual_indentifier,
+                          const_int->value.i[0], min_value);
+         return false;
+      }
+
+      if (!first_pass && *value != const_int->value.u[0]) {
+         YYLTYPE loc = const_expression->get_location();
+         _mesa_glsl_error(&loc, state, "%s layout qualifier does not "
+		          "match previous declaration (%d vs %d)",
+                          qual_indentifier, *value, const_int->value.i[0]);
+         return false;
+      } else {
+         first_pass = false;
+         *value = const_int->value.u[0];
+      }
+
+      /* If the location is const (and we've verified that
+       * it is) then no instructions should have been emitted
+       * when we converted it to HIR. If they were emitted,
+       * then either the location isn't const after all, or
+       * we are emitting unnecessary instructions.
+       */
+      assert(dummy_instructions.is_empty());
+   }
+
+   return true;
+}

From 02d2ab23786a0f4ef635914801da97faf577197a Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Sat, 14 Nov 2015 15:13:28 +1100
Subject: [PATCH 202/335] glsl: add support for compile-time constant
 expressions

This patch replaces the old integer constant qualifiers with either
the new ast_layout_expression type if the qualifier requires merging
or ast_expression if the qualifier can't have multiple declarations
or if all but the newest qualifier are simply ignored.

We also update the process_qualifier_constant() helper to be
similar to the one in the ast_layout_expression class, but in
this case it will be used to process the ast_expression qualifiers.

Global shader layout qualifier validation is moved out of the parser
in this change as we now need to evaluate any constant expression
before doing the validation.

V2: Fix minimum value check for vertices (Emil)

Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
---
 src/glsl/ast.h                  |  33 ++++----
 src/glsl/ast_to_hir.cpp         | 128 +++++++++++++++++++++++---------
 src/glsl/ast_type.cpp           |  69 ++++++-----------
 src/glsl/glsl_parser.yy         |  87 +++++++---------------
 src/glsl/glsl_parser_extras.cpp |  44 +++++++++--
 5 files changed, 196 insertions(+), 165 deletions(-)

diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index ca493401d7c..dfb036dde7e 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -578,7 +578,7 @@ struct ast_type_qualifier {
    unsigned precision:2;
 
    /** Geometry shader invocations for GL_ARB_gpu_shader5. */
-   int invocations;
+   ast_layout_expression *invocations;
 
    /**
     * Location specified via GL_ARB_explicit_attrib_location layout
@@ -586,20 +586,20 @@ struct ast_type_qualifier {
     * \note
     * This field is only valid if \c explicit_location is set.
     */
-   int location;
+   ast_expression *location;
    /**
     * Index specified via GL_ARB_explicit_attrib_location layout
     *
     * \note
     * This field is only valid if \c explicit_index is set.
     */
-   int index;
+   ast_expression *index;
 
    /** Maximum output vertices in GLSL 1.50 geometry shaders. */
-   int max_vertices;
+   ast_layout_expression *max_vertices;
 
    /** Stream in GLSL 1.50 geometry shaders. */
-   unsigned stream;
+   ast_expression *stream;
 
    /**
     * Input or output primitive type in GLSL 1.50 geometry shaders
@@ -613,7 +613,7 @@ struct ast_type_qualifier {
     * \note
     * This field is only valid if \c explicit_binding is set.
     */
-   int binding;
+   ast_expression *binding;
 
    /**
     * Offset specified via GL_ARB_shader_atomic_counter's "offset"
@@ -622,14 +622,14 @@ struct ast_type_qualifier {
     * \note
     * This field is only valid if \c explicit_offset is set.
     */
-   int offset;
+   ast_expression *offset;
 
    /**
     * Local size specified via GL_ARB_compute_shader's "local_size_{x,y,z}"
     * layout qualifier.  Element i of this array is only valid if
     * flags.q.local_size & (1 << i) is set.
     */
-   int local_size[3];
+   ast_layout_expression *local_size[3];
 
    /** Tessellation evaluation shader: vertex spacing (equal, fractional even/odd) */
    GLenum vertex_spacing;
@@ -641,7 +641,7 @@ struct ast_type_qualifier {
    bool point_mode;
 
    /** Tessellation control shader: number of output vertices */
-   int vertices;
+   ast_layout_expression *vertices;
 
    /**
     * Image format specified with an ARB_shader_image_load_store
@@ -1113,17 +1113,13 @@ public:
 class ast_tcs_output_layout : public ast_node
 {
 public:
-   ast_tcs_output_layout(const struct YYLTYPE &locp, int vertices)
-      : vertices(vertices)
+   ast_tcs_output_layout(const struct YYLTYPE &locp)
    {
       set_location(locp);
    }
 
    virtual ir_rvalue *hir(exec_list *instructions,
                           struct _mesa_glsl_parse_state *state);
-
-private:
-   const int vertices;
 };
 
 
@@ -1155,9 +1151,12 @@ private:
 class ast_cs_input_layout : public ast_node
 {
 public:
-   ast_cs_input_layout(const struct YYLTYPE &locp, const unsigned *local_size)
+   ast_cs_input_layout(const struct YYLTYPE &locp,
+                       ast_layout_expression **local_size)
    {
-      memcpy(this->local_size, local_size, sizeof(this->local_size));
+      for (int i = 0; i < 3; i++) {
+         this->local_size[i] = local_size[i];
+      }
       set_location(locp);
    }
 
@@ -1165,7 +1164,7 @@ public:
                           struct _mesa_glsl_parse_state *state);
 
 private:
-   unsigned local_size[3];
+   ast_layout_expression *local_size[3];
 };
 
 /*@}*/
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 5f70a184917..db8c450b587 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2508,16 +2508,40 @@ static bool
 process_qualifier_constant(struct _mesa_glsl_parse_state *state,
                            YYLTYPE *loc,
                            const char *qual_indentifier,
-                           int qual_value,
+                           ast_expression *const_expression,
                            unsigned *value)
 {
-   if (qual_value < 0) {
-      _mesa_glsl_error(loc, state, "%s layout qualifier is invalid (%d < 0)",
-                       qual_indentifier, qual_value);
+   exec_list dummy_instructions;
+
+   if (const_expression == NULL) {
+      *value = 0;
+      return true;
+   }
+
+   ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state);
+
+   ir_constant *const const_int = ir->constant_expression_value();
+   if (const_int == NULL || !const_int->type->is_integer()) {
+      _mesa_glsl_error(loc, state, "%s must be an integral constant "
+                       "expression", qual_indentifier);
       return false;
    }
 
-   *value = (unsigned) qual_value;
+   if (const_int->value.i[0] < 0) {
+      _mesa_glsl_error(loc, state, "%s layout qualifier is invalid (%d < 0)",
+                       qual_indentifier, const_int->value.u[0]);
+      return false;
+   }
+
+   /* If the location is const (and we've verified that
+    * it is) then no instructions should have been emitted
+    * when we converted it to HIR. If they were emitted,
+    * then either the location isn't const after all, or
+    * we are emitting unnecessary instructions.
+    */
+   assert(dummy_instructions.is_empty());
+
+   *value = const_int->value.u[0];
    return true;
 }
 
@@ -3845,7 +3869,17 @@ handle_tess_ctrl_shader_output_decl(struct _mesa_glsl_parse_state *state,
    unsigned num_vertices = 0;
 
    if (state->tcs_output_vertices_specified) {
-      num_vertices = state->out_qualifier->vertices;
+      if (!state->out_qualifier->vertices->
+             process_qualifier_constant(state, "vertices",
+                                        &num_vertices, false)) {
+         return;
+      }
+
+      if (num_vertices > state->Const.MaxPatchVertices) {
+         _mesa_glsl_error(&loc, state, "vertices (%d) exceeds "
+                          "GL_MAX_PATCH_VERTICES", num_vertices);
+         return;
+      }
    }
 
    if (!var->type->is_array() && !var->data.patch) {
@@ -4079,9 +4113,18 @@ ast_declarator_list::hir(exec_list *instructions,
     */
    if (decl_type && decl_type->contains_atomic()) {
       if (type->qualifier.flags.q.explicit_binding &&
-          type->qualifier.flags.q.explicit_offset)
-         state->atomic_counter_offsets[type->qualifier.binding] =
-            type->qualifier.offset;
+          type->qualifier.flags.q.explicit_offset) {
+         unsigned qual_binding;
+         unsigned qual_offset;
+         if (process_qualifier_constant(state, &loc, "binding",
+                                        type->qualifier.binding,
+                                        &qual_binding)
+             && process_qualifier_constant(state, &loc, "offset",
+                                        type->qualifier.offset,
+                                        &qual_offset)) {
+            state->atomic_counter_offsets[qual_binding] = qual_offset;
+         }
+      }
    }
 
    if (this->declarations.is_empty()) {
@@ -7055,22 +7098,18 @@ ast_tcs_output_layout::hir(exec_list *instructions,
 {
    YYLTYPE loc = this->get_location();
 
-   /* If any tessellation control output layout declaration preceded this
-    * one, make sure it was consistent with this one.
-    */
-   if (state->tcs_output_vertices_specified &&
-       state->out_qualifier->vertices != this->vertices) {
-      _mesa_glsl_error(&loc, state,
-		       "tessellation control shader output layout does not "
-		       "match previous declaration");
-      return NULL;
+   unsigned num_vertices;
+   if (!state->out_qualifier->vertices->
+          process_qualifier_constant(state, "vertices", &num_vertices,
+                                     false)) {
+      /* return here to stop cascading incorrect error messages */
+     return NULL;
    }
 
    /* If any shader outputs occurred before this declaration and specified an
     * array size, make sure the size they specified is consistent with the
     * primitive type.
     */
-   unsigned num_vertices = this->vertices;
    if (state->tcs_output_size != 0 && state->tcs_output_size != num_vertices) {
       _mesa_glsl_error(&loc, state,
 		       "this tessellation control shader output layout "
@@ -7178,20 +7217,6 @@ ast_cs_input_layout::hir(exec_list *instructions,
 {
    YYLTYPE loc = this->get_location();
 
-   /* If any compute input layout declaration preceded this one, make sure it
-    * was consistent with this one.
-    */
-   if (state->cs_input_local_size_specified) {
-      for (int i = 0; i < 3; i++) {
-         if (state->cs_input_local_size[i] != this->local_size[i]) {
-            _mesa_glsl_error(&loc, state,
-                             "compute shader input layout does not match"
-                             " previous declaration");
-            return NULL;
-         }
-      }
-   }
-
    /* From the ARB_compute_shader specification:
     *
     *     If the local size of the shader in any dimension is greater
@@ -7204,15 +7229,30 @@ ast_cs_input_layout::hir(exec_list *instructions,
     * report it at compile time as well.
     */
    GLuint64 total_invocations = 1;
+   unsigned qual_local_size[3];
    for (int i = 0; i < 3; i++) {
-      if (this->local_size[i] > state->ctx->Const.MaxComputeWorkGroupSize[i]) {
+
+      char *local_size_str = ralloc_asprintf(NULL, "invalid local_size_%c",
+                                             'x' + i);
+      /* Infer a local_size of 1 for unspecified dimensions */
+      if (this->local_size[i] == NULL) {
+         qual_local_size[i] = 1;
+      } else if (!this->local_size[i]->
+             process_qualifier_constant(state, local_size_str,
+                                        &qual_local_size[i], false)) {
+         ralloc_free(local_size_str);
+         return NULL;
+      }
+      ralloc_free(local_size_str);
+
+      if (qual_local_size[i] > state->ctx->Const.MaxComputeWorkGroupSize[i]) {
          _mesa_glsl_error(&loc, state,
                           "local_size_%c exceeds MAX_COMPUTE_WORK_GROUP_SIZE"
                           " (%d)", 'x' + i,
                           state->ctx->Const.MaxComputeWorkGroupSize[i]);
          break;
       }
-      total_invocations *= this->local_size[i];
+      total_invocations *= qual_local_size[i];
       if (total_invocations >
           state->ctx->Const.MaxComputeWorkGroupInvocations) {
          _mesa_glsl_error(&loc, state,
@@ -7223,9 +7263,23 @@ ast_cs_input_layout::hir(exec_list *instructions,
       }
    }
 
+   /* If any compute input layout declaration preceded this one, make sure it
+    * was consistent with this one.
+    */
+   if (state->cs_input_local_size_specified) {
+      for (int i = 0; i < 3; i++) {
+         if (state->cs_input_local_size[i] != qual_local_size[i]) {
+            _mesa_glsl_error(&loc, state,
+                             "compute shader input layout does not match"
+                             " previous declaration");
+            return NULL;
+         }
+      }
+   }
+
    state->cs_input_local_size_specified = true;
    for (int i = 0; i < 3; i++)
-      state->cs_input_local_size[i] = this->local_size[i];
+      state->cs_input_local_size[i] = qual_local_size[i];
 
    /* We may now declare the built-in constant gl_WorkGroupSize (see
     * builtin_variable_generator::generate_constants() for why we didn't
@@ -7240,7 +7294,7 @@ ast_cs_input_layout::hir(exec_list *instructions,
    ir_constant_data data;
    memset(&data, 0, sizeof(data));
    for (int i = 0; i < 3; i++)
-      data.u[i] = this->local_size[i];
+      data.u[i] = qual_local_size[i];
    var->constant_value = new(var) ir_constant(glsl_type::uvec3_type, &data);
    var->constant_initializer =
       new(var) ir_constant(glsl_type::uvec3_type, &data);
diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp
index 89ab8eaa7ff..1e89a9e76b9 100644
--- a/src/glsl/ast_type.cpp
+++ b/src/glsl/ast_type.cpp
@@ -169,23 +169,19 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
    }
 
    if (q.flags.q.max_vertices) {
-      if (this->flags.q.max_vertices && this->max_vertices != q.max_vertices) {
-	 _mesa_glsl_error(loc, state,
-			  "geometry shader set conflicting max_vertices "
-			  "(%d and %d)", this->max_vertices, q.max_vertices);
-	 return false;
+      if (this->max_vertices) {
+         this->max_vertices->merge_qualifier(q.max_vertices);
+      } else {
+         this->max_vertices = q.max_vertices;
       }
-      this->max_vertices = q.max_vertices;
    }
 
    if (q.flags.q.invocations) {
-      if (this->flags.q.invocations && this->invocations != q.invocations) {
-         _mesa_glsl_error(loc, state,
-                          "geometry shader set conflicting invocations "
-                          "(%d and %d)", this->invocations, q.invocations);
-         return false;
+      if (this->invocations) {
+         this->invocations->merge_qualifier(q.invocations);
+      } else {
+         this->invocations = q.invocations;
       }
-      this->invocations = q.invocations;
    }
 
    if (state->stage == MESA_SHADER_GEOMETRY &&
@@ -208,14 +204,11 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
    }
 
    if (q.flags.q.vertices) {
-      if (this->flags.q.vertices && this->vertices != q.vertices) {
-	 _mesa_glsl_error(loc, state,
-			  "tessellation control shader set conflicting "
-			  "vertices (%d and %d)",
-			  this->vertices, q.vertices);
-	 return false;
+      if (this->vertices) {
+         this->vertices->merge_qualifier(q.vertices);
+      } else {
+         this->vertices = q.vertices;
       }
-      this->vertices = q.vertices;
    }
 
    if (q.flags.q.vertex_spacing) {
@@ -252,15 +245,11 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
 
    for (int i = 0; i < 3; i++) {
       if (q.flags.q.local_size & (1 << i)) {
-         if ((this->flags.q.local_size & (1 << i)) &&
-             this->local_size[i] != q.local_size[i]) {
-            _mesa_glsl_error(loc, state,
-                             "compute shader set conflicting values for "
-                             "local_size_%c (%d and %d)", 'x' + i,
-                             this->local_size[i], q.local_size[i]);
-            return false;
+         if (this->local_size[i]) {
+            this->local_size[i]->merge_qualifier(q.local_size[i]);
+         } else {
+            this->local_size[i] = q.local_size[i];
          }
-         this->local_size[i] = q.local_size[i];
       }
    }
 
@@ -299,7 +288,7 @@ ast_type_qualifier::merge_out_qualifier(YYLTYPE *loc,
    const bool r = this->merge_qualifier(loc, state, q);
 
    if (state->stage == MESA_SHADER_TESS_CTRL) {
-      node = new(mem_ctx) ast_tcs_output_layout(*loc, q.vertices);
+      node = new(mem_ctx) ast_tcs_output_layout(*loc);
    }
 
    return r;
@@ -403,15 +392,13 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc,
       state->in_qualifier->prim_type = q.prim_type;
    }
 
-   if (this->flags.q.invocations &&
-       q.flags.q.invocations &&
-       this->invocations != q.invocations) {
-      _mesa_glsl_error(loc, state,
-                       "conflicting invocations counts specified");
-      return false;
-   } else if (q.flags.q.invocations) {
+   if (q.flags.q.invocations) {
       this->flags.q.invocations = 1;
-      this->invocations = q.invocations;
+      if (this->invocations) {
+         this->invocations->merge_qualifier(q.invocations);
+      } else {
+         this->invocations = q.invocations;
+      }
    }
 
    if (q.flags.q.early_fragment_tests) {
@@ -454,15 +441,7 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc,
    if (create_gs_ast) {
       node = new(mem_ctx) ast_gs_input_layout(*loc, q.prim_type);
    } else if (create_cs_ast) {
-      /* Infer a local_size of 1 for every unspecified dimension */
-      unsigned local_size[3];
-      for (int i = 0; i < 3; i++) {
-         if (q.flags.q.local_size & (1 << i))
-            local_size[i] = q.local_size[i];
-         else
-            local_size[i] = 1;
-      }
-      node = new(mem_ctx) ast_cs_input_layout(*loc, local_size);
+      node = new(mem_ctx) ast_cs_input_layout(*loc, q.local_size);
    }
 
    return true;
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index b4a1652a14c..5a8f98019d1 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -298,7 +298,6 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
 %type <node> conditionopt
 %type <node> for_init_statement
 %type <for_rest_statement> for_rest_statement
-%type <n> integer_constant
 %type <node> layout_defaults
 
 %right THEN ELSE
@@ -1152,11 +1151,6 @@ layout_qualifier_id_list:
    }
    ;
 
-integer_constant:
-   INTCONSTANT { $$ = $1; }
-   | UINTCONSTANT { $$ = $1; }
-   ;
-
 layout_qualifier_id:
    any_identifier
    {
@@ -1453,9 +1447,18 @@ layout_qualifier_id:
          YYERROR;
       }
    }
-   | any_identifier '=' integer_constant
+   | any_identifier '=' constant_expression
    {
       memset(& $$, 0, sizeof($$));
+      void *ctx = state;
+
+      if ($3->oper != ast_int_constant &&
+          $3->oper != ast_uint_constant &&
+          !state->has_enhanced_layouts()) {
+         _mesa_glsl_error(& @1, state,
+                          "compile-time constant expressions require "
+                          "GLSL 4.40 or ARB_enhanced_layouts");
+      }
 
       if (match_layout_qualifier("location", $1, state) == 0) {
          $$.flags.q.explicit_location = 1;
@@ -1495,18 +1498,11 @@ layout_qualifier_id:
 
       if (match_layout_qualifier("max_vertices", $1, state) == 0) {
          $$.flags.q.max_vertices = 1;
-
-         if ($3 < 0) {
+         $$.max_vertices = new(ctx) ast_layout_expression(@1, $3);
+         if (!state->is_version(150, 0)) {
             _mesa_glsl_error(& @3, state,
-                             "invalid max_vertices %d specified", $3);
-            YYERROR;
-         } else {
-            $$.max_vertices = $3;
-            if (!state->is_version(150, 0)) {
-               _mesa_glsl_error(& @3, state,
-                                "#version 150 max_vertices qualifier "
-                                "specified", $3);
-            }
+                             "#version 150 max_vertices qualifier "
+                             "specified", $3);
          }
       }
 
@@ -1527,12 +1523,7 @@ layout_qualifier_id:
       for (int i = 0; i < 3; i++) {
          if (match_layout_qualifier(local_size_qualifiers[i], $1,
                                     state) == 0) {
-            if ($3 <= 0) {
-               _mesa_glsl_error(& @3, state,
-                                "invalid %s of %d specified",
-                                local_size_qualifiers[i], $3);
-               YYERROR;
-            } else if (!state->has_compute_shader()) {
+            if (!state->has_compute_shader()) {
                _mesa_glsl_error(& @3, state,
                                 "%s qualifier requires GLSL 4.30 or "
                                 "GLSL ES 3.10 or ARB_compute_shader",
@@ -1540,7 +1531,7 @@ layout_qualifier_id:
                YYERROR;
             } else {
                $$.flags.q.local_size |= (1 << i);
-               $$.local_size[i] = $3;
+               $$.local_size[i] = new(ctx) ast_layout_expression(@1, $3);
             }
             break;
          }
@@ -1548,48 +1539,24 @@ layout_qualifier_id:
 
       if (match_layout_qualifier("invocations", $1, state) == 0) {
          $$.flags.q.invocations = 1;
-
-         if ($3 <= 0) {
+         $$.invocations = new(ctx) ast_layout_expression(@1, $3);
+         if (!state->is_version(400, 0) &&
+             !state->ARB_gpu_shader5_enable) {
             _mesa_glsl_error(& @3, state,
-                             "invalid invocations %d specified", $3);
-            YYERROR;
-         } else if ($3 > MAX_GEOMETRY_SHADER_INVOCATIONS) {
-            _mesa_glsl_error(& @3, state,
-                             "invocations (%d) exceeds "
-                             "GL_MAX_GEOMETRY_SHADER_INVOCATIONS", $3);
-            YYERROR;
-         } else {
-            $$.invocations = $3;
-            if (!state->is_version(400, 0) &&
-                !state->ARB_gpu_shader5_enable) {
-               _mesa_glsl_error(& @3, state,
-                                "GL_ARB_gpu_shader5 invocations "
-                                "qualifier specified", $3);
-            }
+                             "GL_ARB_gpu_shader5 invocations "
+                             "qualifier specified", $3);
          }
       }
 
       /* Layout qualifiers for tessellation control shaders. */
       if (match_layout_qualifier("vertices", $1, state) == 0) {
          $$.flags.q.vertices = 1;
-
-         if ($3 <= 0) {
-            _mesa_glsl_error(& @3, state,
-                             "invalid vertices (%d) specified", $3);
-            YYERROR;
-         } else if ($3 > (int)state->Const.MaxPatchVertices) {
-            _mesa_glsl_error(& @3, state,
-                             "vertices (%d) exceeds "
-                             "GL_MAX_PATCH_VERTICES", $3);
-            YYERROR;
-         } else {
-            $$.vertices = $3;
-            if (!state->ARB_tessellation_shader_enable &&
-                !state->is_version(400, 0)) {
-               _mesa_glsl_error(& @1, state,
-                                "vertices qualifier requires GLSL 4.00 or "
-                                "ARB_tessellation_shader");
-            }
+         $$.vertices = new(ctx) ast_layout_expression(@1, $3);
+         if (!state->ARB_tessellation_shader_enable &&
+             !state->is_version(400, 0)) {
+            _mesa_glsl_error(& @1, state,
+                             "vertices qualifier requires GLSL 4.00 or "
+                             "ARB_tessellation_shader");
          }
       }
 
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 13a3c941e6a..84b3aca1a82 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -1648,8 +1648,20 @@ set_shader_inout_layout(struct gl_shader *shader,
    switch (shader->Stage) {
    case MESA_SHADER_TESS_CTRL:
       shader->TessCtrl.VerticesOut = 0;
-      if (state->tcs_output_vertices_specified)
-         shader->TessCtrl.VerticesOut = state->out_qualifier->vertices;
+      if (state->tcs_output_vertices_specified) {
+         unsigned vertices;
+         if (state->out_qualifier->vertices->
+               process_qualifier_constant(state, "vertices", &vertices,
+                                          false)) {
+
+            YYLTYPE loc = state->out_qualifier->vertices->get_location();
+            if (vertices > state->Const.MaxPatchVertices) {
+               _mesa_glsl_error(&loc, state, "vertices (%d) exceeds "
+                                "GL_MAX_PATCH_VERTICES", vertices);
+            }
+            shader->TessCtrl.VerticesOut = vertices;
+         }
+      }
       break;
    case MESA_SHADER_TESS_EVAL:
       shader->TessEval.PrimitiveMode = PRIM_UNKNOWN;
@@ -1670,8 +1682,14 @@ set_shader_inout_layout(struct gl_shader *shader,
       break;
    case MESA_SHADER_GEOMETRY:
       shader->Geom.VerticesOut = 0;
-      if (state->out_qualifier->flags.q.max_vertices)
-         shader->Geom.VerticesOut = state->out_qualifier->max_vertices;
+      if (state->out_qualifier->flags.q.max_vertices) {
+         unsigned qual_max_vertices;
+         if (state->out_qualifier->max_vertices->
+               process_qualifier_constant(state, "max_vertices",
+                                          &qual_max_vertices, true)) {
+            shader->Geom.VerticesOut = qual_max_vertices;
+         }
+      }
 
       if (state->gs_input_prim_type_specified) {
          shader->Geom.InputType = state->in_qualifier->prim_type;
@@ -1686,8 +1704,22 @@ set_shader_inout_layout(struct gl_shader *shader,
       }
 
       shader->Geom.Invocations = 0;
-      if (state->in_qualifier->flags.q.invocations)
-         shader->Geom.Invocations = state->in_qualifier->invocations;
+      if (state->in_qualifier->flags.q.invocations) {
+         unsigned invocations;
+         if (state->in_qualifier->invocations->
+               process_qualifier_constant(state, "invocations",
+                                          &invocations, false)) {
+
+            YYLTYPE loc = state->in_qualifier->invocations->get_location();
+            if (invocations > MAX_GEOMETRY_SHADER_INVOCATIONS) {
+               _mesa_glsl_error(&loc, state,
+                                "invocations (%d) exceeds "
+                                "GL_MAX_GEOMETRY_SHADER_INVOCATIONS",
+                                invocations);
+            }
+            shader->Geom.Invocations = invocations;
+         }
+      }
       break;
 
    case MESA_SHADER_COMPUTE:

From f7af69c350977f03c6648bdb0b21851933cb98ad Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Mon, 9 Nov 2015 09:34:40 +1100
Subject: [PATCH 203/335] glsl: add subroutine index qualifier support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ARB_explicit_uniform_location allows the index for subroutine functions
to be explicitly set in the shader.

This patch reduces the restriction on the index qualifier in
validate_layout_qualifiers() to allow it to be applied to subroutines
and adds the new subroutine qualifier validation to ast_function::hir().

ast_fully_specified_type::has_qualifiers() is updated to allow the
index qualifier on subroutine functions when explicit uniform locations
are available.

A new check is added to ast_type_qualifier::merge_qualifier() to stop
multiple function qualifiers from being defined; before this patch this
would cause a segfault.

Finally, a new variable is added to ir_function to store the index.
This value is validated, and indices for subroutines without an explicit
index are assigned, in link_assign_subroutine_types().

Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
---
 src/glsl/ast.h                 |  2 +-
 src/glsl/ast_to_hir.cpp        | 34 ++++++++++++++++++++++++++++++++--
 src/glsl/ast_type.cpp          | 14 +++++++++++++-
 src/glsl/ir.cpp                |  1 +
 src/glsl/ir.h                  |  2 ++
 src/glsl/ir_clone.cpp          |  1 +
 src/glsl/linker.cpp            | 33 +++++++++++++++++++++++++++++++++
 src/mesa/main/mtypes.h         |  1 +
 src/mesa/main/shader_query.cpp |  7 +++++++
 9 files changed, 91 insertions(+), 4 deletions(-)

diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index dfb036dde7e..3bea63ea0ed 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -772,7 +772,7 @@ public:
 class ast_fully_specified_type : public ast_node {
 public:
    virtual void print(void) const;
-   bool has_qualifiers() const;
+   bool has_qualifiers(_mesa_glsl_parse_state *state) const;
 
    ast_fully_specified_type() : qualifier(), specifier(NULL)
    {
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index db8c450b587..df6dd9b4759 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2848,6 +2848,13 @@ apply_explicit_location(const struct ast_type_qualifier *qual,
          break;
       }
 
+      /* Check if index was set for the uniform instead of the function */
+      if (qual->flags.q.explicit_index && qual->flags.q.subroutine) {
+         _mesa_glsl_error(loc, state, "an index qualifier can only be "
+                          "used with subroutine functions");
+         return;
+      }
+
       unsigned qual_index;
       if (qual->flags.q.explicit_index &&
           process_qualifier_constant(state, loc, "index", qual->index,
@@ -3067,7 +3074,9 @@ apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
    if (qual->flags.q.explicit_location) {
       apply_explicit_location(qual, var, state, loc);
    } else if (qual->flags.q.explicit_index) {
-      _mesa_glsl_error(loc, state, "explicit index requires explicit location");
+      if (!qual->flags.q.subroutine_def)
+         _mesa_glsl_error(loc, state,
+                          "explicit index requires explicit location");
    }
 
    if (qual->flags.q.explicit_binding) {
@@ -5075,7 +5084,7 @@ ast_function::hir(exec_list *instructions,
    /* From page 56 (page 62 of the PDF) of the GLSL 1.30 spec:
     * "No qualifier is allowed on the return type of a function."
     */
-   if (this->return_type->has_qualifiers()) {
+   if (this->return_type->has_qualifiers(state)) {
       YYLTYPE loc = this->get_location();
       _mesa_glsl_error(& loc, state,
                        "function `%s' return type has qualifiers", name);
@@ -5207,6 +5216,27 @@ ast_function::hir(exec_list *instructions,
    if (this->return_type->qualifier.flags.q.subroutine_def) {
       int idx;
 
+      if (this->return_type->qualifier.flags.q.explicit_index) {
+         unsigned qual_index;
+         if (process_qualifier_constant(state, &loc, "index",
+                                        this->return_type->qualifier.index,
+                                        &qual_index)) {
+            if (!state->has_explicit_uniform_location()) {
+               _mesa_glsl_error(&loc, state, "subroutine index requires "
+                                "GL_ARB_explicit_uniform_location or "
+                                "GLSL 4.30");
+            } else if (qual_index >= MAX_SUBROUTINES) {
+               _mesa_glsl_error(&loc, state,
+                                "invalid subroutine index (%d) index must "
+                                "be a number between 0 and "
+                                "GL_MAX_SUBROUTINES - 1 (%d)", qual_index,
+                                MAX_SUBROUTINES - 1);
+            } else {
+               f->subroutine_index = qual_index;
+            }
+         }
+      }
+
       f->num_subroutine_types = this->return_type->qualifier.subroutine_list->declarations.length();
       f->subroutine_types = ralloc_array(state, const struct glsl_type *,
                                          f->num_subroutine_types);
diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp
index 1e89a9e76b9..03ed4dcfa2a 100644
--- a/src/glsl/ast_type.cpp
+++ b/src/glsl/ast_type.cpp
@@ -38,13 +38,16 @@ ast_type_specifier::print(void) const
 }
 
 bool
-ast_fully_specified_type::has_qualifiers() const
+ast_fully_specified_type::has_qualifiers(_mesa_glsl_parse_state *state) const
 {
    /* 'subroutine' isnt a real qualifier. */
    ast_type_qualifier subroutine_only;
    subroutine_only.flags.i = 0;
    subroutine_only.flags.q.subroutine = 1;
    subroutine_only.flags.q.subroutine_def = 1;
+   if (state->has_explicit_uniform_location()) {
+      subroutine_only.flags.q.explicit_index = 1;
+   }
    return (this->qualifier.flags.i & ~subroutine_only.flags.i) != 0;
 }
 
@@ -176,6 +179,15 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
       }
    }
 
+   if (q.flags.q.subroutine_def) {
+      if (this->flags.q.subroutine_def) {
+	 _mesa_glsl_error(loc, state,
+			  "conflicting subroutine qualifiers used");
+      } else {
+         this->subroutine_list = q.subroutine_list;
+      }
+   }
+
    if (q.flags.q.invocations) {
       if (this->invocations) {
          this->invocations->merge_qualifier(q.invocations);
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index f47100ee40f..ca520f547a1 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -1846,6 +1846,7 @@ ir_function_signature::replace_parameters(exec_list *new_params)
 ir_function::ir_function(const char *name)
    : ir_instruction(ir_type_function)
 {
+   this->subroutine_index = -1;
    this->name = ralloc_strdup(this, name);
 }
 
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 39156e0e98c..e1109eec1d3 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1171,6 +1171,8 @@ public:
     */
    int num_subroutine_types;
    const struct glsl_type **subroutine_types;
+
+   int subroutine_index;
 };
 
 inline const char *ir_function_signature::function_name() const
diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp
index 4484cc9d8a7..bee60a241e4 100644
--- a/src/glsl/ir_clone.cpp
+++ b/src/glsl/ir_clone.cpp
@@ -270,6 +270,7 @@ ir_function::clone(void *mem_ctx, struct hash_table *ht) const
    ir_function *copy = new(mem_ctx) ir_function(this->name);
 
    copy->is_subroutine = this->is_subroutine;
+   copy->subroutine_index = this->subroutine_index;
    copy->num_subroutine_types = this->num_subroutine_types;
    copy->subroutine_types = ralloc_array(mem_ctx, const struct glsl_type *, copy->num_subroutine_types);
    for (int i = 0; i < copy->num_subroutine_types; i++)
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index db00f8febc6..331d9a28007 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -3864,10 +3864,43 @@ link_assign_subroutine_types(struct gl_shader_program *prog)
          sh->SubroutineFunctions[sh->NumSubroutineFunctions].types =
             ralloc_array(sh, const struct glsl_type *,
                          fn->num_subroutine_types);
+
+         /* From Section 4.4.4(Subroutine Function Layout Qualifiers) of the
+          * GLSL 4.5 spec:
+          *
+          *    "Each subroutine with an index qualifier in the shader must be
+          *    given a unique index, otherwise a compile or link error will be
+          *    generated."
+          */
+         for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) {
+            if (sh->SubroutineFunctions[j].index != -1 &&
+                sh->SubroutineFunctions[j].index == fn->subroutine_index) {
+               linker_error(prog, "each subroutine index qualifier in the "
+                            "shader must be unique\n");
+               return;
+            }
+         }
+         sh->SubroutineFunctions[sh->NumSubroutineFunctions].index =
+            fn->subroutine_index;
+
          for (int j = 0; j < fn->num_subroutine_types; j++)
             sh->SubroutineFunctions[sh->NumSubroutineFunctions].types[j] = fn->subroutine_types[j];
          sh->NumSubroutineFunctions++;
       }
+
+      /* Assign an index to each subroutine without an explicit index */
+      int index = 0;
+      for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) {
+         while (sh->SubroutineFunctions[j].index == -1) {
+            for (unsigned k = 0; k < sh->NumSubroutineFunctions; k++) {
+               if (sh->SubroutineFunctions[k].index == index)
+                  break;
+               else if (k == sh->NumSubroutineFunctions - 1)
+                  sh->SubroutineFunctions[j].index = index;
+            }
+            index++;
+         }
+      }
    }
 }
 
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 65276f9c56b..d425571ba1e 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2193,6 +2193,7 @@ struct gl_ati_fragment_shader_state
 struct gl_subroutine_function
 {
    char *name;
+   int index;
    int num_compat_types;
    const struct glsl_type **types;
 };
diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index 14f849e0a94..79a91b5b6bd 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -661,6 +661,13 @@ _mesa_program_resource_index(struct gl_shader_program *shProg,
    switch (res->Type) {
    case GL_ATOMIC_COUNTER_BUFFER:
       return RESOURCE_ATC(res) - shProg->AtomicBuffers;
+   case GL_VERTEX_SUBROUTINE:
+   case GL_GEOMETRY_SUBROUTINE:
+   case GL_FRAGMENT_SUBROUTINE:
+   case GL_COMPUTE_SUBROUTINE:
+   case GL_TESS_CONTROL_SUBROUTINE:
+   case GL_TESS_EVALUATION_SUBROUTINE:
+      return RESOURCE_SUB(res)->index;
    case GL_UNIFORM_BLOCK:
    case GL_SHADER_STORAGE_BLOCK:
    case GL_TRANSFORM_FEEDBACK_VARYING:

From b109cd3c276e476143ae85fd48d5e43e904291dd Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Thu, 5 Nov 2015 22:17:36 +1100
Subject: [PATCH 204/335] docs: mark compile-time constant expressions as done

Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
---
 docs/GL3.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index b768eea789a..ad6b95e992b 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -179,7 +179,7 @@ GL 4.4, GLSL 4.40:
   GL_ARB_buffer_storage                                DONE (i965, nv50, nvc0, r600, radeonsi)
   GL_ARB_clear_texture                                 DONE (i965, nv50, nvc0)
   GL_ARB_enhanced_layouts                              in progress (Timothy)
-  - compile-time constant expressions                  in progress
+  - compile-time constant expressions                  DONE
   - explicit byte offsets for blocks                   in progress
   - forced alignment within blocks                     in progress
   - specified vec4-slot component numbers              in progress

From e96d7a1489c1e4ddd66fc5c88c0fc603e1483e32 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 19 Nov 2015 10:39:09 -0700
Subject: [PATCH 205/335] svga: add some sanity check assertions in
 svga_buffer_transfer_map()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make sure the y, z, height and depth values of buffer boxes are as expected.

Reviewed-by: José Fonseca <jfonseca@vmware.com>
Reviewed-by: Charmaine Lee <charmainel@vmware.com>
---
 src/gallium/drivers/svga/svga_resource_buffer.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c
index 71f2f4f2779..449cc149a81 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer.c
@@ -80,6 +80,11 @@ svga_buffer_transfer_map(struct pipe_context *pipe,
    uint8_t *map;
    int64_t begin = os_time_get();
 
+   assert(box->y == 0);
+   assert(box->z == 0);
+   assert(box->height == 1);
+   assert(box->depth == 1);
+
    transfer = CALLOC_STRUCT(pipe_transfer);
    if (transfer == NULL) {
       return NULL;

From 527466d9a12e81f3bade04c5ff0258a003b48f34 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 19 Nov 2015 10:39:49 -0700
Subject: [PATCH 206/335] svga: add num-bytes-uploaded HUD query
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To graph the number of bytes uploaded to GPU per frame (vertex buffer data,
constant buffer data, texture data, etc).

Reviewed-by: José Fonseca <jfonseca@vmware.com>
Reviewed-by: Charmaine Lee <charmainel@vmware.com>
---
 src/gallium/drivers/svga/svga_context.h           | 15 +++++++++------
 src/gallium/drivers/svga/svga_pipe_query.c        |  9 +++++++++
 .../drivers/svga/svga_resource_buffer_upload.c    |  4 ++++
 src/gallium/drivers/svga/svga_resource_texture.c  |  6 ++++++
 src/gallium/drivers/svga/svga_screen.c            |  2 ++
 5 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index bcce18a3502..6a4f9d8d076 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -51,14 +51,16 @@
 #define SVGA_QUERY_NUM_VALIDATIONS         (PIPE_QUERY_DRIVER_SPECIFIC + 3)
 #define SVGA_QUERY_MAP_BUFFER_TIME         (PIPE_QUERY_DRIVER_SPECIFIC + 4)
 #define SVGA_QUERY_NUM_RESOURCES_MAPPED    (PIPE_QUERY_DRIVER_SPECIFIC + 5)
+#define SVGA_QUERY_NUM_BYTES_UPLOADED      (PIPE_QUERY_DRIVER_SPECIFIC + 6)
+
 /* running total counters */
-#define SVGA_QUERY_MEMORY_USED             (PIPE_QUERY_DRIVER_SPECIFIC + 6)
-#define SVGA_QUERY_NUM_SHADERS             (PIPE_QUERY_DRIVER_SPECIFIC + 7)
-#define SVGA_QUERY_NUM_RESOURCES           (PIPE_QUERY_DRIVER_SPECIFIC + 8)
-#define SVGA_QUERY_NUM_STATE_OBJECTS       (PIPE_QUERY_DRIVER_SPECIFIC + 9)
-#define SVGA_QUERY_NUM_SURFACE_VIEWS       (PIPE_QUERY_DRIVER_SPECIFIC + 10)
+#define SVGA_QUERY_MEMORY_USED             (PIPE_QUERY_DRIVER_SPECIFIC + 7)
+#define SVGA_QUERY_NUM_SHADERS             (PIPE_QUERY_DRIVER_SPECIFIC + 8)
+#define SVGA_QUERY_NUM_RESOURCES           (PIPE_QUERY_DRIVER_SPECIFIC + 9)
+#define SVGA_QUERY_NUM_STATE_OBJECTS       (PIPE_QUERY_DRIVER_SPECIFIC + 10)
+#define SVGA_QUERY_NUM_SURFACE_VIEWS       (PIPE_QUERY_DRIVER_SPECIFIC + 11)
 /*SVGA_QUERY_MAX has to be last because it is size of an array*/
-#define SVGA_QUERY_MAX                     (PIPE_QUERY_DRIVER_SPECIFIC + 11)
+#define SVGA_QUERY_MAX                     (PIPE_QUERY_DRIVER_SPECIFIC + 12)
 
 /**
  * Maximum supported number of constant buffers per shader
@@ -485,6 +487,7 @@ struct svga_context
       uint64_t num_shaders;          /**< SVGA_QUERY_NUM_SHADERS */
       uint64_t num_state_objects;    /**< SVGA_QUERY_NUM_STATE_OBJECTS */
       uint64_t num_surface_views;    /**< SVGA_QUERY_NUM_SURFACE_VIEWS */
+      uint64_t num_bytes_uploaded;   /**< SVGA_QUERY_NUM_BYTES_UPLOADED */
    } hud;
 
    /** The currently bound stream output targets */
diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c
index 8b9818334ca..5416a009dcb 100644
--- a/src/gallium/drivers/svga/svga_pipe_query.c
+++ b/src/gallium/drivers/svga/svga_pipe_query.c
@@ -731,6 +731,7 @@ svga_create_query(struct pipe_context *pipe,
    case SVGA_QUERY_MAP_BUFFER_TIME:
    case SVGA_QUERY_NUM_SURFACE_VIEWS:
    case SVGA_QUERY_NUM_RESOURCES_MAPPED:
+   case SVGA_QUERY_NUM_BYTES_UPLOADED:
       break;
    default:
       assert(!"unexpected query type in svga_create_query()");
@@ -797,6 +798,7 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
    case SVGA_QUERY_MAP_BUFFER_TIME:
    case SVGA_QUERY_NUM_SURFACE_VIEWS:
    case SVGA_QUERY_NUM_RESOURCES_MAPPED:
+   case SVGA_QUERY_NUM_BYTES_UPLOADED:
       /* nothing */
       break;
    default:
@@ -876,6 +878,9 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
    case SVGA_QUERY_NUM_RESOURCES_MAPPED:
       sq->begin_count = svga->hud.num_resources_mapped;
       break;
+   case SVGA_QUERY_NUM_BYTES_UPLOADED:
+      sq->begin_count = svga->hud.num_bytes_uploaded;
+      break;
    case SVGA_QUERY_MEMORY_USED:
    case SVGA_QUERY_NUM_SHADERS:
    case SVGA_QUERY_NUM_RESOURCES:
@@ -966,6 +971,9 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q)
    case SVGA_QUERY_NUM_RESOURCES_MAPPED:
       sq->end_count = svga->hud.num_resources_mapped;
       break;
+   case SVGA_QUERY_NUM_BYTES_UPLOADED:
+      sq->end_count = svga->hud.num_bytes_uploaded;
+      break;
    case SVGA_QUERY_MEMORY_USED:
    case SVGA_QUERY_NUM_SHADERS:
    case SVGA_QUERY_NUM_RESOURCES:
@@ -1061,6 +1069,7 @@ svga_get_query_result(struct pipe_context *pipe,
    case SVGA_QUERY_NUM_FLUSHES:
    case SVGA_QUERY_NUM_VALIDATIONS:
    case SVGA_QUERY_NUM_RESOURCES_MAPPED:
+   case SVGA_QUERY_NUM_BYTES_UPLOADED:
    case SVGA_QUERY_MAP_BUFFER_TIME:
       vresult->u64 = sq->end_count - sq->begin_count;
       break;
diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
index 69e5f75e208..8c5cff5abc1 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
@@ -429,6 +429,8 @@ svga_buffer_upload_flush(struct svga_context *svga,
 
          assert(box->x <= sbuf->b.b.width0);
          assert(box->x + box->w <= sbuf->b.b.width0);
+
+         svga->hud.num_bytes_uploaded += box->w;
       }
    }
    else {
@@ -454,6 +456,8 @@ svga_buffer_upload_flush(struct svga_context *svga,
 
          assert(box->x <= sbuf->b.b.width0);
          assert(box->x + box->w <= sbuf->b.b.width0);
+
+         svga->hud.num_bytes_uploaded += box->w;
       }
    }
 
diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c
index a02d1e495ff..81594777258 100644
--- a/src/gallium/drivers/svga/svga_resource_texture.c
+++ b/src/gallium/drivers/svga/svga_resource_texture.c
@@ -380,6 +380,12 @@ svga_texture_transfer_map(struct pipe_context *pipe,
       break;
    }
 
+   if (usage & PIPE_TRANSFER_WRITE) {
+      /* record texture upload for HUD */
+      svga->hud.num_bytes_uploaded +=
+         nblocksx * nblocksy * d * util_format_get_blocksize(texture->format);
+   }
+
    if (!use_direct_map) {
       /* Use a DMA buffer */
       st->hw_nblocksy = nblocksy;
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index a80bc9b9119..09a3d33552b 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -789,6 +789,8 @@ svga_get_driver_query_info(struct pipe_screen *screen,
       {"map-buffer-time", SVGA_QUERY_MAP_BUFFER_TIME, {0},
        PIPE_DRIVER_QUERY_TYPE_MICROSECONDS},
       {"num-resources-mapped", SVGA_QUERY_NUM_RESOURCES_MAPPED, {0}},
+      {"num-bytes-uploaded", SVGA_QUERY_NUM_BYTES_UPLOADED, {0},
+       PIPE_DRIVER_QUERY_TYPE_BYTES, PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE},
 
       /* running total counters */
       {"memory-used", SVGA_QUERY_MEMORY_USED, {0},

From 1def5ef95863f704ab5d1bd3bef3a31a6e461b60 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 20 Nov 2015 14:05:55 -0700
Subject: [PATCH 207/335] docs: mention GL 3.3 support for VMware driver in
 Mesa 11.1 relnotes

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 docs/relnotes/11.1.0.html | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 04c7f7f9a8f..c1d30e8e9ec 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -45,6 +45,8 @@ Note: some of the new features are only available with certain drivers.
 
 <ul>
 <li>OpenGL 3.1 support on freedreno (a3xx, a4xx)</li>
+<li>OpenGL 3.3 support for VMware guest VM driver (supported by Workstation 12
+    and Fusion 8).</li>
 <li>GL_AMD_performance_monitor on nv50</li>
 <li>GL_ARB_arrays_of_arrays on i965</li>
 <li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>

From 47fae842d01331af5acc56ff8db37c09ceca791f Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 20 Nov 2015 10:19:16 -0700
Subject: [PATCH 208/335] mesa: update some old-style (K&R?) function pointer
 calls

Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/mesa/main/blend.c     | 4 ++--
 src/mesa/main/buffers.c   | 2 +-
 src/mesa/main/fog.c       | 2 +-
 src/mesa/main/getstring.c | 2 +-
 src/mesa/main/points.c    | 2 +-
 src/mesa/main/texenv.c    | 2 +-
 6 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c
index f07552b4778..2ae22e9e691 100644
--- a/src/mesa/main/blend.c
+++ b/src/mesa/main/blend.c
@@ -404,7 +404,7 @@ _mesa_BlendEquation( GLenum mode )
    ctx->Color._BlendEquationPerBuffer = GL_FALSE;
 
    if (ctx->Driver.BlendEquationSeparate)
-      (*ctx->Driver.BlendEquationSeparate)( ctx, mode, mode );
+      ctx->Driver.BlendEquationSeparate(ctx, mode, mode);
 }
 
 
@@ -582,7 +582,7 @@ _mesa_BlendColor( GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha )
    ctx->Color.BlendColor[3] = CLAMP(tmp[3], 0.0F, 1.0F);
 
    if (ctx->Driver.BlendColor)
-      (*ctx->Driver.BlendColor)(ctx, ctx->Color.BlendColor);
+      ctx->Driver.BlendColor(ctx, ctx->Color.BlendColor);
 }
 
 
diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c
index 93588a2ee18..83e238ae825 100644
--- a/src/mesa/main/buffers.c
+++ b/src/mesa/main/buffers.c
@@ -731,7 +731,7 @@ _mesa_read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb,
    /* Call the device driver function only if fb is the bound read buffer */
    if (fb == ctx->ReadBuffer) {
       if (ctx->Driver.ReadBuffer)
-         (*ctx->Driver.ReadBuffer)(ctx, buffer);
+         ctx->Driver.ReadBuffer(ctx, buffer);
    }
 }
 
diff --git a/src/mesa/main/fog.c b/src/mesa/main/fog.c
index 45f343d61c8..1ad939cfde6 100644
--- a/src/mesa/main/fog.c
+++ b/src/mesa/main/fog.c
@@ -190,7 +190,7 @@ _mesa_Fogfv( GLenum pname, const GLfloat *params )
    }
 
    if (ctx->Driver.Fogfv) {
-      (*ctx->Driver.Fogfv)( ctx, pname, params );
+      ctx->Driver.Fogfv( ctx, pname, params );
    }
 
    return;
diff --git a/src/mesa/main/getstring.c b/src/mesa/main/getstring.c
index 06ba17c92d6..87c5a3a194f 100644
--- a/src/mesa/main/getstring.c
+++ b/src/mesa/main/getstring.c
@@ -121,7 +121,7 @@ _mesa_GetString( GLenum name )
    assert(ctx->Driver.GetString);
    {
       /* Give the driver the chance to handle this query */
-      const GLubyte *str = (*ctx->Driver.GetString)(ctx, name);
+      const GLubyte *str = ctx->Driver.GetString(ctx, name);
       if (str)
          return str;
    }
diff --git a/src/mesa/main/points.c b/src/mesa/main/points.c
index 863e3c1af32..c2f2b6399cb 100644
--- a/src/mesa/main/points.c
+++ b/src/mesa/main/points.c
@@ -209,7 +209,7 @@ _mesa_PointParameterfv( GLenum pname, const GLfloat *params)
    }
 
    if (ctx->Driver.PointParameterfv)
-      (*ctx->Driver.PointParameterfv)(ctx, pname, params);
+      ctx->Driver.PointParameterfv(ctx, pname, params);
 }
 
 
diff --git a/src/mesa/main/texenv.c b/src/mesa/main/texenv.c
index 091922161c5..93c680650bb 100644
--- a/src/mesa/main/texenv.c
+++ b/src/mesa/main/texenv.c
@@ -495,7 +495,7 @@ _mesa_TexEnvfv( GLenum target, GLenum pname, const GLfloat *param )
 
    /* Tell device driver about the new texture environment */
    if (ctx->Driver.TexEnv) {
-      (*ctx->Driver.TexEnv)( ctx, target, pname, param );
+      ctx->Driver.TexEnv(ctx, target, pname, param);
    }
 }
 

From 6f4fe8e76ada4cd04f20d37825d7763fcc2b62ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Fri, 20 Nov 2015 11:46:26 +0100
Subject: [PATCH 209/335] radeon: reset query buffers for PIPE_QUERY_TIMESTAMP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since begin_query is not called for this query type, we need to reset the
query buffer state in end_query instead.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93015
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Tested-by: Andy Furniss <adf.lists@gmail.com>
Tested-by: Mathias Tillman <master.homer@gmail.com>
---
 src/gallium/drivers/radeon/r600_query.c | 26 +++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 2797bcb76b7..526be16aa89 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -654,17 +654,11 @@ static boolean r600_begin_query(struct pipe_context *ctx,
 	return rquery->ops->begin(rctx, rquery);
 }
 
-boolean r600_query_hw_begin(struct r600_common_context *rctx,
-			    struct r600_query *rquery)
+static void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
+					struct r600_query_hw *query)
 {
-	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
 	struct r600_query_buffer *prev = query->buffer.previous;
 
-	if (query->flags & R600_QUERY_HW_FLAG_NO_START) {
-		assert(0);
-		return false;
-	}
-
 	/* Discard the old query buffers. */
 	while (prev) {
 		struct r600_query_buffer *qbuf = prev;
@@ -682,6 +676,19 @@ boolean r600_query_hw_begin(struct r600_common_context *rctx,
 
 	query->buffer.results_end = 0;
 	query->buffer.previous = NULL;
+}
+
+boolean r600_query_hw_begin(struct r600_common_context *rctx,
+			    struct r600_query *rquery)
+{
+	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
+
+	if (query->flags & R600_QUERY_HW_FLAG_NO_START) {
+		assert(0);
+		return false;
+	}
+
+	r600_query_hw_reset_buffers(rctx, query);
 
 	r600_query_hw_emit_start(rctx, query);
 
@@ -705,6 +712,9 @@ void r600_query_hw_end(struct r600_common_context *rctx,
 {
 	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
 
+	if (query->flags & R600_QUERY_HW_FLAG_NO_START)
+		r600_query_hw_reset_buffers(rctx, query);
+
 	r600_query_hw_emit_stop(rctx, query);
 
 	if (!(query->flags & R600_QUERY_HW_FLAG_NO_START))

From 5bda3d0958967b88e739302a480499d1428a0b2a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nhaehnle@gmail.com>
Date: Fri, 20 Nov 2015 12:58:55 +0100
Subject: [PATCH 210/335] radeon: re-prepare query buffers on begin_query for
 predicate queries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The point of prepare_buffer is to ensure that the query buffer contains valid
initial data for conditional rendering: as long as the buffer is initialized
correctly, the GPU is able to tell whether query results have been written
already (and wait or fall back to unconditional rendering if desired).

This means prepare_buffer needs to be called again when a buffer is reused.

Conversely, for queries that cannot be used for conditional rendering
(notably pipeline statistics), we can re-use buffers immediately, and they
do not need to be initialized.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Tested-by: Andy Furniss <adf.lists@gmail.com>
---
 src/gallium/drivers/radeon/r600_query.c | 32 +++++++++++++------------
 src/gallium/drivers/radeon/r600_query.h |  1 +
 2 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 526be16aa89..b1cfb6e462b 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -242,7 +242,7 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c
 		pipe_buffer_create(ctx->b.screen, PIPE_BIND_CUSTOM,
 				   PIPE_USAGE_STAGING, buf_size);
 
-	if (query->ops->prepare_buffer)
+	if (query->flags & R600_QUERY_HW_FLAG_PREDICATE)
 		query->ops->prepare_buffer(ctx, query, buf);
 
 	return buf;
@@ -251,15 +251,11 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c
 static void r600_query_hw_prepare_buffer(struct r600_common_context *ctx,
 					 struct r600_query_hw *query,
 					 struct r600_resource *buffer)
- {
-	uint32_t *results;
-
-	if (query->b.type == PIPE_QUERY_TIME_ELAPSED ||
-	    query->b.type == PIPE_QUERY_TIMESTAMP)
-		return;
-
-	results = r600_buffer_map_sync_with_rings(ctx, buffer,
-						  PIPE_TRANSFER_WRITE);
+{
+	/* Callers ensure that the buffer is currently unused by the GPU. */
+	uint32_t *results = ctx->ws->buffer_map(buffer->cs_buf, NULL,
+						PIPE_TRANSFER_WRITE |
+						PIPE_TRANSFER_UNSYNCHRONIZED);
 
 	memset(results, 0, buffer->b.b.width0);
 
@@ -339,6 +335,7 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
 		query->result_size = 16 * rctx->max_db;
 		query->num_cs_dw_begin = 6;
 		query->num_cs_dw_end = 6;
+		query->flags |= R600_QUERY_HW_FLAG_PREDICATE;
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
 		query->result_size = 16;
@@ -361,6 +358,7 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
 		query->num_cs_dw_begin = 6;
 		query->num_cs_dw_end = 6;
 		query->stream = index;
+		query->flags |= R600_QUERY_HW_FLAG_PREDICATE;
 		break;
 	case PIPE_QUERY_PIPELINE_STATISTICS:
 		/* 11 values on EG, 8 on R600. */
@@ -667,11 +665,15 @@ static void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
 		FREE(qbuf);
 	}
 
-	/* Obtain a new buffer if the current one can't be mapped without a stall. */
-	if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
-	    !rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
-		pipe_resource_reference((struct pipe_resource**)&query->buffer.buf, NULL);
-		query->buffer.buf = r600_new_query_buffer(rctx, query);
+	if (query->flags & R600_QUERY_HW_FLAG_PREDICATE) {
+		/* Obtain a new buffer if the current one can't be mapped without a stall. */
+		if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
+		    !rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
+			pipe_resource_reference((struct pipe_resource**)&query->buffer.buf, NULL);
+			query->buffer.buf = r600_new_query_buffer(rctx, query);
+		} else {
+			query->ops->prepare_buffer(rctx, query, query->buffer.buf);
+		}
 	}
 
 	query->buffer.results_end = 0;
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
index 9bd3b5d5a02..0ea5707ca45 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -71,6 +71,7 @@ struct r600_query {
 enum {
 	R600_QUERY_HW_FLAG_NO_START = (1 << 0),
 	R600_QUERY_HW_FLAG_TIMER = (1 << 1),
+	R600_QUERY_HW_FLAG_PREDICATE = (1 << 2),
 };
 
 struct r600_query_hw_ops {

From ec106e9f621cc1f8b3c1ee55aafa69d5ad159276 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 20 Nov 2015 13:45:18 -0500
Subject: [PATCH 211/335] freedreno/a4xx: fix dst_alpha blend for RGBX render
 targets

There are no native RGBX render formats, so we must manually force
dst_alpha to be one, same as for a3xx.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/freedreno/a4xx/fd4_blend.c | 13 +++++++++++--
 src/gallium/drivers/freedreno/a4xx/fd4_blend.h |  7 ++++++-
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c  | 17 +++++++++++++++--
 3 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
index d5e823ef69d..e9a9ac19b79 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
@@ -27,6 +27,7 @@
  */
 
 #include "pipe/p_state.h"
+#include "util/u_blend.h"
 #include "util/u_string.h"
 #include "util/u_memory.h"
 
@@ -98,14 +99,22 @@ fd4_blend_state_create(struct pipe_context *pctx,
 		else
 			rt = &cso->rt[0];
 
-		so->rb_mrt[i].blend_control =
+		so->rb_mrt[i].blend_control_rgb =
 				A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) |
 				A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
-				A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) |
+				A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor));
+
+		so->rb_mrt[i].blend_control_alpha =
 				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) |
 				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) |
 				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor));
 
+		so->rb_mrt[i].blend_control_no_alpha_rgb =
+				A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_src_factor))) |
+				A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
+				A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_dst_factor)));
+
+
 		so->rb_mrt[i].control =
 				0xc00 | /* XXX ROP_CODE ?? */
 				A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.h b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
index 7620d00a625..6230fa7a50e 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
@@ -39,7 +39,12 @@ struct fd4_blend_stateobj {
 	struct {
 		uint32_t control;
 		uint32_t buf_info;
-		uint32_t blend_control;
+		/* Blend control bits for color if there is an alpha channel */
+		uint32_t blend_control_rgb;
+		/* Blend control bits for color if there is no alpha channel */
+		uint32_t blend_control_no_alpha_rgb;
+		/* Blend control bits for alpha channel */
+		uint32_t blend_control_alpha;
 	} rb_mrt[A4XX_MAX_RENDER_TARGETS];
 	uint32_t rb_fs_output;
 };
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 5a7b192f79d..99d1602d74b 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -626,11 +626,24 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		uint32_t i;
 
 		for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+			enum pipe_format format = pipe_surface_format(
+					ctx->framebuffer.cbufs[i]);
+			bool has_alpha = util_format_has_alpha(format);
+			uint32_t control = blend->rb_mrt[i].control;
+			uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha;
+
+			if (has_alpha) {
+				blend_control |= blend->rb_mrt[i].blend_control_rgb;
+			} else {
+				blend_control |= blend->rb_mrt[i].blend_control_no_alpha_rgb;
+				control &= ~A4XX_RB_MRT_CONTROL_BLEND2;
+			}
+
 			OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
-			OUT_RING(ring, blend->rb_mrt[i].control);
+			OUT_RING(ring, control);
 
 			OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
-			OUT_RING(ring, blend->rb_mrt[i].blend_control);
+			OUT_RING(ring, blend_control);
 		}
 
 		OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);

From c1babbd85c4069d9d9b319d5ca95a614039ba609 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 20 Nov 2015 17:04:05 -0500
Subject: [PATCH 212/335] freedreno: always set all border colors

Instead of playing the guessing game as to which texture format reads
from which border color encoding type, just write both of them always.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 .../drivers/freedreno/freedreno_texture.c     | 38 ++++---------------
 1 file changed, 8 insertions(+), 30 deletions(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_texture.c b/src/gallium/drivers/freedreno/freedreno_texture.c
index f03b65b0ae5..f5611abaec8 100644
--- a/src/gallium/drivers/freedreno/freedreno_texture.c
+++ b/src/gallium/drivers/freedreno/freedreno_texture.c
@@ -197,37 +197,15 @@ fd_setup_border_colors(struct fd_texture_stateobj *tex, void *ptr,
 					continue;
 
 				const struct util_format_channel_description *chan =
-						&desc->channel[desc->swizzle[j]];
-				int size = chan->size;
-
-				/* The Z16 texture format we use seems to look in the
-				 * 32-bit border color slots
-				 */
-				if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
-					size = 32;
-
-				/* Formats like R11G11B10 or RGB9_E5 don't specify
-				 * per-channel sizes properly.
-				 */
-				if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER)
-					size = 16;
-
-				/* We fake RGTC as if it were RGBA8 */
-				if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC)
-					size = 8;
-
-				if (chan->pure_integer && size > 16)
-					bcolor32[desc->swizzle[j] + 4] =
-							sampler->border_color.i[j];
-				else if (size > 16)
-					bcolor32[desc->swizzle[j]] =
-							fui(sampler->border_color.f[j]);
-				else if (chan->pure_integer)
-					bcolor[desc->swizzle[j] + 8] =
-							sampler->border_color.i[j];
-				else
+					&desc->channel[desc->swizzle[j]];
+				if (chan->pure_integer) {
+					bcolor32[desc->swizzle[j] + 4] = sampler->border_color.i[j];
+					bcolor[desc->swizzle[j] + 8] = sampler->border_color.i[j];
+				} else {
+					bcolor32[desc->swizzle[j]] = fui(sampler->border_color.f[j]);
 					bcolor[desc->swizzle[j]] =
-							util_float_to_half(sampler->border_color.f[j]);
+						util_float_to_half(sampler->border_color.f[j]);
+				}
 			}
 		}
 	}

From 4fd24caf92fc995e4a730181e0f179a7f2218e60 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 20 Nov 2015 16:51:07 -0500
Subject: [PATCH 213/335] ttn: add TEX2 support

This fixes CubeArrayShadow tests (where the shadow comparator comes in
via a second arg to the TEX2 instruction).

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Rob Clark <robdclark@gmail.com>
Cc: mesa-stable@lists.freedesktop.org
---
 src/gallium/auxiliary/nir/tgsi_to_nir.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 0539cfc16a1..1da00b2a650 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -1239,6 +1239,11 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
       op = nir_texop_tex;
       num_srcs = 1;
       break;
+   case TGSI_OPCODE_TEX2:
+      op = nir_texop_tex;
+      num_srcs = 1;
+      samp = 2;
+      break;
    case TGSI_OPCODE_TXP:
       op = nir_texop_tex;
       num_srcs = 2;
@@ -1394,10 +1399,12 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
    }
 
    if (instr->is_shadow) {
-      if (instr->coord_components < 3)
-         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z));
-      else
+      if (instr->coord_components == 4)
+         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
+      else if (instr->coord_components == 3)
          instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
+      else
+         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z));
 
       instr->src[src_number].src_type = nir_tex_src_comparitor;
       src_number++;
@@ -1803,6 +1810,7 @@ ttn_emit_instruction(struct ttn_compile *c)
    case TGSI_OPCODE_TXL:
    case TGSI_OPCODE_TXB:
    case TGSI_OPCODE_TXD:
+   case TGSI_OPCODE_TEX2:
    case TGSI_OPCODE_TXL2:
    case TGSI_OPCODE_TXB2:
    case TGSI_OPCODE_TXQ_LZ:

From ff9450ecd1f7635f8917e3177f0ef18eb8f9f49b Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 20 Nov 2015 18:11:26 -0500
Subject: [PATCH 214/335] freedreno/a4xx: point regid to "red" even for
 alpha-only rb formats

Looks like a4xx hw does this in a more standard way and we don't need to
hack around it like we do on a3xx. Fixes GL_ALPHA formats in
fbo-blending-formats, fbo-colormask-formats, and fbo-alphatest-formats.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: mesa-stable@lists.freedesktop.org
---
 src/gallium/drivers/freedreno/a4xx/fd4_program.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
index e3d5dabab4c..3df13543148 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
@@ -245,13 +245,6 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
 		color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7);
 	}
 
-	/* adjust regids for alpha output formats. there is no alpha render
-	 * format, so it's just treated like red
-	 */
-	for (i = 0; i < nr; i++)
-		if (util_format_is_alpha(pipe_surface_format(bufs[i])))
-			color_regid[i] += 3;
-
 	/* TODO get these dynamically: */
 	face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
 	coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);

From e1319dcdd6c63ce289a75cd06ac20355d48f01d7 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 20 Nov 2015 10:40:59 -0500
Subject: [PATCH 215/335] freedreno/a4xx: add 16-bit unorm/snorm format
 texturing/rendering

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h | 12 ++++
 .../drivers/freedreno/a4xx/fd4_format.c       | 58 +++++++++++--------
 2 files changed, 46 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index 82c19b90b7d..cc48cdc4c61 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -52,6 +52,8 @@ enum a4xx_color_fmt {
 	RB4_R8G8_SNORM = 16,
 	RB4_R8G8_UINT = 17,
 	RB4_R8G8_SINT = 18,
+	RB4_R16_UNORM = 19,
+	RB4_R16_SNORM = 20,
 	RB4_R16_FLOAT = 21,
 	RB4_R16_UINT = 22,
 	RB4_R16_SINT = 23,
@@ -63,12 +65,16 @@ enum a4xx_color_fmt {
 	RB4_R10G10B10A2_UNORM = 31,
 	RB4_R10G10B10A2_UINT = 34,
 	RB4_R11G11B10_FLOAT = 39,
+	RB4_R16G16_UNORM = 40,
+	RB4_R16G16_SNORM = 41,
 	RB4_R16G16_FLOAT = 42,
 	RB4_R16G16_UINT = 43,
 	RB4_R16G16_SINT = 44,
 	RB4_R32_FLOAT = 45,
 	RB4_R32_UINT = 46,
 	RB4_R32_SINT = 47,
+	RB4_R16G16B16A16_UNORM = 52,
+	RB4_R16G16B16A16_SNORM = 53,
 	RB4_R16G16B16A16_FLOAT = 54,
 	RB4_R16G16B16A16_UINT = 55,
 	RB4_R16G16B16A16_SINT = 56,
@@ -172,6 +178,12 @@ enum a4xx_tex_fmt {
 	TFMT4_8_SINT = 7,
 	TFMT4_8_8_SINT = 17,
 	TFMT4_8_8_8_8_SINT = 31,
+	TFMT4_16_UNORM = 18,
+	TFMT4_16_16_UNORM = 38,
+	TFMT4_16_16_16_16_UNORM = 51,
+	TFMT4_16_SNORM = 19,
+	TFMT4_16_16_SNORM = 39,
+	TFMT4_16_16_16_16_SNORM = 52,
 	TFMT4_16_UINT = 21,
 	TFMT4_16_16_UINT = 41,
 	TFMT4_16_16_16_16_UINT = 54,
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
index d109f36b53c..8eeeb96a395 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -99,20 +99,26 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
 	_T(S8_UINT,    8_UINT,  R8_UNORM, WZYX),
 
 	/* 16-bit */
-	V_(R16_UNORM,   16_UNORM, NONE,     WZYX),
-	V_(R16_SNORM,   16_SNORM, NONE,     WZYX),
-	VT(R16_UINT,    16_UINT,  R16_UINT, WZYX),
-	VT(R16_SINT,    16_SINT,  R16_SINT, WZYX),
-	V_(R16_USCALED, 16_UINT,  NONE,     WZYX),
-	V_(R16_SSCALED, 16_UINT,  NONE,     WZYX),
-	VT(R16_FLOAT,   16_FLOAT, R16_FLOAT,WZYX),
+	VT(R16_UNORM,   16_UNORM, R16_UNORM, WZYX),
+	VT(R16_SNORM,   16_SNORM, R16_SNORM, WZYX),
+	VT(R16_UINT,    16_UINT,  R16_UINT,  WZYX),
+	VT(R16_SINT,    16_SINT,  R16_SINT,  WZYX),
+	V_(R16_USCALED, 16_UINT,  NONE,      WZYX),
+	V_(R16_SSCALED, 16_UINT,  NONE,      WZYX),
+	VT(R16_FLOAT,   16_FLOAT, R16_FLOAT, WZYX),
 
-	_T(A16_UINT,    16_UINT,  NONE,     WZYX),
-	_T(A16_SINT,    16_SINT,  NONE,     WZYX),
-	_T(L16_UINT,    16_UINT,  NONE,     WZYX),
-	_T(L16_SINT,    16_SINT,  NONE,     WZYX),
-	_T(I16_UINT,    16_UINT,  NONE,     WZYX),
-	_T(I16_SINT,    16_SINT,  NONE,     WZYX),
+	_T(A16_UNORM,   16_UNORM, NONE,      WZYX),
+	_T(A16_SNORM,   16_SNORM, NONE,      WZYX),
+	_T(A16_UINT,    16_UINT,  NONE,      WZYX),
+	_T(A16_SINT,    16_SINT,  NONE,      WZYX),
+	_T(L16_UNORM,   16_UNORM, NONE,      WZYX),
+	_T(L16_SNORM,   16_SNORM, NONE,      WZYX),
+	_T(L16_UINT,    16_UINT,  NONE,      WZYX),
+	_T(L16_SINT,    16_SINT,  NONE,      WZYX),
+	_T(I16_UNORM,   16_UNORM, NONE,      WZYX),
+	_T(I16_SNORM,   16_SNORM, NONE,      WZYX),
+	_T(I16_UINT,    16_UINT,  NONE,      WZYX),
+	_T(I16_SINT,    16_SINT,  NONE,      WZYX),
 
 	VT(R8G8_UNORM,   8_8_UNORM, R8G8_UNORM, WZYX),
 	VT(R8G8_SNORM,   8_8_SNORM, R8G8_SNORM, WZYX),
@@ -151,16 +157,18 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
 	_T(I32_UINT,    32_UINT,  NONE,     WZYX),
 	_T(I32_SINT,    32_SINT,  NONE,     WZYX),
 
-	V_(R16G16_UNORM,   16_16_UNORM, NONE,        WZYX),
-	V_(R16G16_SNORM,   16_16_SNORM, NONE,        WZYX),
-	VT(R16G16_UINT,    16_16_UINT,  R16G16_UINT, WZYX),
-	VT(R16G16_SINT,    16_16_SINT,  R16G16_SINT, WZYX),
-	V_(R16G16_USCALED, 16_16_UINT,  NONE,        WZYX),
-	V_(R16G16_SSCALED, 16_16_SINT,  NONE,        WZYX),
-	VT(R16G16_FLOAT,   16_16_FLOAT, R16G16_FLOAT,WZYX),
+	VT(R16G16_UNORM,   16_16_UNORM, R16G16_UNORM, WZYX),
+	VT(R16G16_SNORM,   16_16_SNORM, R16G16_SNORM, WZYX),
+	VT(R16G16_UINT,    16_16_UINT,  R16G16_UINT,  WZYX),
+	VT(R16G16_SINT,    16_16_SINT,  R16G16_SINT,  WZYX),
+	V_(R16G16_USCALED, 16_16_UINT,  NONE,         WZYX),
+	V_(R16G16_SSCALED, 16_16_SINT,  NONE,         WZYX),
+	VT(R16G16_FLOAT,   16_16_FLOAT, R16G16_FLOAT, WZYX),
 
-	_T(L16A16_UINT,    16_16_UINT,  NONE,        WZYX),
-	_T(L16A16_SINT,    16_16_SINT,  NONE,        WZYX),
+	_T(L16A16_UNORM,   16_16_UNORM, NONE,         WZYX),
+	_T(L16A16_SNORM,   16_16_SNORM, NONE,         WZYX),
+	_T(L16A16_UINT,    16_16_UINT,  NONE,         WZYX),
+	_T(L16A16_SINT,    16_16_SINT,  NONE,         WZYX),
 
 	VT(R8G8B8A8_UNORM,   8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX),
 	_T(R8G8B8X8_UNORM,   8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX),
@@ -213,8 +221,10 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
 	V_(R16G16B16_FLOAT,   16_16_16_FLOAT, NONE, WZYX),
 
 	/* 64-bit */
-	V_(R16G16B16A16_UNORM,   16_16_16_16_UNORM, NONE,               WZYX),
-	V_(R16G16B16A16_SNORM,   16_16_16_16_SNORM, NONE,               WZYX),
+	VT(R16G16B16A16_UNORM,   16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX),
+	VT(R16G16B16X16_UNORM,   16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX),
+	VT(R16G16B16A16_SNORM,   16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX),
+	VT(R16G16B16X16_SNORM,   16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX),
 	VT(R16G16B16A16_UINT,    16_16_16_16_UINT,  R16G16B16A16_UINT,  WZYX),
 	_T(R16G16B16X16_UINT,    16_16_16_16_UINT,  R16G16B16A16_UINT,  WZYX),
 	VT(R16G16B16A16_SINT,    16_16_16_16_SINT,  R16G16B16A16_SINT,  WZYX),

From 3b77826cc19d7a6a7e5cc6af6552d0ba89ec2922 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 20 Nov 2015 13:27:49 -0500
Subject: [PATCH 216/335] freedreno/a4xx: logic op handling

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/freedreno/a3xx/a3xx.xml.h | 19 -------------------
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h |  9 +++++++--
 .../drivers/freedreno/a4xx/fd4_blend.c        | 11 +++++++----
 src/gallium/drivers/freedreno/a4xx/fd4_draw.c |  3 +--
 src/gallium/drivers/freedreno/a4xx/fd4_gmem.c |  3 +--
 .../drivers/freedreno/adreno_common.xml.h     | 19 +++++++++++++++++++
 6 files changed, 35 insertions(+), 29 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index 2e3abfc1611..a6940dfefea 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -262,25 +262,6 @@ enum a3xx_sp_perfcounter_select {
 	SP_ALU_ACTIVE_CYCLES = 29,
 };
 
-enum a3xx_rop_code {
-	ROP_CLEAR = 0,
-	ROP_NOR = 1,
-	ROP_AND_INVERTED = 2,
-	ROP_COPY_INVERTED = 3,
-	ROP_AND_REVERSE = 4,
-	ROP_INVERT = 5,
-	ROP_XOR = 6,
-	ROP_NAND = 7,
-	ROP_AND = 8,
-	ROP_EQUIV = 9,
-	ROP_NOOP = 10,
-	ROP_OR_INVERTED = 11,
-	ROP_COPY = 12,
-	ROP_OR_REVERSE = 13,
-	ROP_OR = 14,
-	ROP_SET = 15,
-};
-
 enum a3xx_rb_blend_opcode {
 	BLEND_DST_PLUS_SRC = 0,
 	BLEND_SRC_MINUS_DST = 1,
diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index cc48cdc4c61..9f35654c6fe 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -418,8 +418,13 @@ static inline uint32_t REG_A4XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020a4
 #define A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE			0x00000008
 #define A4XX_RB_MRT_CONTROL_BLEND				0x00000010
 #define A4XX_RB_MRT_CONTROL_BLEND2				0x00000020
-#define A4XX_RB_MRT_CONTROL_FASTCLEAR				0x00000400
-#define A4XX_RB_MRT_CONTROL_B11					0x00000800
+#define A4XX_RB_MRT_CONTROL_ROP_ENABLE				0x00000040
+#define A4XX_RB_MRT_CONTROL_ROP_CODE__MASK			0x00000f00
+#define A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT			8
+static inline uint32_t A4XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val)
+{
+	return ((val) << A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A4XX_RB_MRT_CONTROL_ROP_CODE__MASK;
+}
 #define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK		0x0f000000
 #define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT		24
 static inline uint32_t A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val)
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
index e9a9ac19b79..98a96c131c5 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
@@ -60,12 +60,12 @@ fd4_blend_state_create(struct pipe_context *pctx,
 		const struct pipe_blend_state *cso)
 {
 	struct fd4_blend_stateobj *so;
-//	enum a3xx_rop_code rop = ROP_COPY;
+	enum a3xx_rop_code rop = ROP_COPY;
 	bool reads_dest = false;
 	unsigned i, mrt_blend = 0;
 
 	if (cso->logicop_enable) {
-//		rop = cso->logicop_func;  /* maps 1:1 */
+		rop = cso->logicop_func;  /* maps 1:1 */
 
 		switch (cso->logicop_func) {
 		case PIPE_LOGICOP_NOR:
@@ -116,7 +116,8 @@ fd4_blend_state_create(struct pipe_context *pctx,
 
 
 		so->rb_mrt[i].control =
-				0xc00 | /* XXX ROP_CODE ?? */
+				A4XX_RB_MRT_CONTROL_ROP_CODE(rop) |
+				COND(cso->logicop_enable, A4XX_RB_MRT_CONTROL_ROP_ENABLE) |
 				A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
 
 		if (rt->blend_enable) {
@@ -127,8 +128,10 @@ fd4_blend_state_create(struct pipe_context *pctx,
 			mrt_blend |= (1 << i);
 		}
 
-		if (reads_dest)
+		if (reads_dest) {
 			so->rb_mrt[i].control |= A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
+			mrt_blend |= (1 << i);
+		}
 
 		if (cso->dither)
 			so->rb_mrt[i].buf_info |= A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
index 54bd445d43b..8cbe68d5790 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
@@ -271,8 +271,7 @@ fd4_clear(struct fd_context *ctx, unsigned buffers,
 		mrt_comp[i] = (buffers & (PIPE_CLEAR_COLOR0 << i)) ? 0xf : 0x0;
 
 		OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
-		OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
-				A4XX_RB_MRT_CONTROL_B11 |
+		OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
 				A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
 
 		OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
index 3f8bbf3a124..221608127b4 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
@@ -347,8 +347,7 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
 		mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
 
 		OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
-		OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
-				A4XX_RB_MRT_CONTROL_B11 |
+		OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
 				A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
 
 		OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h
index e45cab76368..0e0f0e65e9b 100644
--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
@@ -119,6 +119,25 @@ enum adreno_rb_copy_control_mode {
 	RB_COPY_DEPTH_STENCIL = 5,
 };
 
+enum a3xx_rop_code {
+	ROP_CLEAR = 0,
+	ROP_NOR = 1,
+	ROP_AND_INVERTED = 2,
+	ROP_COPY_INVERTED = 3,
+	ROP_AND_REVERSE = 4,
+	ROP_INVERT = 5,
+	ROP_XOR = 6,
+	ROP_NAND = 7,
+	ROP_AND = 8,
+	ROP_EQUIV = 9,
+	ROP_NOOP = 10,
+	ROP_OR_INVERTED = 11,
+	ROP_COPY = 12,
+	ROP_OR_REVERSE = 13,
+	ROP_OR = 14,
+	ROP_SET = 15,
+};
+
 enum a3xx_render_mode {
 	RB_RENDERING_PASS = 0,
 	RB_TILING_PASS = 1,

From 39fa5c8419076f6ec84de0252905617feb70f279 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 20 Nov 2015 17:43:11 -0500
Subject: [PATCH 217/335] freedreno/a4xx: hook up RGB565 format

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h   | 2 +-
 src/gallium/drivers/freedreno/a4xx/fd4_format.c | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index 9f35654c6fe..e46a11e94fe 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -47,7 +47,7 @@ enum a4xx_color_fmt {
 	RB4_R8_UNORM = 2,
 	RB4_R4G4B4A4_UNORM = 8,
 	RB4_R5G5B5A1_UNORM = 10,
-	RB4_R5G6R5_UNORM = 14,
+	RB4_R5G6B5_UNORM = 14,
 	RB4_R8G8_UNORM = 15,
 	RB4_R8G8_SNORM = 16,
 	RB4_R8G8_UINT = 17,
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
index 8eeeb96a395..d2a1aee68d3 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -130,6 +130,7 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
 	_T(L8A8_UINT,    8_8_UINT,  NONE,       WZYX),
 	_T(L8A8_SINT,    8_8_SINT,  NONE,       WZYX),
 
+	_T(B5G6R5_UNORM,   5_6_5_UNORM,   R5G6B5_UNORM,   WXYZ),
 	_T(B5G5R5A1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ),
 	_T(B5G5R5X1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ),
 	_T(B4G4R4A4_UNORM, 4_4_4_4_UNORM, R4G4B4A4_UNORM, WXYZ),

From fe29330406f8ea277b2c02f9c6dd2da78796ea84 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 20 Nov 2015 18:34:03 -0500
Subject: [PATCH 218/335] freedreno/a4xx: use hardware RGTC texture samplers

a4xx hardware has real support for RGTC so there's no need to fake it
like we do on a3xx. Undo the hacks, and keep track of an "internal
format" of a resource, which on a3xx will be different, triggering the
transfer-time conversions to take place.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h |  4 +++
 .../drivers/freedreno/a4xx/fd4_format.c       | 27 ++++++-------------
 .../drivers/freedreno/a4xx/fd4_format.h       |  1 -
 .../drivers/freedreno/a4xx/fd4_texture.c      |  2 +-
 .../drivers/freedreno/freedreno_resource.c    |  8 +++---
 .../drivers/freedreno/freedreno_resource.h    |  1 +
 6 files changed, 19 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index e46a11e94fe..4436697aad4 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -207,6 +207,10 @@ enum a4xx_tex_fmt {
 	TFMT4_DXT1 = 86,
 	TFMT4_DXT3 = 87,
 	TFMT4_DXT5 = 88,
+	TFMT4_RGTC1_UNORM = 90,
+	TFMT4_RGTC1_SNORM = 91,
+	TFMT4_RGTC2_UNORM = 94,
+	TFMT4_RGTC2_SNORM = 95,
 	TFMT4_BPTC_UFLOAT = 97,
 	TFMT4_BPTC_FLOAT = 98,
 	TFMT4_BPTC = 99,
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
index d2a1aee68d3..a97e01aa6c2 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -291,15 +291,14 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
 	_T(BPTC_RGB_FLOAT,  BPTC_FLOAT,  NONE, WZYX),
 	_T(BPTC_RGB_UFLOAT, BPTC_UFLOAT, NONE, WZYX),
 
-	/* faked */
-	_T(RGTC1_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
-	_T(RGTC1_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
-	_T(RGTC2_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
-	_T(RGTC2_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
-	_T(LATC1_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
-	_T(LATC1_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
-	_T(LATC2_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
-	_T(LATC2_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
+	_T(RGTC1_UNORM, RGTC1_UNORM, NONE, WZYX),
+	_T(RGTC1_SNORM, RGTC1_SNORM, NONE, WZYX),
+	_T(RGTC2_UNORM, RGTC2_UNORM, NONE, WZYX),
+	_T(RGTC2_SNORM, RGTC2_SNORM, NONE, WZYX),
+	_T(LATC1_UNORM, RGTC1_UNORM, NONE, WZYX),
+	_T(LATC1_SNORM, RGTC1_SNORM, NONE, WZYX),
+	_T(LATC2_UNORM, RGTC2_UNORM, NONE, WZYX),
+	_T(LATC2_SNORM, RGTC2_SNORM, NONE, WZYX),
 };
 
 /* convert pipe format to vertex buffer format: */
@@ -342,8 +341,6 @@ fd4_pipe2fetchsize(enum pipe_format format)
 {
 	if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
 		format = PIPE_FORMAT_Z32_FLOAT;
-	else if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
-		format = PIPE_FORMAT_R8G8B8A8_UNORM;
 
 	switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) {
 	case 8:   return TFETCH4_1_BYTE;
@@ -359,14 +356,6 @@ fd4_pipe2fetchsize(enum pipe_format format)
 	}
 }
 
-unsigned
-fd4_pipe2nblocksx(enum pipe_format format, unsigned width)
-{
-	if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
-		format = PIPE_FORMAT_R8G8B8A8_UNORM;
-	return util_format_get_nblocksx(format, width);
-}
-
 /* we need to special case a bit the depth/stencil restore, because we are
  * using the texture sampler to blit into the depth/stencil buffer, *not*
  * into a color buffer.  Otherwise fd4_tex_swiz() will do the wrong thing,
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.h b/src/gallium/drivers/freedreno/a4xx/fd4_format.h
index 8c365f081de..04837da650b 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.h
@@ -38,7 +38,6 @@ enum a4xx_tex_fmt fd4_pipe2tex(enum pipe_format format);
 enum a4xx_color_fmt fd4_pipe2color(enum pipe_format format);
 enum pipe_format fd4_gmem_restore_format(enum pipe_format format);
 enum a3xx_color_swap fd4_pipe2swap(enum pipe_format format);
-unsigned fd4_pipe2nblocksx(enum pipe_format format, unsigned width);
 enum a4xx_tex_fetchsize fd4_pipe2fetchsize(enum pipe_format format);
 enum a4xx_depth_format fd4_pipe2depth(enum pipe_format format);
 
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index 00c257b78e5..75b083b8ca0 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -240,7 +240,7 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 	so->texconst2 =
 		A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) |
 		A4XX_TEX_CONST_2_PITCH(
-			fd4_pipe2nblocksx(
+			util_format_get_nblocksx(
 				cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
 
 	switch (prsc->target) {
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index 5b1cee8d18d..43b818f4014 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -388,7 +388,8 @@ fd_resource_transfer_map(struct pipe_context *pctx,
 
 		buf = trans->staging;
 		offset = 0;
-	} else if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+	} else if (rsc->internal_format != format &&
+			   util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) {
 		assert(trans->base.box.depth == 1);
 
 		trans->base.stride = util_format_get_stride(
@@ -574,9 +575,10 @@ fd_resource_create(struct pipe_screen *pscreen,
 
 	if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
 		format = PIPE_FORMAT_Z32_FLOAT;
-	else if (util_format_description(format)->layout ==
-			 UTIL_FORMAT_LAYOUT_RGTC)
+	else if (fd_screen(pscreen)->gpu_id < 400 &&
+			 util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
 		format = PIPE_FORMAT_R8G8B8A8_UNORM;
+	rsc->internal_format = format;
 	rsc->cpp = util_format_get_blocksize(format);
 
 	assert(rsc->cpp);
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h
index 10f5242da57..9a9b0d08244 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.h
+++ b/src/gallium/drivers/freedreno/freedreno_resource.h
@@ -73,6 +73,7 @@ struct fd_resource {
 	struct u_resource base;
 	struct fd_bo *bo;
 	uint32_t cpp;
+	enum pipe_format internal_format;
 	bool layer_first;        /* see above description */
 	uint32_t layer_size;
 	struct fd_resource_slice slices[MAX_MIP_LEVELS];

From c28b574170706d4f9625e7ffa72c87862aa1ca31 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Fri, 13 Nov 2015 17:50:27 -0800
Subject: [PATCH 219/335] nir: Add support for gl_HelperInvocation system
 value.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
---
 src/glsl/nir/nir.c            | 4 ++++
 src/glsl/nir/nir_intrinsics.h | 1 +
 2 files changed, 5 insertions(+)

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index dba18c96a90..8956b7e56ca 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -1577,6 +1577,8 @@ nir_intrinsic_from_system_value(gl_system_value val)
       return nir_intrinsic_load_tess_level_inner;
    case SYSTEM_VALUE_VERTICES_IN:
       return nir_intrinsic_load_patch_vertices_in;
+   case SYSTEM_VALUE_HELPER_INVOCATION:
+      return nir_intrinsic_load_helper_invocation;
    default:
       unreachable("system value does not directly correspond to intrinsic");
    }
@@ -1620,6 +1622,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
       return SYSTEM_VALUE_TESS_LEVEL_INNER;
    case nir_intrinsic_load_patch_vertices_in:
       return SYSTEM_VALUE_VERTICES_IN;
+   case nir_intrinsic_load_helper_invocation:
+      return SYSTEM_VALUE_HELPER_INVOCATION;
    default:
       unreachable("intrinsic doesn't produce a system value");
    }
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index b8d7d6c68cb..b2565c54b20 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -225,6 +225,7 @@ SYSTEM_VALUE(local_invocation_id, 3, 0)
 SYSTEM_VALUE(work_group_id, 3, 0)
 SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */
 SYSTEM_VALUE(num_work_groups, 3, 0)
+SYSTEM_VALUE(helper_invocation, 1, 0)
 
 /*
  * The format of the indices depends on the type of the load.  For uniforms,

From ce11d4f3695506bd00b319b6ebbfcb9168eb3b84 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 16 Nov 2015 09:28:02 -0800
Subject: [PATCH 220/335] i965: Don't bother setting regioning on immediates.

The region fields are unioned with the immediate storage.
---
 src/mesa/drivers/dri/i965/brw_reg.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index e34e7ea0a52..15de93aa8ad 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -618,9 +618,6 @@ static inline struct brw_reg
 brw_imm_v(unsigned v)
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
-   imm.vstride = BRW_VERTICAL_STRIDE_0;
-   imm.width = BRW_WIDTH_8;
-   imm.hstride = BRW_HORIZONTAL_STRIDE_1;
    imm.ud = v;
    return imm;
 }
@@ -630,9 +627,6 @@ static inline struct brw_reg
 brw_imm_vf(unsigned v)
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
-   imm.vstride = BRW_VERTICAL_STRIDE_0;
-   imm.width = BRW_WIDTH_4;
-   imm.hstride = BRW_HORIZONTAL_STRIDE_1;
    imm.ud = v;
    return imm;
 }

From 4b15281295791278b3cd63baccb528742de1d21f Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 16 Nov 2015 09:29:01 -0800
Subject: [PATCH 221/335] i965: Add brw_imm_uv().

---
 src/mesa/drivers/dri/i965/brw_reg.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index 15de93aa8ad..fa912c96c36 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -622,6 +622,15 @@ brw_imm_v(unsigned v)
    return imm;
 }
 
+/** Construct vector of eight unsigned half-byte values */
+static inline struct brw_reg
+brw_imm_uv(unsigned uv)
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UV);
+   imm.ud = uv;
+   return imm;
+}
+
 /** Construct vector of four 8-bit float values */
 static inline struct brw_reg
 brw_imm_vf(unsigned v)

From c875e3cdd21811ad6669160d59fa39a4526ef872 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Fri, 13 Nov 2015 17:51:12 -0800
Subject: [PATCH 222/335] i965/fs: Add support for gl_HelperInvocation system
 value.

In most cases (when the negate is copy propagated and the MOV removed),
this is two instructions on Gen >= 8 and only three instructions on
earlier platforms -- and it doesn't use the flag register.

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 52 ++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index b6e5ed6a72b..72190f3312c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -250,6 +250,57 @@ emit_system_values_block(nir_block *block, void *void_visitor)
             *reg = *v->emit_cs_work_group_id_setup();
          break;
 
+      case nir_intrinsic_load_helper_invocation:
+         assert(v->stage == MESA_SHADER_FRAGMENT);
+         reg = &v->nir_system_values[SYSTEM_VALUE_HELPER_INVOCATION];
+         if (reg->file == BAD_FILE) {
+            const fs_builder abld =
+               v->bld.annotate("gl_HelperInvocation", NULL);
+
+            /* On Gen6+ (gl_HelperInvocation is only exposed on Gen7+) the
+             * pixel mask is in g1.7 of the thread payload.
+             *
+             * We move the per-channel pixel enable bit to the low bit of each
+             * channel by shifting the byte containing the pixel mask by the
+             * vector immediate 0x76543210UV.
+             *
+             * The region of <1,8,0> reads only 1 byte (the pixel masks for
+             * subspans 0 and 1) in SIMD8 and an additional byte (the pixel
+             * masks for 2 and 3) in SIMD16.
+             */
+            fs_reg shifted = abld.vgrf(BRW_REGISTER_TYPE_UW, 1);
+            abld.SHR(shifted,
+                     stride(byte_offset(retype(brw_vec1_grf(1, 0),
+                                               BRW_REGISTER_TYPE_UB), 28),
+                            1, 8, 0),
+                     brw_imm_uv(0x76543210));
+
+            /* A set bit in the pixel mask means the channel is enabled, but
+             * that is the opposite of gl_HelperInvocation so we need to invert
+             * the mask.
+             *
+             * The negate source-modifier bit of logical instructions on Gen8+
+             * performs 1's complement negation, so we can use that instead of
+             * a NOT instruction.
+             */
+            fs_reg inverted = negate(shifted);
+            if (v->devinfo->gen < 8) {
+               inverted = abld.vgrf(BRW_REGISTER_TYPE_UW);
+               abld.NOT(inverted, shifted);
+            }
+
+            /* We then resolve the 0/1 result to 0/~0 boolean values by ANDing
+             * with 1 and negating.
+             */
+            fs_reg anded = abld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+            abld.AND(anded, inverted, brw_imm_uw(1));
+
+            fs_reg dst = abld.vgrf(BRW_REGISTER_TYPE_D, 1);
+            abld.MOV(dst, negate(retype(anded, BRW_REGISTER_TYPE_D)));
+            *reg = dst;
+         }
+         break;
+
       default:
          break;
       }
@@ -1776,6 +1827,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
       break;
    }
 
+   case nir_intrinsic_load_helper_invocation:
    case nir_intrinsic_load_sample_mask_in:
    case nir_intrinsic_load_sample_id: {
       gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);

From 0684aed8abc51308945ead050d2452b522937c0a Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Thu, 19 Nov 2015 21:48:47 -0800
Subject: [PATCH 223/335] i965/vec4: Initialize nir_inputs with src_reg().

nir_locals, nir_ssa_values, and nir_system_values are all dst_reg (not
that that makes a whole lot of sense to me), and only nir_inputs is a
src_reg.

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
---
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 28656d45b33..7e17a6a6246 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -122,7 +122,7 @@ vec4_visitor::nir_setup_inputs()
 {
    nir_inputs = ralloc_array(mem_ctx, src_reg, nir->num_inputs);
    for (unsigned i = 0; i < nir->num_inputs; i++) {
-      nir_inputs[i] = dst_reg();
+      nir_inputs[i] = src_reg();
    }
 
    nir_foreach_variable(var, &nir->inputs) {

From f450030f66036e560be87067c0a0cef33cd7b5ed Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Wed, 4 Nov 2015 14:31:59 -0800
Subject: [PATCH 224/335] i965: Use ldexpf() in VF float test set up.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 .../drivers/dri/i965/test_vf_float_conversions.cpp    | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/test_vf_float_conversions.cpp b/src/mesa/drivers/dri/i965/test_vf_float_conversions.cpp
index 6a8bceabf16..7f0342548f6 100644
--- a/src/mesa/drivers/dri/i965/test_vf_float_conversions.cpp
+++ b/src/mesa/drivers/dri/i965/test_vf_float_conversions.cpp
@@ -40,15 +40,10 @@ void vf_float_conversion_test::SetUp() {
       int ebits = (vf >> 4) & 0x7;
       int mbits = vf & 0xf;
 
-      int e = ebits - 3;
+      float x = 1.0f + mbits / 16.0f;
+      int exp = ebits - 3;
 
-      float value = 1.0f;
-
-      value += mbits / 16.0f;
-
-      value *= exp2f(e);
-
-      vf_to_float[vf] = value;
+      vf_to_float[vf] = ldexpf(x, exp);
    }
 }
 

From f6986a81c93de9796e2c805ef831bb110109ed23 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Wed, 4 Nov 2015 14:38:27 -0800
Subject: [PATCH 225/335] i965: Test that nonrepresentable floats cannot be
 converted to VF.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 .../dri/i965/test_vf_float_conversions.cpp        | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/test_vf_float_conversions.cpp b/src/mesa/drivers/dri/i965/test_vf_float_conversions.cpp
index 7f0342548f6..7af97d0d097 100644
--- a/src/mesa/drivers/dri/i965/test_vf_float_conversions.cpp
+++ b/src/mesa/drivers/dri/i965/test_vf_float_conversions.cpp
@@ -93,3 +93,18 @@ TEST_F(vf_float_conversion_test, test_special_case_0)
    EXPECT_EQ(f2u(brw_vf_to_float(brw_float_to_vf(+0.0f))), f2u(+0.0f));
    EXPECT_EQ(f2u(brw_vf_to_float(brw_float_to_vf(-0.0f))), f2u(-0.0f));
 }
+
+TEST_F(vf_float_conversion_test, test_nonrepresentable_float_input)
+{
+   EXPECT_EQ(brw_float_to_vf(+32.0f), -1);
+   EXPECT_EQ(brw_float_to_vf(-32.0f), -1);
+
+   EXPECT_EQ(brw_float_to_vf(+16.5f), -1);
+   EXPECT_EQ(brw_float_to_vf(-16.5f), -1);
+
+   EXPECT_EQ(brw_float_to_vf(+8.25f), -1);
+   EXPECT_EQ(brw_float_to_vf(-8.25f), -1);
+
+   EXPECT_EQ(brw_float_to_vf(+4.125f), -1);
+   EXPECT_EQ(brw_float_to_vf(-4.125f), -1);
+}

From d8c26969d5ea585bf53d6efdc5ba2c634b84a2a6 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 20 Nov 2015 20:40:45 -0500
Subject: [PATCH 226/335] freedreno/a4xx: add missing formats to enable
 ARB_vertex_type_2_10_10_10_rev

Same as commit 84d087aea but for a4xx. The RE'd enums had the same issue
too.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 docs/relnotes/11.1.0.html                       | 2 +-
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h   | 8 ++++----
 src/gallium/drivers/freedreno/a4xx/fd4_format.c | 4 ++++
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index c1d30e8e9ec..3c1150a6d2b 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -61,7 +61,7 @@ Note: some of the new features are only available with certain drivers.
 <li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li>
 <li>GL_ARB_texture_query_lod on softpipe</li>
 <li>GL_ARB_texture_view on radeonsi and r600 (for evergeen and newer)</li>
-<li>GL_ARB_vertex_type_2_10_10_10_rev on freedreno (a3xx)</li>
+<li>GL_ARB_vertex_type_2_10_10_10_rev on freedreno (a3xx, a4xx)</li>
 <li>GL_EXT_blend_func_extended on all drivers that support the ARB version</li>
 <li>GL_EXT_buffer_storage implemented for when ES 3.1 support is gained</li>
 <li>GL_EXT_draw_elements_base_vertex on all drivers</li>
diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index 4436697aad4..1192fc3c5fe 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -152,10 +152,10 @@ enum a4xx_vtx_fmt {
 	VFMT4_8_8_SNORM = 53,
 	VFMT4_8_8_8_SNORM = 54,
 	VFMT4_8_8_8_8_SNORM = 55,
-	VFMT4_10_10_10_2_UINT = 60,
-	VFMT4_10_10_10_2_UNORM = 61,
-	VFMT4_10_10_10_2_SINT = 62,
-	VFMT4_10_10_10_2_SNORM = 63,
+	VFMT4_10_10_10_2_UINT = 56,
+	VFMT4_10_10_10_2_UNORM = 57,
+	VFMT4_10_10_10_2_SINT = 58,
+	VFMT4_10_10_10_2_SNORM = 59,
 };
 
 enum a4xx_tex_fmt {
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
index a97e01aa6c2..eeaa0a2b847 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -200,9 +200,13 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
 	VT(B10G10R10A2_UNORM,   10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
 	_T(B10G10R10X2_UNORM,   10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
 	V_(R10G10B10A2_SNORM,   10_10_10_2_SNORM, NONE,              WZYX),
+	V_(B10G10R10A2_SNORM,   10_10_10_2_SNORM, NONE,              WXYZ),
 	V_(R10G10B10A2_UINT,    10_10_10_2_UINT,  NONE,              WZYX),
+	V_(B10G10R10A2_UINT,    10_10_10_2_UINT,  NONE,              WXYZ),
 	V_(R10G10B10A2_USCALED, 10_10_10_2_UINT,  NONE,              WZYX),
+	V_(B10G10R10A2_USCALED, 10_10_10_2_UINT,  NONE,              WXYZ),
 	V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT,  NONE,              WZYX),
+	V_(B10G10R10A2_SSCALED, 10_10_10_2_SINT,  NONE,              WXYZ),
 
 	_T(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX),
 	_T(R9G9B9E5_FLOAT,  9_9_9_E5_FLOAT, NONE,            WZYX),

From 380aec170331598f4098d6670baf67666535b264 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Sat, 21 Nov 2015 11:38:20 +0000
Subject: [PATCH 227/335] docs: add release notes for 11.0.6

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
(cherry picked from commit 04fd3a6f629b5098cc2a4da1f2392678349ecf56)
---
 docs/relnotes/11.0.6.html | 144 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 144 insertions(+)
 create mode 100644 docs/relnotes/11.0.6.html

diff --git a/docs/relnotes/11.0.6.html b/docs/relnotes/11.0.6.html
new file mode 100644
index 00000000000..2da9e98185c
--- /dev/null
+++ b/docs/relnotes/11.0.6.html
@@ -0,0 +1,144 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.0.6 Release Notes / November 21, 2015</h1>
+
+<p>
+Mesa 11.0.6 is a bug fix release which fixes bugs found since the 11.0.5 release.
+</p>
+<p>
+Mesa 11.0.6 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91780">Bug 91780</a> - Rendering issues with geometry shader</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92588">Bug 92588</a> - [HSW,BDW,BSW,SKL-Y][GLES 3.1 CTS] ES31-CTS.arrays_of_arrays.InteractionFunctionCalls2 - assert</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92738">Bug 92738</a> - Randon R7 240 doesn't work on 16KiB page size platform</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92860">Bug 92860</a> - [radeonsi][bisected] st/mesa: implement ARB_copy_image - Corruption in ARK Survival Evolved</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92900">Bug 92900</a> - [regression bisected] About 700 piglit regressions is what could go wrong</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alex Deucher (1):</p>
+<ul>
+  <li>radeonsi: enable optimal raster config setting for fiji (v2)</li>
+</ul>
+
+<p>Ben Widawsky (1):</p>
+<ul>
+  <li>i965/skl/gt4: Fix URB programming restriction.</li>
+</ul>
+
+<p>Boyuan Zhang (2):</p>
+<ul>
+  <li>st/vaapi: fix vaapi VC-1 simple/main corruption v2</li>
+  <li>radeon/uvd: fix VC-1 simple/main profile decode v2</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>r600: initialised PGM_RESOURCES_2 for ES/GS</li>
+</ul>
+
+<p>Emil Velikov (4):</p>
+<ul>
+  <li>docs: add sha256 checksums for 11.0.5</li>
+  <li>cherry-ignore: add the swrast front buffer support</li>
+  <li>automake: use static llvm for make distcheck</li>
+  <li>Update version to 11.0.6</li>
+</ul>
+
+<p>Eric Anholt (3):</p>
+<ul>
+  <li>vc4: Return GL_OUT_OF_MEMORY when buffer allocation fails.</li>
+  <li>vc4: Return NULL when we can't make our shadow for a sampler view.</li>
+  <li>vc4: Add support for nir_op_uge, using the carry bit on QPU_A_SUB.</li>
+</ul>
+
+<p>Ian Romanick (2):</p>
+<ul>
+  <li>meta/generate_mipmap: Don't leak the sampler object</li>
+  <li>meta/generate_mipmap: Only modify the draw framebuffer binding in fallback_required</li>
+</ul>
+
+<p>Ilia Mirkin (2):</p>
+<ul>
+  <li>mesa/copyimage: allow width/height to not be multiples of block</li>
+  <li>nouveau: don't expose HEVC decoding support</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>nir/vars_to_ssa: Rework copy set handling in lower_copies_to_load_store</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>glsl: Allow implicit int -&gt; uint conversions for the % operator.</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>radeonsi: initialize SX_PS_DOWNCONVERT to 0 on Stoney</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>winsys/radeon: Use CPU page size instead of hardcoding 4096 bytes v3</li>
+</ul>
+
+<p>Oded Gabbay (1):</p>
+<ul>
+  <li>llvmpipe: use simple coeffs calc for 128bit vectors</li>
+</ul>
+
+<p>Roland Scheidegger (2):</p>
+<ul>
+  <li>radeon: fix bgrx8/xrgb8 blits</li>
+  <li>r200: fix bgrx8/xrgb8 blits</li>
+</ul>
+
+
+</div>
+</body>
+</html>

From da2cb8a2eeca83b7f95978798ab77807a1c97aa2 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Sat, 21 Nov 2015 12:40:06 +0000
Subject: [PATCH 228/335] docs: add sha256 checksums for 11.0.6

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
(cherry picked from commit 2555e000fc1771dd231207247b5d9dfec4600fe8)
---
 docs/relnotes/11.0.6.html | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/relnotes/11.0.6.html b/docs/relnotes/11.0.6.html
index 2da9e98185c..4a1083c43b2 100644
--- a/docs/relnotes/11.0.6.html
+++ b/docs/relnotes/11.0.6.html
@@ -31,7 +31,8 @@ because compatibility contexts are not supported.
 
 <h2>SHA256 checksums</h2>
 <pre>
-TBD
+4bdf054af66ebabf3eca0616f9f5e44c2f234695661b570261c391bc2f4f7482  mesa-11.0.6.tar.gz
+8340e64cdc91999840404c211496f3de38e7b4cb38db34e2f72f1642c5134760  mesa-11.0.6.tar.xz
 </pre>
 
 

From 1a18457a52b22651a282ea0f94e6f7fc5e07ad22 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Sat, 21 Nov 2015 12:42:48 +0000
Subject: [PATCH 229/335] docs: add news item and link release notes for 11.0.6

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
---
 docs/index.html    | 6 ++++++
 docs/relnotes.html | 1 +
 2 files changed, 7 insertions(+)

diff --git a/docs/index.html b/docs/index.html
index 0e317fdf411..1f50d6b40d3 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -16,6 +16,12 @@
 
 <h1>News</h1>
 
+<h2>November 21, 2015</h2>
+<p>
+<a href="relnotes/11.0.6.html">Mesa 11.0.6</a> is released.
+This is a bug-fix release.
+</p>
+
 <h2>November 11, 2015</h2>
 <p>
 <a href="relnotes/11.0.5.html">Mesa 11.0.5</a> is released.
diff --git a/docs/relnotes.html b/docs/relnotes.html
index 588cf0b2c83..6a8ed324eb5 100644
--- a/docs/relnotes.html
+++ b/docs/relnotes.html
@@ -21,6 +21,7 @@ The release notes summarize what's new or changed in each Mesa release.
 </p>
 
 <ul>
+<li><a href="relnotes/11.0.6.html">11.0.6 release notes</a>
 <li><a href="relnotes/11.0.5.html">11.0.5 release notes</a>
 <li><a href="relnotes/11.0.4.html">11.0.4 release notes</a>
 <li><a href="relnotes/11.0.3.html">11.0.3 release notes</a>

From e762a46a074c2e95c0d4bf1765cb16044c1d0d9e Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 7 Oct 2015 18:05:04 +0100
Subject: [PATCH 230/335] configure: remove obsolete _CLIENT comment

The referenced variable(s) have been removed with commit abc20120e4a
(automake: pipe-loader: remove the 'client' pipe-loader)

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 configure.ac | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/configure.ac b/configure.ac
index 32fb989a898..027cfb1de90 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2273,11 +2273,6 @@ AM_CONDITIONAL(HAVE_GALLIUM_VIRGL, test "x$HAVE_GALLIUM_VIRGL" = xyes)
 
 AM_CONDITIONAL(HAVE_GALLIUM_STATIC_TARGETS, test "x$enable_shared_pipe_drivers" = xno)
 
-# NOTE: anything using xcb or other client side libs ends up in separate
-#       _CLIENT variables.  The pipe loader is built in two variants,
-#       one that is standalone and does not link any x client libs (for
-#       use by XA tracker in particular, but could be used in any case
-#       where communication with xserver is not desired).
 if test "x$enable_gallium_loader" = xyes; then
     if test "x$enable_dri" = xyes; then
         GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_DRI"

From c751d33a2082a44bddb23ee90225be4db0da587f Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Tue, 6 Oct 2015 22:25:47 +0100
Subject: [PATCH 231/335] gallium/trace: remove useless NULL check from
 trace_screen_create()

Currently every target makes sure that the screen is non-null prior to
using the debug (trace including) wrappers. If that no longer holds true
we want to know and fix this ASAP rather than silently bailing out.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/trace/tr_screen.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
index 8b02680c77e..62a51e9a94d 100644
--- a/src/gallium/drivers/trace/tr_screen.c
+++ b/src/gallium/drivers/trace/tr_screen.c
@@ -456,9 +456,6 @@ trace_screen_create(struct pipe_screen *screen)
 {
    struct trace_screen *tr_scr;
 
-   if(!screen)
-      goto error1;
-
    if (!trace_enabled())
       goto error1;
 

From b7875ca4939bc5db145b463b7a9a1ea0f9fbccb8 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 14 Oct 2015 16:20:32 +0100
Subject: [PATCH 232/335] pipe-loader: remove HAVE_PIPE_LOADER_foo function
 prototype guards

They serve little to no purpose, as we don't need any additional
dependencies (headers and/or symbols). On the other hand dropping them
will allow us to use GALLIUM_PIPE_LOADER_DEFINES in only one single
place - the pipe-loader.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/auxiliary/pipe-loader/pipe_loader.h | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.h b/src/gallium/auxiliary/pipe-loader/pipe_loader.h
index 9b8712666bb..7aa9c67d504 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader.h
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.h
@@ -112,8 +112,6 @@ pipe_loader_configuration(struct pipe_loader_device *dev,
 void
 pipe_loader_release(struct pipe_loader_device **devs, int ndev);
 
-#ifdef HAVE_PIPE_LOADER_DRI
-
 /**
  * Initialize sw dri device give the drisw_loader_funcs.
  *
@@ -125,8 +123,6 @@ bool
 pipe_loader_sw_probe_dri(struct pipe_loader_device **devs,
                          struct drisw_loader_funcs *drisw_lf);
 
-#endif
-
 /**
  * Initialize a null sw device.
  *
@@ -158,8 +154,6 @@ boolean
 pipe_loader_sw_probe_wrapped(struct pipe_loader_device **dev,
                              struct pipe_screen *screen);
 
-#ifdef HAVE_PIPE_LOADER_DRM
-
 /**
  * Get a list of known DRM devices.
  *
@@ -180,8 +174,6 @@ pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev);
 bool
 pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd);
 
-#endif
-
 #ifdef __cplusplus
 }
 #endif

From 6bcd5f0d02fff2db0c330af7d139d7a60b59e0e2 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 14 Oct 2015 16:23:50 +0100
Subject: [PATCH 233/335] automake: use GALLIUM_PIPE_LOADER_DEFINES only where
 applicable

As of last commit we no longer need the defines in order to have the
function prototypes.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/auxiliary/Makefile.am             | 1 -
 src/gallium/state_trackers/clover/Makefile.am | 1 -
 src/gallium/state_trackers/dri/Makefile.am    | 1 -
 src/gallium/state_trackers/xa/Makefile.am     | 1 -
 src/gallium/targets/d3dadapter9/Makefile.am   | 3 +--
 src/gallium/tests/trivial/Makefile.am         | 3 +--
 6 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/Makefile.am b/src/gallium/auxiliary/Makefile.am
index a728162bd9d..c33def742f1 100644
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -67,7 +67,6 @@ COMMON_VL_CFLAGS = \
 	$(VL_CFLAGS) \
 	$(DRI2PROTO_CFLAGS) \
 	$(LIBDRM_CFLAGS) \
-	$(GALLIUM_PIPE_LOADER_DEFINES) \
 	-DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\"
 
 if HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/state_trackers/clover/Makefile.am b/src/gallium/state_trackers/clover/Makefile.am
index fd0ccf88cc5..c6528ff97cb 100644
--- a/src/gallium/state_trackers/clover/Makefile.am
+++ b/src/gallium/state_trackers/clover/Makefile.am
@@ -1,7 +1,6 @@
 include Makefile.sources
 
 AM_CPPFLAGS = \
-	$(GALLIUM_PIPE_LOADER_DEFINES) \
 	-DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" \
 	-I$(top_srcdir)/include \
 	-I$(top_srcdir)/src \
diff --git a/src/gallium/state_trackers/dri/Makefile.am b/src/gallium/state_trackers/dri/Makefile.am
index 9f4deba0c1e..e407304fed9 100644
--- a/src/gallium/state_trackers/dri/Makefile.am
+++ b/src/gallium/state_trackers/dri/Makefile.am
@@ -25,7 +25,6 @@ include Makefile.sources
 include $(top_srcdir)/src/gallium/Automake.inc
 
 AM_CPPFLAGS = \
-	$(GALLIUM_PIPE_LOADER_DEFINES) \
 	-DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" \
 	-I$(top_srcdir)/include \
 	-I$(top_srcdir)/src/mapi \
diff --git a/src/gallium/state_trackers/xa/Makefile.am b/src/gallium/state_trackers/xa/Makefile.am
index 5051e8246e3..400a70b300f 100644
--- a/src/gallium/state_trackers/xa/Makefile.am
+++ b/src/gallium/state_trackers/xa/Makefile.am
@@ -29,7 +29,6 @@ AM_CFLAGS = \
 	$(VISIBILITY_CFLAGS)
 
 AM_CPPFLAGS = \
-	$(GALLIUM_PIPE_LOADER_DEFINES) \
 	-DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\"
 
 if HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/targets/d3dadapter9/Makefile.am b/src/gallium/targets/d3dadapter9/Makefile.am
index b5221472ef0..d36a8b80b74 100644
--- a/src/gallium/targets/d3dadapter9/Makefile.am
+++ b/src/gallium/targets/d3dadapter9/Makefile.am
@@ -42,8 +42,7 @@ AM_CPPFLAGS = \
 else
 
 AM_CPPFLAGS = \
-	-DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" \
-	$(GALLIUM_PIPE_LOADER_DEFINES)
+	-DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\"
 
 endif
 
diff --git a/src/gallium/tests/trivial/Makefile.am b/src/gallium/tests/trivial/Makefile.am
index 56b7f3ffc66..26783ab6f6d 100644
--- a/src/gallium/tests/trivial/Makefile.am
+++ b/src/gallium/tests/trivial/Makefile.am
@@ -8,8 +8,7 @@ AM_CFLAGS = \
 AM_CPPFLAGS = \
 	-I$(top_srcdir)/src/gallium/drivers \
 	-I$(top_srcdir)/src/gallium/winsys \
-	-DPIPE_SEARCH_DIR=\"$(PIPE_SRC_DIR)/.libs\" \
-	$(GALLIUM_PIPE_LOADER_DEFINES)
+	-DPIPE_SEARCH_DIR=\"$(PIPE_SRC_DIR)/.libs\"
 
 LDADD = \
 	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \

From f9c9471b768593d61c486f7a9c0dac52e22fff65 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Tue, 6 Oct 2015 23:04:22 +0100
Subject: [PATCH 234/335] targets/nine: use the existing sw_screen_wrap() over
 our custom version

Cc: Axel Davy <axel.davy@ens.fr>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 .../target-helpers/inline_sw_helper.h         | 27 -------------------
 src/gallium/targets/d3dadapter9/Makefile.am   |  1 -
 src/gallium/targets/d3dadapter9/drm.c         |  4 +--
 3 files changed, 2 insertions(+), 30 deletions(-)

diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
index f3693fb1f39..7e10c28f542 100644
--- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
@@ -104,33 +104,6 @@ drisw_create_screen(struct drisw_loader_funcs *lf)
 }
 #endif // DRI_TARGET
 
-#if defined(NINE_TARGET)
-#include "sw/wrapper/wrapper_sw_winsys.h"
-#include "target-helpers/inline_debug_helper.h"
-
-extern struct pipe_screen *ninesw_create_screen(struct pipe_screen *screen);
-
-inline struct pipe_screen *
-ninesw_create_screen(struct pipe_screen *pscreen)
-{
-   struct sw_winsys *winsys = NULL;
-   struct pipe_screen *screen = NULL;
-
-   winsys = wrapper_sw_winsys_wrap_pipe_screen(pscreen);
-   if (winsys == NULL)
-      return NULL;
-
-   screen = sw_screen_create(winsys);
-   if (screen == NULL) {
-      winsys->destroy(winsys);
-      return NULL;
-   }
-
-   screen = debug_screen_wrap(screen);
-   return screen;
-}
-#endif // NINE_TARGET
-
 #endif // GALLIUM_SOFTPIPE
 
 
diff --git a/src/gallium/targets/d3dadapter9/Makefile.am b/src/gallium/targets/d3dadapter9/Makefile.am
index d36a8b80b74..7ec5c834419 100644
--- a/src/gallium/targets/d3dadapter9/Makefile.am
+++ b/src/gallium/targets/d3dadapter9/Makefile.am
@@ -36,7 +36,6 @@ AM_CFLAGS = \
 
 if HAVE_GALLIUM_STATIC_TARGETS
 AM_CPPFLAGS = \
-	-DNINE_TARGET \
 	-DGALLIUM_STATIC_TARGETS=1
 
 else
diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c
index fabc820f268..89789fa3da9 100644
--- a/src/gallium/targets/d3dadapter9/drm.c
+++ b/src/gallium/targets/d3dadapter9/drm.c
@@ -30,7 +30,7 @@
 #include "pipe/p_state.h"
 
 #include "target-helpers/inline_drm_helper.h"
-#include "target-helpers/inline_sw_helper.h"
+#include "target-helpers/inline_wrapper_sw_helper.h"
 #include "state_tracker/drm_driver.h"
 
 #include "d3dadapter/d3dadapter9.h"
@@ -309,7 +309,7 @@ drm_create_adapter( int fd,
     driDestroyOptionInfo(&defaultInitOptions);
 
 #if GALLIUM_STATIC_TARGETS
-    ctx->base.ref = ninesw_create_screen(ctx->base.hal);
+    ctx->base.ref = sw_screen_wrap(ctx->base.hal);
 #else
     /* wrap it to create a software screen that can share resources */
     if (pipe_loader_sw_probe_wrapped(&ctx->swdev, ctx->base.hal)) {

From be430726e2586e1c9932953325b45e0e6a39f301 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Sun, 11 Oct 2015 11:53:45 +0100
Subject: [PATCH 235/335] configure: use HAVE_DRISW_KMS when handling kms
 swrast

Using HAVE_DRI2 to manage it seems counter-intuitive.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 configure.ac                              | 5 +++++
 src/gallium/Makefile.am                   | 2 +-
 src/gallium/drivers/softpipe/Automake.inc | 2 +-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/configure.ac b/configure.ac
index 027cfb1de90..98acfdda0a2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -956,8 +956,13 @@ gnu*|cygwin*)
     dri_platform='drm' ;;
 esac
 
+if test "x$enable_dri" = xyes -a "x$dri_platform" = xdrm -a "x$have_libdrm" = xyes; then
+    have_drisw_kms='yes'
+fi
+
 AM_CONDITIONAL(HAVE_DRICOMMON, test "x$enable_dri" = xyes )
 AM_CONDITIONAL(HAVE_DRISW, test "x$enable_dri" = xyes )
+AM_CONDITIONAL(HAVE_DRISW_KMS, test "x$have_drisw_kms" = xyes )
 AM_CONDITIONAL(HAVE_DRI2, test "x$enable_dri" = xyes -a "x$dri_platform" = xdrm -a "x$have_libdrm" = xyes )
 AM_CONDITIONAL(HAVE_DRI3, test "x$enable_dri3" = xyes -a "x$dri_platform" = xdrm -a "x$have_libdrm" = xyes )
 AM_CONDITIONAL(HAVE_APPLEDRI, test "x$enable_dri" = xyes -a "x$dri_platform" = xapple )
diff --git a/src/gallium/Makefile.am b/src/gallium/Makefile.am
index 611d55fafe2..2c7508385ca 100644
--- a/src/gallium/Makefile.am
+++ b/src/gallium/Makefile.am
@@ -98,7 +98,7 @@ if HAVE_DRISW
 SUBDIRS += winsys/sw/dri
 endif
 
-if HAVE_DRI2
+if HAVE_DRISW_KMS
 SUBDIRS += winsys/sw/kms-dri
 endif
 
diff --git a/src/gallium/drivers/softpipe/Automake.inc b/src/gallium/drivers/softpipe/Automake.inc
index 6455f3caa3d..5cedcef9772 100644
--- a/src/gallium/drivers/softpipe/Automake.inc
+++ b/src/gallium/drivers/softpipe/Automake.inc
@@ -6,7 +6,7 @@ TARGET_LIB_DEPS += \
 	$(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la \
 	$(top_builddir)/src/gallium/drivers/softpipe/libsoftpipe.la
 
-if HAVE_DRI2
+if HAVE_DRISW_KMS
 TARGET_DRIVERS += kms_swrast
 TARGET_LIB_DEPS += \
 	$(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la

From 33f1db1eb412382d2bd6552369e6f63bad52ca8d Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 14 Oct 2015 16:24:55 +0100
Subject: [PATCH 236/335] pipe-loader: add pipe_loader_sw_probe_kms()
 implementation

Will be used as a counterpart for target-helpers'
kms_swrast_create_screen().

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 configure.ac                                  |  4 ++++
 src/gallium/Automake.inc                      |  5 ++++
 .../auxiliary/pipe-loader/pipe_loader.h       | 10 ++++++++
 .../auxiliary/pipe-loader/pipe_loader_sw.c    | 24 +++++++++++++++++++
 4 files changed, 43 insertions(+)

diff --git a/configure.ac b/configure.ac
index 98acfdda0a2..322f7b643bd 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2283,6 +2283,10 @@ if test "x$enable_gallium_loader" = xyes; then
         GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_DRI"
     fi
 
+    if test "x$have_drisw_kms" = xyes; then
+        GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_KMS"
+    fi
+
     if test "x$enable_gallium_drm_loader" = xyes; then
         GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_DRM"
     fi
diff --git a/src/gallium/Automake.inc b/src/gallium/Automake.inc
index ee07ab6c8f9..095e6ec55fb 100644
--- a/src/gallium/Automake.inc
+++ b/src/gallium/Automake.inc
@@ -67,3 +67,8 @@ if HAVE_DRISW
 GALLIUM_PIPE_LOADER_WINSYS_LIBS += \
 	$(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la
 endif
+
+if HAVE_DRISW_KMS
+GALLIUM_PIPE_LOADER_WINSYS_LIBS += \
+	$(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la
+endif
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.h b/src/gallium/auxiliary/pipe-loader/pipe_loader.h
index 7aa9c67d504..8eba8a6f008 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader.h
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.h
@@ -123,6 +123,16 @@ bool
 pipe_loader_sw_probe_dri(struct pipe_loader_device **devs,
                          struct drisw_loader_funcs *drisw_lf);
 
+/**
+ * Initialize a kms backed sw device given an fd.
+ *
+ * This function is platform-specific.
+ *
+ * \sa pipe_loader_probe
+ */
+bool
+pipe_loader_sw_probe_kms(struct pipe_loader_device **devs, int fd);
+
 /**
  * Initialize a null sw device.
  *
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
index 6794930193d..86039a35ef7 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
@@ -30,6 +30,7 @@
 #include "util/u_memory.h"
 #include "util/u_dl.h"
 #include "sw/dri/dri_sw_winsys.h"
+#include "sw/kms-dri/kms_dri_sw_winsys.h"
 #include "sw/null/null_sw_winsys.h"
 #include "sw/wrapper/wrapper_sw_winsys.h"
 #include "target-helpers/inline_sw_helper.h"
@@ -72,6 +73,29 @@ pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, struct drisw_loader_f
 }
 #endif
 
+#ifdef HAVE_PIPE_LOADER_KMS
+bool
+pipe_loader_sw_probe_kms(struct pipe_loader_device **devs, int fd)
+{
+   struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
+
+   if (!sdev)
+      return false;
+
+   sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
+   sdev->base.driver_name = "swrast";
+   sdev->base.ops = &pipe_loader_sw_ops;
+   sdev->ws = kms_dri_create_winsys(fd);
+   if (!sdev->ws) {
+      FREE(sdev);
+      return false;
+   }
+   *devs = &sdev->base;
+
+   return true;
+}
+#endif
+
 bool
 pipe_loader_sw_probe_null(struct pipe_loader_device **devs)
 {

From 149454bb13aba2815e1a577073e5a834507cbb4c Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 14 Oct 2015 16:26:53 +0100
Subject: [PATCH 237/335] pipe-loader: remove HAVE_DRM_LOADER_GALLIUM and
 HAVE_PIPE_LOADER_DRM

... in favour of HAVE_LIBDRM. After all we solely want to build the code
when the latter is available.

In the not too distant future we will remove the libudev/sysfs
dependency and simplify configure.ac even further.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 configure.ac                                    | 9 ---------
 src/gallium/auxiliary/pipe-loader/Makefile.am   | 2 +-
 src/gallium/auxiliary/pipe-loader/pipe_loader.c | 2 +-
 3 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/configure.ac b/configure.ac
index 322f7b643bd..55c05018e64 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2077,10 +2077,6 @@ gallium_require_drm_loader() {
         if test "x$need_pci_id$have_pci_id" = xyesno; then
             AC_MSG_ERROR([Gallium drm loader requires libudev >= $LIBUDEV_REQUIRED or sysfs])
         fi
-        enable_gallium_drm_loader=yes
-    fi
-    if test "x$enable_va" = xyes && test "x$7" != x; then
-         GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $7"
     fi
 }
 
@@ -2287,10 +2283,6 @@ if test "x$enable_gallium_loader" = xyes; then
         GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_KMS"
     fi
 
-    if test "x$enable_gallium_drm_loader" = xyes; then
-        GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_DRM"
-    fi
-
     AC_SUBST([GALLIUM_PIPE_LOADER_DEFINES])
 fi
 
@@ -2308,7 +2300,6 @@ AM_CONDITIONAL(NEED_WINSYS_XLIB, test "x$NEED_WINSYS_XLIB" = xyes)
 AM_CONDITIONAL(NEED_RADEON_LLVM, test x$NEED_RADEON_LLVM = xyes)
 AM_CONDITIONAL(USE_R600_LLVM_COMPILER, test x$USE_R600_LLVM_COMPILER = xyes)
 AM_CONDITIONAL(HAVE_LOADER_GALLIUM, test x$enable_gallium_loader = xyes)
-AM_CONDITIONAL(HAVE_DRM_LOADER_GALLIUM, test x$enable_gallium_drm_loader = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_COMPUTE, test x$enable_opencl = xyes)
 AM_CONDITIONAL(HAVE_MESA_LLVM, test x$MESA_LLVM = x1)
 AM_CONDITIONAL(USE_VC4_SIMULATOR, test x$USE_VC4_SIMULATOR = xyes)
diff --git a/src/gallium/auxiliary/pipe-loader/Makefile.am b/src/gallium/auxiliary/pipe-loader/Makefile.am
index 8c837996539..e12620c4418 100644
--- a/src/gallium/auxiliary/pipe-loader/Makefile.am
+++ b/src/gallium/auxiliary/pipe-loader/Makefile.am
@@ -14,7 +14,7 @@ noinst_LTLIBRARIES = libpipe_loader.la
 libpipe_loader_la_SOURCES = \
 	$(COMMON_SOURCES)
 
-if HAVE_DRM_LOADER_GALLIUM
+if HAVE_LIBDRM
 AM_CFLAGS += \
 	$(LIBDRM_CFLAGS)
 
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.c b/src/gallium/auxiliary/pipe-loader/pipe_loader.c
index 8e79f853b0a..0ca2e8ce4cd 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.c
@@ -35,7 +35,7 @@
 #define MODULE_PREFIX "pipe_"
 
 static int (*backends[])(struct pipe_loader_device **, int) = {
-#ifdef HAVE_PIPE_LOADER_DRM
+#ifdef HAVE_LIBDRM
    &pipe_loader_drm_probe,
 #endif
    &pipe_loader_sw_probe

From cbc4d9730a7c48744fe7ac62d55b40a80e8fc2e2 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 14 Oct 2015 14:52:37 +0100
Subject: [PATCH 238/335] targets/nine: remove the custom pipe-driver path
 management

Since the up-streaming of nine, the static target has been used by
default, with the dynamic pipe-drivers being available only via a manual
tweak of configure.ac.

As we'll be removing the library_path argument from the pipe-loader with
follow-up commits, we can remove D3D9_DRIVERS_PATH/D3D9_DRIVERS_DIR.
Anyone doing local hacking on nine, or wishing to have an env override,
can bring them back within the pipe-loader.

Cc: Axel Davy <axel.davy@ens.fr>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/targets/d3dadapter9/drm.c | 20 ++------------------
 1 file changed, 2 insertions(+), 18 deletions(-)

diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c
index 89789fa3da9..92567ac65a3 100644
--- a/src/gallium/targets/d3dadapter9/drm.c
+++ b/src/gallium/targets/d3dadapter9/drm.c
@@ -215,14 +215,6 @@ drm_create_adapter( int fd,
     driOptionCache userInitOptions;
     int throttling_value_user = -2;
 
-#if !GALLIUM_STATIC_TARGETS
-    const char *paths[] = {
-        getenv("D3D9_DRIVERS_PATH"),
-        getenv("D3D9_DRIVERS_DIR"),
-        PIPE_SEARCH_DIR
-    };
-#endif
-
     if (!ctx) { return E_OUTOFMEMORY; }
 
     ctx->base.destroy = drm_destroy;
@@ -243,11 +235,7 @@ drm_create_adapter( int fd,
     }
 
     /* use pipe-loader to create a drm screen (hal) */
-    ctx->base.hal = NULL;
-    for (i = 0; !ctx->base.hal && i < Elements(paths); ++i) {
-        if (!paths[i]) { continue; }
-        ctx->base.hal = pipe_loader_create_screen(ctx->dev, paths[i]);
-    }
+    ctx->base.hal = pipe_loader_create_screen(ctx->dev, PIPE_SEARCH_DIR);
 #endif
     if (!ctx->base.hal) {
         ERR("Unable to load requested driver.\n");
@@ -313,11 +301,7 @@ drm_create_adapter( int fd,
 #else
     /* wrap it to create a software screen that can share resources */
     if (pipe_loader_sw_probe_wrapped(&ctx->swdev, ctx->base.hal)) {
-        ctx->base.ref = NULL;
-        for (i = 0; !ctx->base.ref && i < Elements(paths); ++i) {
-            if (!paths[i]) { continue; }
-            ctx->base.ref = pipe_loader_create_screen(ctx->swdev, paths[i]);
-        }
+        ctx->base.ref = pipe_loader_create_screen(ctx->swdev, PIPE_SEARCH_DIR);
     }
 #endif
     if (!ctx->base.ref) {

From 74d41a32bc179425e866f8afa33a222488ea7760 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 14 Oct 2015 16:37:37 +0100
Subject: [PATCH 239/335] gallium: remove library_path argument from
 pipe_loader_create_screen()

Currently the location is determined at configure/build time and
consistently copied across gallium. Just remove the extra argument, and
use PIPE_SEARCH_DIR where appropriate.

This will allow us to remove the duplication in the *configuration and
*screen_create APIs by moving util_dl_get_proc_address() and friends to
probe time.

v2: rebase on top of vl_winsys_drm.c addition

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/auxiliary/Makefile.am                    | 3 +--
 src/gallium/auxiliary/pipe-loader/Makefile.am        | 1 +
 src/gallium/auxiliary/pipe-loader/pipe_loader.c      | 5 ++---
 src/gallium/auxiliary/pipe-loader/pipe_loader.h      | 6 +-----
 src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c  | 5 ++---
 src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h | 3 +--
 src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c   | 5 ++---
 src/gallium/auxiliary/vl/vl_winsys_dri.c             | 2 +-
 src/gallium/auxiliary/vl/vl_winsys_drm.c             | 6 ++----
 src/gallium/state_trackers/clover/Makefile.am        | 1 -
 src/gallium/state_trackers/clover/core/device.cpp    | 2 +-
 src/gallium/state_trackers/dri/Makefile.am           | 1 -
 src/gallium/state_trackers/dri/dri2.c                | 2 +-
 src/gallium/state_trackers/xa/Makefile.am            | 5 +----
 src/gallium/state_trackers/xa/xa_tracker.c           | 2 +-
 src/gallium/targets/d3dadapter9/Makefile.am          | 5 -----
 src/gallium/targets/d3dadapter9/drm.c                | 4 ++--
 src/gallium/tests/trivial/Makefile.am                | 3 +--
 src/gallium/tests/trivial/compute.c                  | 2 +-
 src/gallium/tests/trivial/quad-tex.c                 | 2 +-
 src/gallium/tests/trivial/tri.c                      | 2 +-
 21 files changed, 23 insertions(+), 44 deletions(-)

diff --git a/src/gallium/auxiliary/Makefile.am b/src/gallium/auxiliary/Makefile.am
index c33def742f1..0908608d2db 100644
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -66,8 +66,7 @@ COMMON_VL_CFLAGS = \
 	$(AM_CFLAGS) \
 	$(VL_CFLAGS) \
 	$(DRI2PROTO_CFLAGS) \
-	$(LIBDRM_CFLAGS) \
-	-DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\"
+	$(LIBDRM_CFLAGS)
 
 if HAVE_GALLIUM_STATIC_TARGETS
 COMMON_VL_CFLAGS += \
diff --git a/src/gallium/auxiliary/pipe-loader/Makefile.am b/src/gallium/auxiliary/pipe-loader/Makefile.am
index e12620c4418..1597b79598e 100644
--- a/src/gallium/auxiliary/pipe-loader/Makefile.am
+++ b/src/gallium/auxiliary/pipe-loader/Makefile.am
@@ -5,6 +5,7 @@ include $(top_srcdir)/src/gallium/Automake.inc
 AM_CFLAGS = \
 	-I$(top_srcdir)/src/loader \
 	-I$(top_srcdir)/src/gallium/winsys \
+	-DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" \
 	$(GALLIUM_PIPE_LOADER_DEFINES) \
 	$(GALLIUM_CFLAGS) \
 	$(VISIBILITY_CFLAGS)
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.c b/src/gallium/auxiliary/pipe-loader/pipe_loader.c
index 0ca2e8ce4cd..40df2167797 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.c
@@ -69,10 +69,9 @@ pipe_loader_configuration(struct pipe_loader_device *dev,
 }
 
 struct pipe_screen *
-pipe_loader_create_screen(struct pipe_loader_device *dev,
-                          const char *library_paths)
+pipe_loader_create_screen(struct pipe_loader_device *dev)
 {
-   return dev->ops->create_screen(dev, library_paths);
+   return dev->ops->create_screen(dev);
 }
 
 struct util_dl_library *
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.h b/src/gallium/auxiliary/pipe-loader/pipe_loader.h
index 8eba8a6f008..690d088ed82 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader.h
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.h
@@ -82,13 +82,9 @@ pipe_loader_probe(struct pipe_loader_device **devs, int ndev);
  * Create a pipe_screen for the specified device.
  *
  * \param dev Device the screen will be created for.
- * \param library_paths Colon-separated list of filesystem paths that
- *                      will be used to look for the pipe driver
- *                      module that handles this device.
  */
 struct pipe_screen *
-pipe_loader_create_screen(struct pipe_loader_device *dev,
-                          const char *library_paths);
+pipe_loader_create_screen(struct pipe_loader_device *dev);
 
 /**
  * Query the configuration parameters for the specified device.
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
index 1799df7e4c5..3b858e7425c 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -165,14 +165,13 @@ pipe_loader_drm_configuration(struct pipe_loader_device *dev,
 }
 
 static struct pipe_screen *
-pipe_loader_drm_create_screen(struct pipe_loader_device *dev,
-                              const char *library_paths)
+pipe_loader_drm_create_screen(struct pipe_loader_device *dev)
 {
    struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(dev);
    const struct drm_driver_descriptor *dd;
 
    if (!ddev->lib)
-      ddev->lib = pipe_loader_find_module(dev, library_paths);
+      ddev->lib = pipe_loader_find_module(&ddev->base, PIPE_SEARCH_DIR);
    if (!ddev->lib)
       return NULL;
 
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h b/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h
index d3b025221c5..da2ca8c6e1f 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h
@@ -31,8 +31,7 @@
 #include "pipe_loader.h"
 
 struct pipe_loader_ops {
-   struct pipe_screen *(*create_screen)(struct pipe_loader_device *dev,
-                                        const char *library_paths);
+   struct pipe_screen *(*create_screen)(struct pipe_loader_device *dev);
 
    const struct drm_conf_ret *(*configuration)(struct pipe_loader_device *dev,
                                                enum drm_conf conf);
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
index 86039a35ef7..c79f7c9ee34 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
@@ -180,14 +180,13 @@ pipe_loader_sw_configuration(struct pipe_loader_device *dev,
 }
 
 static struct pipe_screen *
-pipe_loader_sw_create_screen(struct pipe_loader_device *dev,
-                             const char *library_paths)
+pipe_loader_sw_create_screen(struct pipe_loader_device *dev)
 {
    struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(dev);
    struct pipe_screen *(*init)(struct sw_winsys *);
 
    if (!sdev->lib)
-      sdev->lib = pipe_loader_find_module(dev, library_paths);
+      sdev->lib = pipe_loader_find_module(&sdev->base, PIPE_SEARCH_DIR);
    if (!sdev->lib)
       return NULL;
 
diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri.c b/src/gallium/auxiliary/vl/vl_winsys_dri.c
index e0683a5e7d6..fb16adc966c 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_dri.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri.c
@@ -406,7 +406,7 @@ vl_dri2_screen_create(Display *display, int screen)
    scrn->base.pscreen = dd_create_screen(fd);
 #else
    if (pipe_loader_drm_probe_fd(&scrn->base.dev, fd))
-      scrn->base.pscreen = pipe_loader_create_screen(scrn->base.dev, PIPE_SEARCH_DIR);
+      scrn->base.pscreen = pipe_loader_create_screen(scrn->base.dev);
 #endif // GALLIUM_STATIC_TARGETS
 
    if (!scrn->base.pscreen)
diff --git a/src/gallium/auxiliary/vl/vl_winsys_drm.c b/src/gallium/auxiliary/vl/vl_winsys_drm.c
index c96187bf1c5..f4e8306b67c 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_drm.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_drm.c
@@ -49,10 +49,8 @@ vl_drm_screen_create(int fd)
 #if GALLIUM_STATIC_TARGETS
    vscreen->pscreen = dd_create_screen(fd);
 #else
-   if (pipe_loader_drm_probe_fd(&vscreen->dev, dup(fd))) {
-      vscreen->pscreen =
-         pipe_loader_create_screen(vscreen->dev, PIPE_SEARCH_DIR);
-   }
+   if (pipe_loader_drm_probe_fd(&vscreen->dev, dup(fd)))
+      vscreen->pscreen = pipe_loader_create_screen(vscreen->dev);
 #endif
 
    if (!vscreen->pscreen)
diff --git a/src/gallium/state_trackers/clover/Makefile.am b/src/gallium/state_trackers/clover/Makefile.am
index c6528ff97cb..3c9421692fc 100644
--- a/src/gallium/state_trackers/clover/Makefile.am
+++ b/src/gallium/state_trackers/clover/Makefile.am
@@ -1,7 +1,6 @@
 include Makefile.sources
 
 AM_CPPFLAGS = \
-	-DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" \
 	-I$(top_srcdir)/include \
 	-I$(top_srcdir)/src \
 	-I$(top_srcdir)/src/gallium/include \
diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp
index 6efff79c7f4..1be2f6413f4 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -41,7 +41,7 @@ namespace {
 
 device::device(clover::platform &platform, pipe_loader_device *ldev) :
    platform(platform), ldev(ldev) {
-   pipe = pipe_loader_create_screen(ldev, PIPE_SEARCH_DIR);
+   pipe = pipe_loader_create_screen(ldev);
    if (!pipe || !pipe->get_param(pipe, PIPE_CAP_COMPUTE)) {
       if (pipe)
          pipe->destroy(pipe);
diff --git a/src/gallium/state_trackers/dri/Makefile.am b/src/gallium/state_trackers/dri/Makefile.am
index e407304fed9..102b84390bb 100644
--- a/src/gallium/state_trackers/dri/Makefile.am
+++ b/src/gallium/state_trackers/dri/Makefile.am
@@ -25,7 +25,6 @@ include Makefile.sources
 include $(top_srcdir)/src/gallium/Automake.inc
 
 AM_CPPFLAGS = \
-	-DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" \
 	-I$(top_srcdir)/include \
 	-I$(top_srcdir)/src/mapi \
 	-I$(top_srcdir)/src/mesa \
diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c
index 019414b56fe..a11f3b8d21c 100644
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -1464,7 +1464,7 @@ dri2_init_screen(__DRIscreen * sPriv)
    dmabuf_ret = dd_configuration(DRM_CONF_SHARE_FD);
 #else
    if (pipe_loader_drm_probe_fd(&screen->dev, screen->fd)) {
-      pscreen = pipe_loader_create_screen(screen->dev, PIPE_SEARCH_DIR);
+      pscreen = pipe_loader_create_screen(screen->dev);
 
       throttle_ret = pipe_loader_configuration(screen->dev, DRM_CONF_THROTTLE);
       dmabuf_ret = pipe_loader_configuration(screen->dev, DRM_CONF_SHARE_FD);
diff --git a/src/gallium/state_trackers/xa/Makefile.am b/src/gallium/state_trackers/xa/Makefile.am
index 400a70b300f..0d50c27253b 100644
--- a/src/gallium/state_trackers/xa/Makefile.am
+++ b/src/gallium/state_trackers/xa/Makefile.am
@@ -28,11 +28,8 @@ AM_CFLAGS = \
 	$(GALLIUM_CFLAGS) \
 	$(VISIBILITY_CFLAGS)
 
-AM_CPPFLAGS = \
-	-DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\"
-
 if HAVE_GALLIUM_STATIC_TARGETS
-AM_CPPFLAGS += \
+AM_CPPFLAGS = \
 	-DGALLIUM_STATIC_TARGETS=1
 endif
 
diff --git a/src/gallium/state_trackers/xa/xa_tracker.c b/src/gallium/state_trackers/xa/xa_tracker.c
index 4fdbdc96ae6..3011598e0d9 100644
--- a/src/gallium/state_trackers/xa/xa_tracker.c
+++ b/src/gallium/state_trackers/xa/xa_tracker.c
@@ -165,7 +165,7 @@ xa_tracker_create(int drm_fd)
     if (loader_fd == -1)
         return NULL;
     if (pipe_loader_drm_probe_fd(&xa->dev, loader_fd))
-	xa->screen = pipe_loader_create_screen(xa->dev, PIPE_SEARCH_DIR);
+	xa->screen = pipe_loader_create_screen(xa->dev);
 #endif
     if (!xa->screen)
 	goto out_no_screen;
diff --git a/src/gallium/targets/d3dadapter9/Makefile.am b/src/gallium/targets/d3dadapter9/Makefile.am
index 7ec5c834419..bd6d620e819 100644
--- a/src/gallium/targets/d3dadapter9/Makefile.am
+++ b/src/gallium/targets/d3dadapter9/Makefile.am
@@ -38,11 +38,6 @@ if HAVE_GALLIUM_STATIC_TARGETS
 AM_CPPFLAGS = \
 	-DGALLIUM_STATIC_TARGETS=1
 
-else
-
-AM_CPPFLAGS = \
-	-DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\"
-
 endif
 
 ninedir = $(D3D_DRIVER_INSTALL_DIR)
diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c
index 92567ac65a3..78896cb34fd 100644
--- a/src/gallium/targets/d3dadapter9/drm.c
+++ b/src/gallium/targets/d3dadapter9/drm.c
@@ -235,7 +235,7 @@ drm_create_adapter( int fd,
     }
 
     /* use pipe-loader to create a drm screen (hal) */
-    ctx->base.hal = pipe_loader_create_screen(ctx->dev, PIPE_SEARCH_DIR);
+    ctx->base.hal = pipe_loader_create_screen(ctx->dev);
 #endif
     if (!ctx->base.hal) {
         ERR("Unable to load requested driver.\n");
@@ -301,7 +301,7 @@ drm_create_adapter( int fd,
 #else
     /* wrap it to create a software screen that can share resources */
     if (pipe_loader_sw_probe_wrapped(&ctx->swdev, ctx->base.hal)) {
-        ctx->base.ref = pipe_loader_create_screen(ctx->swdev, PIPE_SEARCH_DIR);
+        ctx->base.ref = pipe_loader_create_screen(ctx->swdev);
     }
 #endif
     if (!ctx->base.ref) {
diff --git a/src/gallium/tests/trivial/Makefile.am b/src/gallium/tests/trivial/Makefile.am
index 26783ab6f6d..4a86dc8cfa0 100644
--- a/src/gallium/tests/trivial/Makefile.am
+++ b/src/gallium/tests/trivial/Makefile.am
@@ -7,8 +7,7 @@ AM_CFLAGS = \
 
 AM_CPPFLAGS = \
 	-I$(top_srcdir)/src/gallium/drivers \
-	-I$(top_srcdir)/src/gallium/winsys \
-	-DPIPE_SEARCH_DIR=\"$(PIPE_SRC_DIR)/.libs\"
+	-I$(top_srcdir)/src/gallium/winsys
 
 LDADD = \
 	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
diff --git a/src/gallium/tests/trivial/compute.c b/src/gallium/tests/trivial/compute.c
index b344f78b25c..bcdfb11c4f1 100644
--- a/src/gallium/tests/trivial/compute.c
+++ b/src/gallium/tests/trivial/compute.c
@@ -74,7 +74,7 @@ static void init_ctx(struct context *ctx)
         ret = pipe_loader_probe(&ctx->dev, 1);
         assert(ret);
 
-        ctx->screen = pipe_loader_create_screen(ctx->dev, PIPE_SEARCH_DIR);
+        ctx->screen = pipe_loader_create_screen(ctx->dev);
         assert(ctx->screen);
 
         ctx->pipe = ctx->screen->context_create(ctx->screen, NULL, 0);
diff --git a/src/gallium/tests/trivial/quad-tex.c b/src/gallium/tests/trivial/quad-tex.c
index f66f63043da..4c5a9200a52 100644
--- a/src/gallium/tests/trivial/quad-tex.c
+++ b/src/gallium/tests/trivial/quad-tex.c
@@ -96,7 +96,7 @@ static void init_prog(struct program *p)
 	assert(ret);
 
 	/* init a pipe screen */
-	p->screen = pipe_loader_create_screen(p->dev, PIPE_SEARCH_DIR);
+	p->screen = pipe_loader_create_screen(p->dev);
 	assert(p->screen);
 
 	/* create the pipe driver context and cso context */
diff --git a/src/gallium/tests/trivial/tri.c b/src/gallium/tests/trivial/tri.c
index a555200842e..c71a63f44e5 100644
--- a/src/gallium/tests/trivial/tri.c
+++ b/src/gallium/tests/trivial/tri.c
@@ -91,7 +91,7 @@ static void init_prog(struct program *p)
 	assert(ret);
 
 	/* init a pipe screen */
-	p->screen = pipe_loader_create_screen(p->dev, PIPE_SEARCH_DIR);
+	p->screen = pipe_loader_create_screen(p->dev);
 	assert(p->screen);
 
 	/* create the pipe driver context and cso context */

From 6d68d714c0ef6afa9666b6ed5f45bf998024805f Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 14 Oct 2015 16:16:52 +0100
Subject: [PATCH 240/335] gallium/tests: remove unneeded include paths

The tests don't (and shouldn't) need to have anything driver and/or
winsys specific.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/tests/trivial/Makefile.am | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/gallium/tests/trivial/Makefile.am b/src/gallium/tests/trivial/Makefile.am
index 4a86dc8cfa0..b30cb13eee6 100644
--- a/src/gallium/tests/trivial/Makefile.am
+++ b/src/gallium/tests/trivial/Makefile.am
@@ -5,10 +5,6 @@ PIPE_SRC_DIR = $(top_builddir)/src/gallium/targets/pipe-loader
 AM_CFLAGS = \
 	$(GALLIUM_CFLAGS)
 
-AM_CPPFLAGS = \
-	-I$(top_srcdir)/src/gallium/drivers \
-	-I$(top_srcdir)/src/gallium/winsys
-
 LDADD = \
 	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
 	$(top_builddir)/src/gallium/auxiliary/libgallium.la \

From 4e3c06a501cbba8cc463c3c244d6cb838e3be782 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 14 Oct 2015 19:35:51 +0100
Subject: [PATCH 241/335] pipe-loader: add pipe_loader_sw_probe_init_common()
 helper

Allows us to fold the duplication in pipe_loader_sw_probe_*().

Cc: Tom Stellard <thomas.stellard@amd.com>
Cc: Francisco Jerez <currojerez@riseup.net>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 .../auxiliary/pipe-loader/pipe_loader_sw.c    | 38 +++++++++----------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
index c79f7c9ee34..4b6e884cd9f 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
@@ -50,6 +50,19 @@ static struct sw_winsys *(*backends[])() = {
    null_sw_create
 };
 
+static bool
+pipe_loader_sw_probe_init_common(struct pipe_loader_sw_device *sdev)
+{
+   if (!sdev->ws)
+      return false;
+
+   sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
+   sdev->base.driver_name = "swrast";
+   sdev->base.ops = &pipe_loader_sw_ops;
+
+   return true;
+}
+
 #ifdef HAVE_PIPE_LOADER_DRI
 bool
 pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, struct drisw_loader_funcs *drisw_lf)
@@ -59,11 +72,8 @@ pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, struct drisw_loader_f
    if (!sdev)
       return false;
 
-   sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
-   sdev->base.driver_name = "swrast";
-   sdev->base.ops = &pipe_loader_sw_ops;
    sdev->ws = dri_create_sw_winsys(drisw_lf);
-   if (!sdev->ws) {
+   if (!pipe_loader_sw_probe_init_common(sdev)) {
       FREE(sdev);
       return false;
    }
@@ -82,11 +92,8 @@ pipe_loader_sw_probe_kms(struct pipe_loader_device **devs, int fd)
    if (!sdev)
       return false;
 
-   sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
-   sdev->base.driver_name = "swrast";
-   sdev->base.ops = &pipe_loader_sw_ops;
    sdev->ws = kms_dri_create_winsys(fd);
-   if (!sdev->ws) {
+   if (!pipe_loader_sw_probe_init_common(sdev)) {
       FREE(sdev);
       return false;
    }
@@ -104,11 +111,8 @@ pipe_loader_sw_probe_null(struct pipe_loader_device **devs)
    if (!sdev)
       return false;
 
-   sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
-   sdev->base.driver_name = "swrast";
-   sdev->base.ops = &pipe_loader_sw_ops;
    sdev->ws = null_sw_create();
-   if (!sdev->ws) {
+   if (!pipe_loader_sw_probe_init_common(sdev)) {
       FREE(sdev);
       return false;
    }
@@ -127,10 +131,8 @@ pipe_loader_sw_probe(struct pipe_loader_device **devs, int ndev)
          struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
 	 /* TODO: handle CALLOC_STRUCT failure */
 
-         sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
-         sdev->base.driver_name = "swrast";
-         sdev->base.ops = &pipe_loader_sw_ops;
          sdev->ws = backends[i]();
+         pipe_loader_sw_probe_init_common(sdev);
          devs[i] = &sdev->base;
       }
    }
@@ -147,12 +149,8 @@ pipe_loader_sw_probe_wrapped(struct pipe_loader_device **dev,
    if (!sdev)
       return false;
 
-   sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
-   sdev->base.driver_name = "swrast";
-   sdev->base.ops = &pipe_loader_sw_ops;
    sdev->ws = wrapper_sw_winsys_wrap_pipe_screen(screen);
-
-   if (!sdev->ws) {
+   if (!pipe_loader_sw_probe_init_common(sdev)) {
       FREE(sdev);
       return false;
    }

From ff9cd8a67cabe4a7bbfec941666a216617f18103 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 14 Oct 2015 21:31:24 +0100
Subject: [PATCH 242/335] pipe-loader: directly use pipe_loader_sw_probe_null()
 at probe time

Due to the nature of the other sw winsys backends, we cannot use them
during the generic probe stage. As such there is little point in keeping
the abstraction layer.

Cc: Tom Stellard <thomas.stellard@amd.com>
Cc: Francisco Jerez <currojerez@riseup.net>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 .../auxiliary/pipe-loader/pipe_loader_sw.c      | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
index 4b6e884cd9f..c61f2b8882c 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
@@ -46,10 +46,6 @@ struct pipe_loader_sw_device {
 
 static struct pipe_loader_ops pipe_loader_sw_ops;
 
-static struct sw_winsys *(*backends[])() = {
-   null_sw_create
-};
-
 static bool
 pipe_loader_sw_probe_init_common(struct pipe_loader_sw_device *sdev)
 {
@@ -124,16 +120,11 @@ pipe_loader_sw_probe_null(struct pipe_loader_device **devs)
 int
 pipe_loader_sw_probe(struct pipe_loader_device **devs, int ndev)
 {
-   int i;
+   int i = 1;
 
-   for (i = 0; i < Elements(backends); i++) {
-      if (i < ndev) {
-         struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
-	 /* TODO: handle CALLOC_STRUCT failure */
-
-         sdev->ws = backends[i]();
-         pipe_loader_sw_probe_init_common(sdev);
-         devs[i] = &sdev->base;
+   if (i < ndev) {
+      if (!pipe_loader_sw_probe_null(devs)) {
+         i--;
       }
    }
 

From f58a6f7be3efa6a13d7ac321f304de2703870def Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Sat, 17 Oct 2015 23:23:49 +0100
Subject: [PATCH 243/335] gallium: keep the libdrm link alongside libkmsdri.la

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/Automake.inc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/Automake.inc b/src/gallium/Automake.inc
index 095e6ec55fb..6fe2e22fecf 100644
--- a/src/gallium/Automake.inc
+++ b/src/gallium/Automake.inc
@@ -70,5 +70,6 @@ endif
 
 if HAVE_DRISW_KMS
 GALLIUM_PIPE_LOADER_WINSYS_LIBS += \
-	$(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la
+	$(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \
+	$(LIBDRM_LIBS)
 endif

From d54ca54faa2a6dde3c4d2125fd41d10dfcf2f91e Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Sat, 17 Oct 2015 21:51:24 +0100
Subject: [PATCH 244/335] pipe-loader: rework the sw backend

Move the winsys into the pipe-target, similar to the hardware
pipe-driver.

v2:
 - move int declaration outside of loop (Brian)
 - fold the teardown into a goto + separate function.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 .../auxiliary/pipe-loader/pipe_loader_sw.c    | 133 +++++++++++++-----
 src/gallium/include/state_tracker/sw_driver.h |  21 +++
 src/gallium/targets/d3dadapter9/Makefile.am   |   3 +-
 src/gallium/targets/dri/Makefile.am           |   3 +-
 src/gallium/targets/omx/Makefile.am           |   3 +-
 src/gallium/targets/opencl/Makefile.am        |   1 -
 src/gallium/targets/pipe-loader/Makefile.am   |   5 +
 src/gallium/targets/pipe-loader/pipe.sym      |   2 +-
 src/gallium/targets/pipe-loader/pipe_swrast.c |  34 ++++-
 src/gallium/targets/va/Makefile.am            |   3 +-
 src/gallium/targets/vdpau/Makefile.am         |   3 +-
 src/gallium/targets/xa/Makefile.am            |   3 +-
 src/gallium/targets/xvmc/Makefile.am          |   3 +-
 src/gallium/tests/trivial/Makefile.am         |   1 -
 14 files changed, 161 insertions(+), 57 deletions(-)
 create mode 100644 src/gallium/include/state_tracker/sw_driver.h

diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
index c61f2b8882c..816ff1c85d3 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
@@ -35,9 +35,11 @@
 #include "sw/wrapper/wrapper_sw_winsys.h"
 #include "target-helpers/inline_sw_helper.h"
 #include "state_tracker/drisw_api.h"
+#include "state_tracker/sw_driver.h"
 
 struct pipe_loader_sw_device {
    struct pipe_loader_device base;
+   const struct sw_driver_descriptor *dd;
    struct util_dl_library *lib;
    struct sw_winsys *ws;
 };
@@ -49,33 +51,62 @@ static struct pipe_loader_ops pipe_loader_sw_ops;
 static bool
 pipe_loader_sw_probe_init_common(struct pipe_loader_sw_device *sdev)
 {
-   if (!sdev->ws)
-      return false;
-
    sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
    sdev->base.driver_name = "swrast";
    sdev->base.ops = &pipe_loader_sw_ops;
 
+   sdev->lib = pipe_loader_find_module(&sdev->base, PIPE_SEARCH_DIR);
+   if (!sdev->lib)
+      return false;
+
+   sdev->dd = (const struct sw_driver_descriptor *)
+      util_dl_get_proc_address(sdev->lib, "swrast_driver_descriptor");
+
+   if (!sdev->dd){
+      util_dl_close(sdev->lib);
+      sdev->lib = NULL;
+      return false;
+   }
+
    return true;
 }
 
+static void
+pipe_loader_sw_probe_teardown_common(struct pipe_loader_sw_device *sdev)
+{
+   if (sdev->lib)
+      util_dl_close(sdev->lib);
+}
+
 #ifdef HAVE_PIPE_LOADER_DRI
 bool
 pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, struct drisw_loader_funcs *drisw_lf)
 {
    struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
+   int i;
 
    if (!sdev)
       return false;
 
-   sdev->ws = dri_create_sw_winsys(drisw_lf);
-   if (!pipe_loader_sw_probe_init_common(sdev)) {
-      FREE(sdev);
-      return false;
-   }
-   *devs = &sdev->base;
+   if (!pipe_loader_sw_probe_init_common(sdev))
+      goto fail;
 
+   for (i = 0; sdev->dd->winsys; i++) {
+      if (strcmp(sdev->dd->winsys[i].name, "dri") == 0) {
+         sdev->ws = sdev->dd->winsys[i].create_winsys(drisw_lf);
+         break;
+      }
+   }
+   if (!sdev->ws)
+      goto fail;
+
+   *devs = &sdev->base;
    return true;
+
+fail:
+   pipe_loader_sw_probe_teardown_common(sdev);
+   FREE(sdev);
+   return false;
 }
 #endif
 
@@ -84,18 +115,30 @@ bool
 pipe_loader_sw_probe_kms(struct pipe_loader_device **devs, int fd)
 {
    struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
+   int i;
 
    if (!sdev)
       return false;
 
-   sdev->ws = kms_dri_create_winsys(fd);
-   if (!pipe_loader_sw_probe_init_common(sdev)) {
-      FREE(sdev);
-      return false;
-   }
-   *devs = &sdev->base;
+   if (!pipe_loader_sw_probe_init_common(sdev))
+      goto fail;
 
+   for (i = 0; sdev->dd->winsys; i++) {
+      if (strcmp(sdev->dd->winsys[i].name, "kms_dri") == 0) {
+         sdev->ws = sdev->dd->winsys[i].create_winsys(fd);
+         break;
+      }
+   }
+   if (!sdev->ws)
+      goto fail;
+
+   *devs = &sdev->base;
    return true;
+
+fail:
+   pipe_loader_sw_probe_teardown_common(sdev);
+   FREE(sdev);
+   return false;
 }
 #endif
 
@@ -103,18 +146,30 @@ bool
 pipe_loader_sw_probe_null(struct pipe_loader_device **devs)
 {
    struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
+   int i;
 
    if (!sdev)
       return false;
 
-   sdev->ws = null_sw_create();
-   if (!pipe_loader_sw_probe_init_common(sdev)) {
-      FREE(sdev);
-      return false;
-   }
-   *devs = &sdev->base;
+   if (!pipe_loader_sw_probe_init_common(sdev))
+      goto fail;
 
+   for (i = 0; sdev->dd->winsys; i++) {
+      if (strcmp(sdev->dd->winsys[i].name, "null") == 0) {
+         sdev->ws = sdev->dd->winsys[i].create_winsys();
+         break;
+      }
+   }
+   if (!sdev->ws)
+      goto fail;
+
+   *devs = &sdev->base;
    return true;
+
+fail:
+   pipe_loader_sw_probe_teardown_common(sdev);
+   FREE(sdev);
+   return false;
 }
 
 int
@@ -136,17 +191,30 @@ pipe_loader_sw_probe_wrapped(struct pipe_loader_device **dev,
                              struct pipe_screen *screen)
 {
    struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
+   int i;
 
    if (!sdev)
       return false;
 
-   sdev->ws = wrapper_sw_winsys_wrap_pipe_screen(screen);
-   if (!pipe_loader_sw_probe_init_common(sdev)) {
-      FREE(sdev);
-      return false;
+   if (!pipe_loader_sw_probe_init_common(sdev))
+      goto fail;
+
+   for (i = 0; sdev->dd->winsys; i++) {
+      if (strcmp(sdev->dd->winsys[i].name, "wrapped") == 0) {
+         sdev->ws = sdev->dd->winsys[i].create_winsys(screen);
+         break;
+      }
    }
+   if (!sdev->ws)
+      goto fail;
+
    *dev = &sdev->base;
    return true;
+
+fail:
+   pipe_loader_sw_probe_teardown_common(sdev);
+   FREE(sdev);
+   return false;
 }
 
 static void
@@ -172,21 +240,8 @@ static struct pipe_screen *
 pipe_loader_sw_create_screen(struct pipe_loader_device *dev)
 {
    struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(dev);
-   struct pipe_screen *(*init)(struct sw_winsys *);
 
-   if (!sdev->lib)
-      sdev->lib = pipe_loader_find_module(&sdev->base, PIPE_SEARCH_DIR);
-   if (!sdev->lib)
-      return NULL;
-
-   init = (void *)util_dl_get_proc_address(sdev->lib, "swrast_create_screen");
-   if (!init){
-      util_dl_close(sdev->lib);
-      sdev->lib = NULL;
-      return NULL;
-   }
-
-   return init(sdev->ws);
+   return sdev->dd->create_screen(sdev->ws);
 }
 
 static struct pipe_loader_ops pipe_loader_sw_ops = {
diff --git a/src/gallium/include/state_tracker/sw_driver.h b/src/gallium/include/state_tracker/sw_driver.h
new file mode 100644
index 00000000000..0eb2b44d6fd
--- /dev/null
+++ b/src/gallium/include/state_tracker/sw_driver.h
@@ -0,0 +1,21 @@
+
+#ifndef _SW_DRIVER_H_
+#define _SW_DRIVER_H_
+
+#include "pipe/p_compiler.h"
+
+struct pipe_screen;
+struct sw_winsys;
+
+struct sw_driver_descriptor
+{
+   struct pipe_screen *(*create_screen)(struct sw_winsys *ws);
+   struct {
+       const char * const name;
+       struct sw_winsys *(*create_winsys)();
+   } winsys[];
+};
+
+extern struct sw_driver_descriptor swrast_driver_descriptor;
+
+#endif
diff --git a/src/gallium/targets/d3dadapter9/Makefile.am b/src/gallium/targets/d3dadapter9/Makefile.am
index bd6d620e819..d125ba8918d 100644
--- a/src/gallium/targets/d3dadapter9/Makefile.am
+++ b/src/gallium/targets/d3dadapter9/Makefile.am
@@ -110,8 +110,7 @@ d3dadapter9_la_LIBADD += $(TARGET_LIB_DEPS) \
 else # HAVE_GALLIUM_STATIC_TARGETS
 
 d3dadapter9_la_LIBADD += \
-	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
-	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
diff --git a/src/gallium/targets/dri/Makefile.am b/src/gallium/targets/dri/Makefile.am
index 95efdd4451c..038a12bfdfe 100644
--- a/src/gallium/targets/dri/Makefile.am
+++ b/src/gallium/targets/dri/Makefile.am
@@ -98,8 +98,7 @@ gallium_dri_la_LIBADD += $(TARGET_LIB_DEPS) \
 else # HAVE_GALLIUM_STATIC_TARGETS
 
 gallium_dri_la_LIBADD += \
-	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
-	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
diff --git a/src/gallium/targets/omx/Makefile.am b/src/gallium/targets/omx/Makefile.am
index a4dff487dd8..2454cbe424a 100644
--- a/src/gallium/targets/omx/Makefile.am
+++ b/src/gallium/targets/omx/Makefile.am
@@ -56,8 +56,7 @@ libomx_mesa_la_LIBADD += $(TARGET_LIB_DEPS) \
 else # HAVE_GALLIUM_STATIC_TARGETS
 
 libomx_mesa_la_LIBADD += \
-	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
-	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
diff --git a/src/gallium/targets/opencl/Makefile.am b/src/gallium/targets/opencl/Makefile.am
index c78b26832ff..004d6d786df 100644
--- a/src/gallium/targets/opencl/Makefile.am
+++ b/src/gallium/targets/opencl/Makefile.am
@@ -19,7 +19,6 @@ lib@OPENCL_LIBNAME@_la_LIBADD = \
 	$(top_builddir)/src/gallium/state_trackers/clover/libclover.la \
 	$(top_builddir)/src/gallium/auxiliary/libgallium.la \
 	$(top_builddir)/src/util/libmesautil.la \
-	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
 	$(ELF_LIB) \
 	-ldl \
 	-lclangCodeGen \
diff --git a/src/gallium/targets/pipe-loader/Makefile.am b/src/gallium/targets/pipe-loader/Makefile.am
index 4f25b4f6073..4bc3b55f26b 100644
--- a/src/gallium/targets/pipe-loader/Makefile.am
+++ b/src/gallium/targets/pipe-loader/Makefile.am
@@ -27,6 +27,7 @@ AM_CPPFLAGS = \
 	-I$(top_srcdir)/include \
 	-I$(top_srcdir)/src/gallium/drivers \
 	-I$(top_srcdir)/src/gallium/winsys \
+	$(GALLIUM_PIPE_LOADER_DEFINES) \
 	$(LIBDRM_CFLAGS) \
 	$(VISIBILITY_CFLAGS) \
 	-DGALLIUM_RBUG \
@@ -208,6 +209,10 @@ AM_CPPFLAGS += -DGALLIUM_LLVMPIPE
 pipe_swrast_la_LIBADD += \
 	$(top_builddir)/src/gallium/drivers/llvmpipe/libllvmpipe.la
 endif
+
+pipe_swrast_la_LIBADD += \
+	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
+
 endif
 
 EXTRA_DIST = pipe.sym
diff --git a/src/gallium/targets/pipe-loader/pipe.sym b/src/gallium/targets/pipe-loader/pipe.sym
index 19b1d77b040..b2fa619f7de 100644
--- a/src/gallium/targets/pipe-loader/pipe.sym
+++ b/src/gallium/targets/pipe-loader/pipe.sym
@@ -1,7 +1,7 @@
 {
 	global:
 		driver_descriptor;
-		swrast_create_screen;
+		swrast_driver_descriptor;
 	local:
 		*;
 };
diff --git a/src/gallium/targets/pipe-loader/pipe_swrast.c b/src/gallium/targets/pipe-loader/pipe_swrast.c
index f7f354acf3f..cf617f37e20 100644
--- a/src/gallium/targets/pipe-loader/pipe_swrast.c
+++ b/src/gallium/targets/pipe-loader/pipe_swrast.c
@@ -1,7 +1,11 @@
 
 #include "target-helpers/inline_sw_helper.h"
 #include "target-helpers/inline_debug_helper.h"
-#include "state_tracker/drm_driver.h"
+#include "state_tracker/sw_driver.h"
+#include "sw/dri/dri_sw_winsys.h"
+#include "sw/kms-dri/kms_dri_sw_winsys.h"
+#include "sw/null/null_sw_winsys.h"
+#include "sw/wrapper/wrapper_sw_winsys.h"
 
 PUBLIC struct pipe_screen *
 swrast_create_screen(struct sw_winsys *ws);
@@ -17,3 +21,31 @@ swrast_create_screen(struct sw_winsys *ws)
 
    return screen;
 }
+
+PUBLIC
+struct sw_driver_descriptor swrast_driver_descriptor = {
+   .create_screen = swrast_create_screen,
+   .winsys = {
+#ifdef HAVE_PIPE_LOADER_DRI
+      {
+         .name = "dri",
+         .create_winsys = dri_create_sw_winsys,
+      },
+#endif
+#ifdef HAVE_PIPE_LOADER_KMS
+      {
+         .name = "kms_dri",
+         .create_winsys = kms_dri_create_winsys,
+      },
+#endif
+      {
+         .name = "null",
+         .create_winsys = null_sw_create,
+      },
+      {
+         .name = "wrapped",
+         .create_winsys = wrapper_sw_winsys_wrap_pipe_screen,
+      },
+      { 0 },
+   }
+};
diff --git a/src/gallium/targets/va/Makefile.am b/src/gallium/targets/va/Makefile.am
index 9613f041b58..2fd24a8bdd9 100644
--- a/src/gallium/targets/va/Makefile.am
+++ b/src/gallium/targets/va/Makefile.am
@@ -53,8 +53,7 @@ gallium_drv_video_la_LIBADD += $(TARGET_LIB_DEPS) \
 else # HAVE_GALLIUM_STATIC_TARGETS
 
 gallium_drv_video_la_LIBADD += \
-	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
-	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
diff --git a/src/gallium/targets/vdpau/Makefile.am b/src/gallium/targets/vdpau/Makefile.am
index 7eb62c1cc78..34b7ef40b20 100644
--- a/src/gallium/targets/vdpau/Makefile.am
+++ b/src/gallium/targets/vdpau/Makefile.am
@@ -65,8 +65,7 @@ libvdpau_gallium_la_LIBADD += $(TARGET_LIB_DEPS) \
 else # HAVE_GALLIUM_STATIC_TARGETS
 
 libvdpau_gallium_la_LIBADD += \
-	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
-	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
diff --git a/src/gallium/targets/xa/Makefile.am b/src/gallium/targets/xa/Makefile.am
index 02c42c665ed..0fba3b2f3a1 100644
--- a/src/gallium/targets/xa/Makefile.am
+++ b/src/gallium/targets/xa/Makefile.am
@@ -79,8 +79,7 @@ libxatracker_la_LIBADD += $(TARGET_LIB_DEPS)
 else # HAVE_GALLIUM_STATIC_TARGETS
 
 libxatracker_la_LIBADD += \
-	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
-	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
diff --git a/src/gallium/targets/xvmc/Makefile.am b/src/gallium/targets/xvmc/Makefile.am
index b3285890822..f1045d4f745 100644
--- a/src/gallium/targets/xvmc/Makefile.am
+++ b/src/gallium/targets/xvmc/Makefile.am
@@ -53,8 +53,7 @@ libXvMCgallium_la_LIBADD += $(TARGET_LIB_DEPS) \
 
 else # HAVE_GALLIUM_STATIC_TARGETS
 libXvMCgallium_la_LIBADD += \
-	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
-	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
diff --git a/src/gallium/tests/trivial/Makefile.am b/src/gallium/tests/trivial/Makefile.am
index b30cb13eee6..175bef2d3d4 100644
--- a/src/gallium/tests/trivial/Makefile.am
+++ b/src/gallium/tests/trivial/Makefile.am
@@ -9,7 +9,6 @@ LDADD = \
 	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
 	$(top_builddir)/src/gallium/auxiliary/libgallium.la \
 	$(top_builddir)/src/util/libmesautil.la \
-	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
 	$(GALLIUM_COMMON_LIB_DEPS)
 
 noinst_PROGRAMS = compute tri quad-tex

From 46991ab9aac99c5ba55b735396f32447e75a6320 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Sat, 17 Oct 2015 20:53:16 +0100
Subject: [PATCH 245/335] pipe-loader: teardown the winsys, if create_screen
 fails

i.e. plug some (hard to hit) memory leaks.

v2: fix rebase fallout - really teardown the winsys (Brian)
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
index 816ff1c85d3..85e06d312ed 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
@@ -240,8 +240,13 @@ static struct pipe_screen *
 pipe_loader_sw_create_screen(struct pipe_loader_device *dev)
 {
    struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(dev);
+   struct pipe_screen *screen;
 
-   return sdev->dd->create_screen(sdev->ws);
+   screen = sdev->dd->create_screen(sdev->ws);
+   if (!screen)
+      sdev->ws->destroy(sdev->ws);
+
+   return screen;
 }
 
 static struct pipe_loader_ops pipe_loader_sw_ops = {

From e465de5a51dbb1af50da1a44353867adc45d57c7 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 14 Oct 2015 21:35:50 +0100
Subject: [PATCH 246/335] pipe-loader: annotate the ops as const data

Already defined as such in struct pipe_loader_device::ops.

Cc: Tom Stellard <thomas.stellard@amd.com>
Cc: Francisco Jerez <currojerez@riseup.net>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c | 4 ++--
 src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
index 3b858e7425c..d4cb317447a 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -56,7 +56,7 @@ struct pipe_loader_drm_device {
 
 #define pipe_loader_drm_device(dev) ((struct pipe_loader_drm_device *)dev)
 
-static struct pipe_loader_ops pipe_loader_drm_ops;
+static const struct pipe_loader_ops pipe_loader_drm_ops;
 
 bool
 pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd)
@@ -185,7 +185,7 @@ pipe_loader_drm_create_screen(struct pipe_loader_device *dev)
    return dd->create_screen(ddev->fd);
 }
 
-static struct pipe_loader_ops pipe_loader_drm_ops = {
+static const struct pipe_loader_ops pipe_loader_drm_ops = {
    .create_screen = pipe_loader_drm_create_screen,
    .configuration = pipe_loader_drm_configuration,
    .release = pipe_loader_drm_release
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
index 85e06d312ed..3db53e199e9 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
@@ -46,7 +46,7 @@ struct pipe_loader_sw_device {
 
 #define pipe_loader_sw_device(dev) ((struct pipe_loader_sw_device *)dev)
 
-static struct pipe_loader_ops pipe_loader_sw_ops;
+static const struct pipe_loader_ops pipe_loader_sw_ops;
 
 static bool
 pipe_loader_sw_probe_init_common(struct pipe_loader_sw_device *sdev)
@@ -249,7 +249,7 @@ pipe_loader_sw_create_screen(struct pipe_loader_device *dev)
    return screen;
 }
 
-static struct pipe_loader_ops pipe_loader_sw_ops = {
+static const struct pipe_loader_ops pipe_loader_sw_ops = {
    .create_screen = pipe_loader_sw_create_screen,
    .configuration = pipe_loader_sw_configuration,
    .release = pipe_loader_sw_release

From 3ca12ee976e44a1126775a8e801889d42dd06980 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Sat, 17 Oct 2015 21:51:45 +0100
Subject: [PATCH 247/335] pipe-loader: dlopen/dlsym the pipe-driver at probe
 time

Rather than giving false hopes that things might work, just check at
probe time. This allows us to remove the duplication and consolidate
the code wrt the upcoming static pipe-loader.

Cc: Tom Stellard <thomas.stellard@amd.com>
Cc: Francisco Jerez <currojerez@riseup.net>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 .../auxiliary/pipe-loader/pipe_loader_drm.c   | 44 +++++++------------
 1 file changed, 17 insertions(+), 27 deletions(-)

diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
index d4cb317447a..33274deeec5 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -50,6 +50,7 @@
 
 struct pipe_loader_drm_device {
    struct pipe_loader_device base;
+   const struct drm_driver_descriptor *dd;
    struct util_dl_library *lib;
    int fd;
 };
@@ -81,10 +82,23 @@ pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd)
    if (!ddev->base.driver_name)
       goto fail;
 
+   ddev->lib = pipe_loader_find_module(dev, PIPE_SEARCH_DIR);
+   if (!ddev->lib)
+      return fail;
+
+   ddev->dd = (const struct drm_driver_descriptor *)
+      util_dl_get_proc_address(ddev->lib, "driver_descriptor");
+
+   /* sanity check on the name */
+   if (!ddev->dd || strcmp(ddev->dd->name, ddev->base.driver_name) != 0)
+       goto fail;
+
    *dev = &ddev->base;
    return true;
 
   fail:
+   if (ddev->lib)
+      util_dl_close(ddev->lib);
    FREE(ddev);
    return false;
 }
@@ -146,43 +160,19 @@ pipe_loader_drm_configuration(struct pipe_loader_device *dev,
                               enum drm_conf conf)
 {
    struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(dev);
-   const struct drm_driver_descriptor *dd;
 
-   if (!ddev->lib)
+   if (!ddev->dd->configuration)
       return NULL;
 
-   dd = (const struct drm_driver_descriptor *)
-      util_dl_get_proc_address(ddev->lib, "driver_descriptor");
-
-   /* sanity check on the name */
-   if (!dd || strcmp(dd->name, ddev->base.driver_name) != 0)
-      return NULL;
-
-   if (!dd->configuration)
-      return NULL;
-
-   return dd->configuration(conf);
+   return ddev->dd->configuration(conf);
 }
 
 static struct pipe_screen *
 pipe_loader_drm_create_screen(struct pipe_loader_device *dev)
 {
    struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(dev);
-   const struct drm_driver_descriptor *dd;
 
-   if (!ddev->lib)
-      ddev->lib = pipe_loader_find_module(&ddev->base, PIPE_SEARCH_DIR);
-   if (!ddev->lib)
-      return NULL;
-
-   dd = (const struct drm_driver_descriptor *)
-      util_dl_get_proc_address(ddev->lib, "driver_descriptor");
-
-   /* sanity check on the name */
-   if (!dd || strcmp(dd->name, ddev->base.driver_name) != 0)
-      return NULL;
-
-   return dd->create_screen(ddev->fd);
+   return ddev->dd->create_screen(ddev->fd);
 }
 
 static const struct pipe_loader_ops pipe_loader_drm_ops = {

From ad12027d8f8ff37ffe14ce17f9d79466b6ffeb32 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Sat, 17 Oct 2015 22:06:17 +0100
Subject: [PATCH 248/335] gallium: rename libpipe_loader to
 libpipe_loader_dynamic

With the next commits we'll introduce a 'static' version, which will
essentially load the statically linked-in pipe-drivers, rather than the
standalone pipe-$foo.so ones.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/auxiliary/pipe-loader/Makefile.am | 8 ++++----
 src/gallium/targets/d3dadapter9/Makefile.am   | 2 +-
 src/gallium/targets/dri/Makefile.am           | 2 +-
 src/gallium/targets/omx/Makefile.am           | 2 +-
 src/gallium/targets/opencl/Makefile.am        | 2 +-
 src/gallium/targets/va/Makefile.am            | 2 +-
 src/gallium/targets/vdpau/Makefile.am         | 2 +-
 src/gallium/targets/xa/Makefile.am            | 2 +-
 src/gallium/targets/xvmc/Makefile.am          | 2 +-
 src/gallium/tests/trivial/Makefile.am         | 2 +-
 10 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/gallium/auxiliary/pipe-loader/Makefile.am b/src/gallium/auxiliary/pipe-loader/Makefile.am
index 1597b79598e..974cf08a10b 100644
--- a/src/gallium/auxiliary/pipe-loader/Makefile.am
+++ b/src/gallium/auxiliary/pipe-loader/Makefile.am
@@ -10,19 +10,19 @@ AM_CFLAGS = \
 	$(GALLIUM_CFLAGS) \
 	$(VISIBILITY_CFLAGS)
 
-noinst_LTLIBRARIES = libpipe_loader.la
+noinst_LTLIBRARIES = libpipe_loader_dynamic.la
 
-libpipe_loader_la_SOURCES = \
+libpipe_loader_dynamic_la_SOURCES = \
 	$(COMMON_SOURCES)
 
 if HAVE_LIBDRM
 AM_CFLAGS += \
 	$(LIBDRM_CFLAGS)
 
-libpipe_loader_la_SOURCES += \
+libpipe_loader_dynamic_la_SOURCES += \
 	$(DRM_SOURCES)
 
-libpipe_loader_la_LIBADD = \
+libpipe_loader_dynamic_la_LIBADD = \
 	$(top_builddir)/src/loader/libloader.la
 
 endif
diff --git a/src/gallium/targets/d3dadapter9/Makefile.am b/src/gallium/targets/d3dadapter9/Makefile.am
index d125ba8918d..776f86bda6d 100644
--- a/src/gallium/targets/d3dadapter9/Makefile.am
+++ b/src/gallium/targets/d3dadapter9/Makefile.am
@@ -110,7 +110,7 @@ d3dadapter9_la_LIBADD += $(TARGET_LIB_DEPS) \
 else # HAVE_GALLIUM_STATIC_TARGETS
 
 d3dadapter9_la_LIBADD += \
-	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
diff --git a/src/gallium/targets/dri/Makefile.am b/src/gallium/targets/dri/Makefile.am
index 038a12bfdfe..e3a145f4d30 100644
--- a/src/gallium/targets/dri/Makefile.am
+++ b/src/gallium/targets/dri/Makefile.am
@@ -98,7 +98,7 @@ gallium_dri_la_LIBADD += $(TARGET_LIB_DEPS) \
 else # HAVE_GALLIUM_STATIC_TARGETS
 
 gallium_dri_la_LIBADD += \
-	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
diff --git a/src/gallium/targets/omx/Makefile.am b/src/gallium/targets/omx/Makefile.am
index 2454cbe424a..f9c0842179d 100644
--- a/src/gallium/targets/omx/Makefile.am
+++ b/src/gallium/targets/omx/Makefile.am
@@ -56,7 +56,7 @@ libomx_mesa_la_LIBADD += $(TARGET_LIB_DEPS) \
 else # HAVE_GALLIUM_STATIC_TARGETS
 
 libomx_mesa_la_LIBADD += \
-	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
diff --git a/src/gallium/targets/opencl/Makefile.am b/src/gallium/targets/opencl/Makefile.am
index 004d6d786df..3cb29766724 100644
--- a/src/gallium/targets/opencl/Makefile.am
+++ b/src/gallium/targets/opencl/Makefile.am
@@ -15,7 +15,7 @@ lib@OPENCL_LIBNAME@_la_LDFLAGS += \
 endif
 
 lib@OPENCL_LIBNAME@_la_LIBADD = \
-	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la \
 	$(top_builddir)/src/gallium/state_trackers/clover/libclover.la \
 	$(top_builddir)/src/gallium/auxiliary/libgallium.la \
 	$(top_builddir)/src/util/libmesautil.la \
diff --git a/src/gallium/targets/va/Makefile.am b/src/gallium/targets/va/Makefile.am
index 2fd24a8bdd9..17b9ae3e822 100644
--- a/src/gallium/targets/va/Makefile.am
+++ b/src/gallium/targets/va/Makefile.am
@@ -53,7 +53,7 @@ gallium_drv_video_la_LIBADD += $(TARGET_LIB_DEPS) \
 else # HAVE_GALLIUM_STATIC_TARGETS
 
 gallium_drv_video_la_LIBADD += \
-	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
diff --git a/src/gallium/targets/vdpau/Makefile.am b/src/gallium/targets/vdpau/Makefile.am
index 34b7ef40b20..f9fb56069a9 100644
--- a/src/gallium/targets/vdpau/Makefile.am
+++ b/src/gallium/targets/vdpau/Makefile.am
@@ -65,7 +65,7 @@ libvdpau_gallium_la_LIBADD += $(TARGET_LIB_DEPS) \
 else # HAVE_GALLIUM_STATIC_TARGETS
 
 libvdpau_gallium_la_LIBADD += \
-	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
diff --git a/src/gallium/targets/xa/Makefile.am b/src/gallium/targets/xa/Makefile.am
index 0fba3b2f3a1..545d17eb4e1 100644
--- a/src/gallium/targets/xa/Makefile.am
+++ b/src/gallium/targets/xa/Makefile.am
@@ -79,7 +79,7 @@ libxatracker_la_LIBADD += $(TARGET_LIB_DEPS)
 else # HAVE_GALLIUM_STATIC_TARGETS
 
 libxatracker_la_LIBADD += \
-	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
diff --git a/src/gallium/targets/xvmc/Makefile.am b/src/gallium/targets/xvmc/Makefile.am
index f1045d4f745..5fcfc88dd53 100644
--- a/src/gallium/targets/xvmc/Makefile.am
+++ b/src/gallium/targets/xvmc/Makefile.am
@@ -53,7 +53,7 @@ libXvMCgallium_la_LIBADD += $(TARGET_LIB_DEPS) \
 
 else # HAVE_GALLIUM_STATIC_TARGETS
 libXvMCgallium_la_LIBADD += \
-	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
diff --git a/src/gallium/tests/trivial/Makefile.am b/src/gallium/tests/trivial/Makefile.am
index 175bef2d3d4..585fb699e6c 100644
--- a/src/gallium/tests/trivial/Makefile.am
+++ b/src/gallium/tests/trivial/Makefile.am
@@ -6,7 +6,7 @@ AM_CFLAGS = \
 	$(GALLIUM_CFLAGS)
 
 LDADD = \
-	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la \
 	$(top_builddir)/src/gallium/auxiliary/libgallium.la \
 	$(top_builddir)/src/util/libmesautil.la \
 	$(GALLIUM_COMMON_LIB_DEPS)

From 0f39f9cb7ad8e93cfad95043724143ed097de966 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Tue, 13 Oct 2015 18:07:11 +0100
Subject: [PATCH 249/335] pipe-loader: add a dummy 'static' pipe-loader

It is to be used in contrast to the dynamic one. The state-tracker does
not need to know if the pipe-driver is built into the final blob or
a separate object. This will allow us to move the logic to the final
step (in target) where the appropriate pipe-loader will be chosen.

Cc: Tom Stellard <thomas.stellard@amd.com>
Cc: Francisco Jerez <currojerez@riseup.net>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/auxiliary/pipe-loader/Makefile.am | 23 ++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/pipe-loader/Makefile.am b/src/gallium/auxiliary/pipe-loader/Makefile.am
index 974cf08a10b..6a4a667ab0f 100644
--- a/src/gallium/auxiliary/pipe-loader/Makefile.am
+++ b/src/gallium/auxiliary/pipe-loader/Makefile.am
@@ -5,12 +5,24 @@ include $(top_srcdir)/src/gallium/Automake.inc
 AM_CFLAGS = \
 	-I$(top_srcdir)/src/loader \
 	-I$(top_srcdir)/src/gallium/winsys \
-	-DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\" \
 	$(GALLIUM_PIPE_LOADER_DEFINES) \
 	$(GALLIUM_CFLAGS) \
 	$(VISIBILITY_CFLAGS)
 
-noinst_LTLIBRARIES = libpipe_loader_dynamic.la
+noinst_LTLIBRARIES = \
+	libpipe_loader_static.la \
+	libpipe_loader_dynamic.la
+
+libpipe_loader_static_la_CFLAGS = \
+	$(AM_CFLAGS) \
+	-DGALLIUM_STATIC_TARGETS=1
+
+libpipe_loader_dynamic_la_CFLAGS = \
+	$(AM_CFLAGS) \
+	-DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\"
+
+libpipe_loader_static_la_SOURCES = \
+	$(COMMON_SOURCES)
 
 libpipe_loader_dynamic_la_SOURCES = \
 	$(COMMON_SOURCES)
@@ -19,11 +31,16 @@ if HAVE_LIBDRM
 AM_CFLAGS += \
 	$(LIBDRM_CFLAGS)
 
+libpipe_loader_static_la_SOURCES += \
+	$(DRM_SOURCES)
+
 libpipe_loader_dynamic_la_SOURCES += \
 	$(DRM_SOURCES)
 
+libpipe_loader_static_la_LIBADD = \
+	$(top_builddir)/src/loader/libloader.la
+
 libpipe_loader_dynamic_la_LIBADD = \
 	$(top_builddir)/src/loader/libloader.la
 
 endif
-

From 1b589207dee10abbe946dd7c3955ad153c5c5881 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 14 Oct 2015 14:56:21 +0100
Subject: [PATCH 250/335] pipe-loader: wire up the 'static' drm pipe-loader

Add a list of driver descriptors and select one from the list, during
probe time.

As we'll need to have all the driver pipe_foo_screen_create() functions
provided externally (i.e. from another static lib) we need a separate
(non-inline) drm_helper, which contains the function declarations.

v2: rebase on top of virgl support.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 .../auxiliary/pipe-loader/pipe_loader_drm.c   | 127 +++++++++++++++++-
 .../target-helpers/drm_helper_public.h        |  37 +++++
 2 files changed, 161 insertions(+), 3 deletions(-)
 create mode 100644 src/gallium/auxiliary/target-helpers/drm_helper_public.h

diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
index 33274deeec5..e7804d34e79 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -36,6 +36,7 @@
 #include <unistd.h>
 
 #include "loader.h"
+#include "target-helpers/drm_helper_public.h"
 #include "state_tracker/drm_driver.h"
 #include "pipe_loader_priv.h"
 
@@ -51,7 +52,9 @@
 struct pipe_loader_drm_device {
    struct pipe_loader_device base;
    const struct drm_driver_descriptor *dd;
+#ifndef GALLIUM_STATIC_TARGETS
    struct util_dl_library *lib;
+#endif
    int fd;
 };
 
@@ -59,6 +62,109 @@ struct pipe_loader_drm_device {
 
 static const struct pipe_loader_ops pipe_loader_drm_ops;
 
+#ifdef GALLIUM_STATIC_TARGETS
+static const struct drm_conf_ret throttle_ret = {
+   DRM_CONF_INT,
+   {2},
+};
+
+static const struct drm_conf_ret share_fd_ret = {
+   DRM_CONF_BOOL,
+   {true},
+};
+
+static inline const struct drm_conf_ret *
+configuration_query(enum drm_conf conf)
+{
+   switch (conf) {
+   case DRM_CONF_THROTTLE:
+      return &throttle_ret;
+   case DRM_CONF_SHARE_FD:
+      return &share_fd_ret;
+   default:
+      break;
+   }
+   return NULL;
+}
+
+static const struct drm_driver_descriptor driver_descriptors[] = {
+    {
+        .name = "i915",
+        .driver_name = "i915",
+        .create_screen = pipe_i915_create_screen,
+        .configuration = configuration_query,
+    },
+    {
+        .name = "i965",
+        .driver_name = "i915",
+        .create_screen = pipe_ilo_create_screen,
+        .configuration = configuration_query,
+    },
+    {
+        .name = "nouveau",
+        .driver_name = "nouveau",
+        .create_screen = pipe_nouveau_create_screen,
+        .configuration = configuration_query,
+    },
+    {
+        .name = "r300",
+        .driver_name = "radeon",
+        .create_screen = pipe_r300_create_screen,
+        .configuration = configuration_query,
+    },
+    {
+        .name = "r600",
+        .driver_name = "radeon",
+        .create_screen = pipe_r600_create_screen,
+        .configuration = configuration_query,
+    },
+    {
+        .name = "radeonsi",
+        .driver_name = "radeon",
+        .create_screen = pipe_radeonsi_create_screen,
+        .configuration = configuration_query,
+    },
+    {
+        .name = "vmwgfx",
+        .driver_name = "vmwgfx",
+        .create_screen = pipe_vmwgfx_create_screen,
+        .configuration = configuration_query,
+    },
+    {
+        .name = "kgsl",
+        .driver_name = "freedreno",
+        .create_screen = pipe_freedreno_create_screen,
+        .configuration = configuration_query,
+    },
+    {
+        .name = "msm",
+        .driver_name = "freedreno",
+        .create_screen = pipe_freedreno_create_screen,
+        .configuration = configuration_query,
+    },
+    {
+        .name = "virtio_gpu",
+        .driver_name = "virtio-gpu",
+        .create_screen = pipe_virgl_create_screen,
+        .configuration = configuration_query,
+    },
+    {
+        .name = "vc4",
+        .driver_name = "vc4",
+        .create_screen = pipe_vc4_create_screen,
+        .configuration = configuration_query,
+    },
+#ifdef USE_VC4_SIMULATOR
+    {
+        .name = "i965",
+        .driver_name = "vc4",
+        .create_screen = pipe_vc4_create_screen,
+        .configuration = configuration_query,
+    },
+#endif
+};
+#endif
+
 bool
 pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd)
 {
@@ -82,23 +188,36 @@ pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd)
    if (!ddev->base.driver_name)
       goto fail;
 
-   ddev->lib = pipe_loader_find_module(dev, PIPE_SEARCH_DIR);
+#ifdef GALLIUM_STATIC_TARGETS
+   for (int i = 0; i < ARRAY_SIZE(driver_descriptors); i++) {
+      if (strcmp(driver_descriptors[i].name, ddev->base.driver_name) == 0) {
+         ddev->dd = &driver_descriptors[i];
+         break;
+      }
+   }
+   if (!ddev->dd)
+      goto fail;
+#else
+   ddev->lib = pipe_loader_find_module(&ddev->base, PIPE_SEARCH_DIR);
    if (!ddev->lib)
-      return fail;
+      goto fail;
 
    ddev->dd = (const struct drm_driver_descriptor *)
       util_dl_get_proc_address(ddev->lib, "driver_descriptor");
 
    /* sanity check on the name */
    if (!ddev->dd || strcmp(ddev->dd->name, ddev->base.driver_name) != 0)
-       goto fail;
+      goto fail;
+#endif
 
    *dev = &ddev->base;
    return true;
 
   fail:
+#ifndef GALLIUM_STATIC_TARGETS
    if (ddev->lib)
       util_dl_close(ddev->lib);
+#endif
    FREE(ddev);
    return false;
 }
@@ -146,8 +265,10 @@ pipe_loader_drm_release(struct pipe_loader_device **dev)
 {
    struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(*dev);
 
+#ifndef GALLIUM_STATIC_TARGETS
    if (ddev->lib)
       util_dl_close(ddev->lib);
+#endif
 
    close(ddev->fd);
    FREE(ddev->base.driver_name);
diff --git a/src/gallium/auxiliary/target-helpers/drm_helper_public.h b/src/gallium/auxiliary/target-helpers/drm_helper_public.h
new file mode 100644
index 00000000000..d1f9382a6f9
--- /dev/null
+++ b/src/gallium/auxiliary/target-helpers/drm_helper_public.h
@@ -0,0 +1,37 @@
+#ifndef _DRM_HELPER_PUBLIC_H
+#define _DRM_HELPER_PUBLIC_H
+
+
+struct pipe_screen;
+
+struct pipe_screen *
+pipe_i915_create_screen(int fd);
+
+struct pipe_screen *
+pipe_ilo_create_screen(int fd);
+
+struct pipe_screen *
+pipe_nouveau_create_screen(int fd);
+
+struct pipe_screen *
+pipe_r300_create_screen(int fd);
+
+struct pipe_screen *
+pipe_r600_create_screen(int fd);
+
+struct pipe_screen *
+pipe_radeonsi_create_screen(int fd);
+
+struct pipe_screen *
+pipe_vmwgfx_create_screen(int fd);
+
+struct pipe_screen *
+pipe_freedreno_create_screen(int fd);
+
+struct pipe_screen *
+pipe_virgl_create_screen(int fd);
+
+struct pipe_screen *
+pipe_vc4_create_screen(int fd);
+
+#endif /* _DRM_HELPER_PUBLIC_H */

From be78f73b37caa5c9bc8dec9390d0c397bbff8729 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Fri, 20 Nov 2015 17:19:54 +0000
Subject: [PATCH 251/335] pipe-loader: wire up the 'static' sw pipe-loader

Analogous to the previous commit, with a small catch.

As the sw inline helpers are mere wrappers, and the screen <> winsys
split is more prominent (with the latter not being part of the final
pipe-driver), things will just work.

v2: rebase on top of earlier 'consolidate teardown' changes

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 .../auxiliary/pipe-loader/pipe_loader_sw.c    | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
index 3db53e199e9..5539a730b4c 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
@@ -40,7 +40,9 @@
 struct pipe_loader_sw_device {
    struct pipe_loader_device base;
    const struct sw_driver_descriptor *dd;
+#ifndef GALLIUM_STATIC_TARGETS
    struct util_dl_library *lib;
+#endif
    struct sw_winsys *ws;
 };
 
@@ -48,6 +50,41 @@ struct pipe_loader_sw_device {
 
 static const struct pipe_loader_ops pipe_loader_sw_ops;
 
+#ifdef GALLIUM_STATIC_TARGETS
+static const struct sw_driver_descriptor driver_descriptors = {
+   .create_screen = sw_screen_create,
+   .winsys = {
+#ifdef HAVE_PIPE_LOADER_DRI
+      {
+         .name = "dri",
+         .create_winsys = dri_create_sw_winsys,
+      },
+#endif
+#ifdef HAVE_PIPE_LOADER_KMS
+      {
+         .name = "kms_dri",
+         .create_winsys = kms_dri_create_winsys,
+      },
+#endif
+/**
+ * XXX: Do not include these two for non autotools builds.
+ * They don't have neither opencl nor nine, where these are used.
+ */
+#ifndef DROP_PIPE_LOADER_MISC
+      {
+         .name = "null",
+         .create_winsys = null_sw_create,
+      },
+      {
+         .name = "wrapped",
+         .create_winsys = wrapper_sw_winsys_wrap_pipe_screen,
+      },
+#endif
+      { 0 },
+   }
+};
+#endif
+
 static bool
 pipe_loader_sw_probe_init_common(struct pipe_loader_sw_device *sdev)
 {
@@ -55,6 +92,11 @@ pipe_loader_sw_probe_init_common(struct pipe_loader_sw_device *sdev)
    sdev->base.driver_name = "swrast";
    sdev->base.ops = &pipe_loader_sw_ops;
 
+#ifdef GALLIUM_STATIC_TARGETS
+   sdev->dd = &driver_descriptors;
+   if (!sdev->dd)
+      return false;
+#else
    sdev->lib = pipe_loader_find_module(&sdev->base, PIPE_SEARCH_DIR);
    if (!sdev->lib)
       return false;
@@ -67,6 +109,7 @@ pipe_loader_sw_probe_init_common(struct pipe_loader_sw_device *sdev)
       sdev->lib = NULL;
       return false;
    }
+#endif
 
    return true;
 }
@@ -74,8 +117,10 @@ pipe_loader_sw_probe_init_common(struct pipe_loader_sw_device *sdev)
 static void
 pipe_loader_sw_probe_teardown_common(struct pipe_loader_sw_device *sdev)
 {
+#ifndef GALLIUM_STATIC_TARGETS
    if (sdev->lib)
       util_dl_close(sdev->lib);
+#endif
 }
 
 #ifdef HAVE_PIPE_LOADER_DRI
@@ -222,8 +267,10 @@ pipe_loader_sw_release(struct pipe_loader_device **dev)
 {
    struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(*dev);
 
+#ifndef GALLIUM_STATIC_TARGETS
    if (sdev->lib)
       util_dl_close(sdev->lib);
+#endif
 
    FREE(sdev);
    *dev = NULL;

From 950e06a29bd664af0cb454fc2f35f8cc4153e7f1 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Sat, 17 Oct 2015 21:14:09 +0100
Subject: [PATCH 252/335] automake: remove no longer needed HAVE_LOADER_GALLIUM
 conditional

As of the last few commits we have a static and a dynamic pipe-loader,
either of which will be used with (almost) all targets.

We can look into allowing the user to select which way the targets are
built, be that 'static for all' or 'per target' in follow up commits.
After which we can look into building only the static or dynamic
version, although building both shouldn't cause any issues.

Hack/workaround alert:
Control the standalone pipe-drivers via HAVE_CLOVER. Will need to be
fixed as the targets are converted/configure knobs are in.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 configure.ac                      | 40 +++++++------------------------
 src/gallium/Makefile.am           |  4 +++-
 src/gallium/auxiliary/Makefile.am |  4 ----
 3 files changed, 12 insertions(+), 36 deletions(-)

diff --git a/configure.ac b/configure.ac
index 55c05018e64..91fdfe5f9cb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -997,10 +997,6 @@ if test -n "$with_gallium_drivers" -a "x$enable_glx$enable_xlib_glx" = xyesyes;
     NEED_WINSYS_XLIB="yes"
 fi
 
-if test "x$enable_dri" = xyes; then
-    enable_gallium_loader="$enable_shared_pipe_drivers"
-fi
-
 if test "x$enable_gallium_osmesa" = xyes; then
     if ! echo "$with_gallium_drivers" | grep -q 'swrast'; then
         AC_MSG_ERROR([gallium_osmesa requires the gallium swrast driver])
@@ -1604,7 +1600,6 @@ if test "x$enable_xa" = xyes; then
           enabling XA.
           Example: ./configure --enable-xa --with-gallium-drivers=svga...])
     fi
-    enable_gallium_loader=$enable_shared_pipe_drivers
     gallium_st="$gallium_st xa"
 fi
 AM_CONDITIONAL(HAVE_ST_XA, test "x$enable_xa" = xyes)
@@ -1650,28 +1645,24 @@ AM_CONDITIONAL(NEED_GALLIUM_VL_WINSYS, test "x$need_gallium_vl_winsys" = xyes)
 
 if test "x$enable_xvmc" = xyes; then
     PKG_CHECK_MODULES([XVMC], [xvmc >= $XVMC_REQUIRED])
-    enable_gallium_loader=$enable_shared_pipe_drivers
     gallium_st="$gallium_st xvmc"
 fi
 AM_CONDITIONAL(HAVE_ST_XVMC, test "x$enable_xvmc" = xyes)
 
 if test "x$enable_vdpau" = xyes; then
     PKG_CHECK_MODULES([VDPAU], [vdpau >= $VDPAU_REQUIRED])
-    enable_gallium_loader=$enable_shared_pipe_drivers
     gallium_st="$gallium_st vdpau"
 fi
 AM_CONDITIONAL(HAVE_ST_VDPAU, test "x$enable_vdpau" = xyes)
 
 if test "x$enable_omx" = xyes; then
     PKG_CHECK_MODULES([OMX], [libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED])
-    enable_gallium_loader=$enable_shared_pipe_drivers
     gallium_st="$gallium_st omx"
 fi
 AM_CONDITIONAL(HAVE_ST_OMX, test "x$enable_omx" = xyes)
 
 if test "x$enable_va" = xyes; then
     PKG_CHECK_MODULES([VA], [libva >= $LIBVA_REQUIRED])
-    enable_gallium_loader=$enable_shared_pipe_drivers
     gallium_st="$gallium_st va"
 fi
 AM_CONDITIONAL(HAVE_ST_VA, test "x$enable_va" = xyes)
@@ -1694,7 +1685,6 @@ if test "x$enable_nine" = xyes; then
         AC_MSG_WARN([using nine together with wine requires DRI3 enabled system])
     fi
 
-    enable_gallium_loader=$enable_shared_pipe_drivers
     gallium_st="$gallium_st nine"
 fi
 AM_CONDITIONAL(HAVE_ST_NINE, test "x$enable_nine" = xyes)
@@ -1733,8 +1723,6 @@ if test "x$enable_opencl" = xyes; then
         AC_SUBST([LIBCLC_LIBEXECDIR])
     fi
 
-    # XXX: Use $enable_shared_pipe_drivers once converted to use static/shared pipe-drivers
-    enable_gallium_loader=yes
     gallium_st="$gallium_st clover"
 
     if test "x$enable_opencl_icd" = xyes; then
@@ -2015,10 +2003,6 @@ AC_SUBST([XVMC_LIB_INSTALL_DIR])
 dnl
 dnl Gallium Tests
 dnl
-if test "x$enable_gallium_tests" = xyes; then
-    # XXX: Use $enable_shared_pipe_drivers once converted to use static/shared pipe-drivers
-    enable_gallium_loader=yes
-fi
 AM_CONDITIONAL(HAVE_GALLIUM_TESTS, test "x$enable_gallium_tests" = xyes)
 
 dnl Directory for VDPAU libs
@@ -2073,10 +2057,8 @@ gallium_require_llvm() {
 }
 
 gallium_require_drm_loader() {
-    if test "x$enable_gallium_loader" = xyes; then
-        if test "x$need_pci_id$have_pci_id" = xyesno; then
-            AC_MSG_ERROR([Gallium drm loader requires libudev >= $LIBUDEV_REQUIRED or sysfs])
-        fi
+    if test "x$need_pci_id$have_pci_id" = xyesno; then
+        AC_MSG_ERROR([Gallium drm loader requires libudev >= $LIBUDEV_REQUIRED or sysfs])
     fi
 }
 
@@ -2274,18 +2256,15 @@ AM_CONDITIONAL(HAVE_GALLIUM_VIRGL, test "x$HAVE_GALLIUM_VIRGL" = xyes)
 
 AM_CONDITIONAL(HAVE_GALLIUM_STATIC_TARGETS, test "x$enable_shared_pipe_drivers" = xno)
 
-if test "x$enable_gallium_loader" = xyes; then
-    if test "x$enable_dri" = xyes; then
-        GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_DRI"
-    fi
-
-    if test "x$have_drisw_kms" = xyes; then
-        GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_KMS"
-    fi
-
-    AC_SUBST([GALLIUM_PIPE_LOADER_DEFINES])
+if test "x$enable_dri" = xyes; then
+    GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_DRI"
 fi
 
+if test "x$have_drisw_kms" = xyes; then
+    GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_KMS"
+fi
+AC_SUBST([GALLIUM_PIPE_LOADER_DEFINES])
+
 AM_CONDITIONAL(HAVE_I915_DRI, test x$HAVE_I915_DRI = xyes)
 AM_CONDITIONAL(HAVE_I965_DRI, test x$HAVE_I965_DRI = xyes)
 AM_CONDITIONAL(HAVE_NOUVEAU_DRI, test x$HAVE_NOUVEAU_DRI = xyes)
@@ -2299,7 +2278,6 @@ AM_CONDITIONAL(NEED_RADEON_DRM_WINSYS, test "x$HAVE_GALLIUM_R300" = xyes -o \
 AM_CONDITIONAL(NEED_WINSYS_XLIB, test "x$NEED_WINSYS_XLIB" = xyes)
 AM_CONDITIONAL(NEED_RADEON_LLVM, test x$NEED_RADEON_LLVM = xyes)
 AM_CONDITIONAL(USE_R600_LLVM_COMPILER, test x$USE_R600_LLVM_COMPILER = xyes)
-AM_CONDITIONAL(HAVE_LOADER_GALLIUM, test x$enable_gallium_loader = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_COMPUTE, test x$enable_opencl = xyes)
 AM_CONDITIONAL(HAVE_MESA_LLVM, test x$MESA_LLVM = x1)
 AM_CONDITIONAL(USE_VC4_SIMULATOR, test x$USE_VC4_SIMULATOR = xyes)
diff --git a/src/gallium/Makefile.am b/src/gallium/Makefile.am
index 2c7508385ca..e42a8f17703 100644
--- a/src/gallium/Makefile.am
+++ b/src/gallium/Makefile.am
@@ -5,6 +5,7 @@ SUBDIRS =
 ##
 
 SUBDIRS += auxiliary
+SUBDIRS += auxiliary/pipe-loader
 
 ##
 ## Gallium pipe drivers and their respective winsys'
@@ -120,7 +121,8 @@ EXTRA_DIST = \
 ## Gallium state trackers and their users (targets)
 ##
 
-if HAVE_LOADER_GALLIUM
+## XXX: Rename the conditional once we have a config switch for static/dynamic pipe-drivers
+if HAVE_CLOVER
 SUBDIRS += targets/pipe-loader
 endif
 
diff --git a/src/gallium/auxiliary/Makefile.am b/src/gallium/auxiliary/Makefile.am
index 0908608d2db..7ff8972f8c0 100644
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -1,7 +1,3 @@
-if HAVE_LOADER_GALLIUM
-SUBDIRS := pipe-loader
-endif
-
 include Makefile.sources
 include $(top_srcdir)/src/gallium/Automake.inc
 

From af031deed6ab4d6236b896507e8afcf9d6fd3173 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 7 Oct 2015 17:37:37 +0100
Subject: [PATCH 253/335] target-helpers: move the DRI specifics to the target

Rather than having all targets include the file, with only some defining
the relevant guard macro, just move things where they are used.

v2: rebase on top of virgl support.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 .../target-helpers/inline_drm_helper.h        | 136 ---------------
 .../target-helpers/inline_sw_helper.h         |  10 --
 src/gallium/state_trackers/dri/drisw.c        |   3 +
 src/gallium/targets/dri/Android.mk            |   2 +-
 src/gallium/targets/dri/Makefile.am           |   1 -
 src/gallium/targets/dri/SConscript            |   1 -
 src/gallium/targets/dri/target.c              | 162 ++++++++++++++++++
 7 files changed, 166 insertions(+), 149 deletions(-)

diff --git a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
index 6ca4dc8136c..55f636339b3 100644
--- a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
@@ -72,14 +72,6 @@ static char* driver_name = NULL;
 #if defined(DRI_TARGET)
 #if defined(HAVE_LIBDRM)
 
-const __DRIextension **__driDriverGetExtensions_kms_swrast(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_kms_swrast(void)
-{
-   globalDriverAPI = &dri_kms_driver_api;
-   return galliumdrm_driver_extensions;
-}
-
 struct pipe_screen *
 kms_swrast_create_screen(int fd)
 {
@@ -98,16 +90,6 @@ kms_swrast_create_screen(int fd)
 #endif
 
 #if defined(GALLIUM_I915)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_i915(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_i915(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-#endif
 
 static struct pipe_screen *
 pipe_i915_create_screen(int fd)
@@ -125,16 +107,6 @@ pipe_i915_create_screen(int fd)
 #endif
 
 #if defined(GALLIUM_ILO)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_i965(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-#endif
 
 static struct pipe_screen *
 pipe_ilo_create_screen(int fd)
@@ -152,16 +124,6 @@ pipe_ilo_create_screen(int fd)
 #endif
 
 #if defined(GALLIUM_NOUVEAU)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_nouveau(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_nouveau(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-#endif
 
 static struct pipe_screen *
 pipe_nouveau_create_screen(int fd)
@@ -174,16 +136,6 @@ pipe_nouveau_create_screen(int fd)
 #endif
 
 #if defined(GALLIUM_R300)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_r300(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_r300(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-#endif
 
 static struct pipe_screen *
 pipe_r300_create_screen(int fd)
@@ -196,16 +148,6 @@ pipe_r300_create_screen(int fd)
 #endif
 
 #if defined(GALLIUM_R600)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_r600(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_r600(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-#endif
 
 static struct pipe_screen *
 pipe_r600_create_screen(int fd)
@@ -218,16 +160,6 @@ pipe_r600_create_screen(int fd)
 #endif
 
 #if defined(GALLIUM_RADEONSI)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_radeonsi(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_radeonsi(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-#endif
 
 static struct pipe_screen *
 pipe_radeonsi_create_screen(int fd)
@@ -245,16 +177,6 @@ pipe_radeonsi_create_screen(int fd)
 #endif
 
 #if defined(GALLIUM_VMWGFX)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_vmwgfx(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_vmwgfx(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-#endif
 
 static struct pipe_screen *
 pipe_vmwgfx_create_screen(int fd)
@@ -272,24 +194,6 @@ pipe_vmwgfx_create_screen(int fd)
 #endif
 
 #if defined(GALLIUM_FREEDRENO)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_msm(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_msm(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-
-const __DRIextension **__driDriverGetExtensions_kgsl(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_kgsl(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-#endif
 
 static struct pipe_screen *
 pipe_freedreno_create_screen(int fd)
@@ -302,16 +206,6 @@ pipe_freedreno_create_screen(int fd)
 #endif
 
 #if defined(GALLIUM_VIRGL)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_virtio_gpu(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_virtio_gpu(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-#endif
 
 static struct pipe_screen *
 pipe_virgl_create_screen(int fd)
@@ -329,36 +223,6 @@ pipe_virgl_create_screen(int fd)
 #endif
 
 #if defined(GALLIUM_VC4)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_vc4(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_vc4(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-
-#if defined(USE_VC4_SIMULATOR)
-const __DRIextension **__driDriverGetExtensions_i965(void);
-
-/**
- * When building using the simulator (on x86), we advertise ourselves as the
- * i965 driver so that you can just make a directory with a link from
- * i965_dri.so to the built vc4_dri.so, and point LIBGL_DRIVERS_PATH to that
- * on your i965-using host to run the driver under simulation.
- *
- * This is, of course, incompatible with building with the ilo driver, but you
- * shouldn't be building that anyway.
- */
-PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-#endif
-
-#endif
 
 static struct pipe_screen *
 pipe_vc4_create_screen(int fd)
diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
index 7e10c28f542..16937bc6a53 100644
--- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
@@ -73,15 +73,6 @@ sw_screen_create(struct sw_winsys *winsys)
 #if defined(DRI_TARGET)
 #include "target-helpers/inline_debug_helper.h"
 #include "sw/dri/dri_sw_winsys.h"
-#include "dri_screen.h"
-
-const __DRIextension **__driDriverGetExtensions_swrast(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_swrast(void)
-{
-   globalDriverAPI = &galliumsw_driver_api;
-   return galliumsw_driver_extensions;
-}
 
 inline struct pipe_screen *
 drisw_create_screen(struct drisw_loader_funcs *lf)
@@ -103,7 +94,6 @@ drisw_create_screen(struct drisw_loader_funcs *lf)
    return screen;
 }
 #endif // DRI_TARGET
-
 #endif // GALLIUM_SOFTPIPE
 
 
diff --git a/src/gallium/state_trackers/dri/drisw.c b/src/gallium/state_trackers/dri/drisw.c
index 753c59d696a..1b24f4896ea 100644
--- a/src/gallium/state_trackers/dri/drisw.c
+++ b/src/gallium/state_trackers/dri/drisw.c
@@ -34,6 +34,9 @@
  * for createImage/destroyImage similar to DRI2 getBuffers.
  */
 
+/* XXX: Temporary hack, until we get rid of drisw_create_screen() */
+#define DRI_TARGET
+
 #include "util/u_format.h"
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk
index a33d7f83671..89b420fa7fe 100644
--- a/src/gallium/targets/dri/Android.mk
+++ b/src/gallium/targets/dri/Android.mk
@@ -35,7 +35,7 @@ endif
 
 LOCAL_SRC_FILES := target.c
 
-LOCAL_CFLAGS := -DDRI_TARGET
+LOCAL_CFLAGS :=
 
 LOCAL_SHARED_LIBRARIES := \
 	libdl \
diff --git a/src/gallium/targets/dri/Makefile.am b/src/gallium/targets/dri/Makefile.am
index e3a145f4d30..b05441f8a9f 100644
--- a/src/gallium/targets/dri/Makefile.am
+++ b/src/gallium/targets/dri/Makefile.am
@@ -10,7 +10,6 @@ AM_CFLAGS = \
 
 AM_CPPFLAGS = \
 	$(DEFINES) \
-	-DDRI_TARGET \
         -DGALLIUM_DDEBUG \
 	-DGALLIUM_NOOP \
 	-DGALLIUM_RBUG \
diff --git a/src/gallium/targets/dri/SConscript b/src/gallium/targets/dri/SConscript
index 2fb0da09200..8d28924cb04 100644
--- a/src/gallium/targets/dri/SConscript
+++ b/src/gallium/targets/dri/SConscript
@@ -30,7 +30,6 @@ env.PkgUseModules('DRM')
 env.Append(CPPDEFINES = [
     'GALLIUM_VMWGFX',
     'GALLIUM_SOFTPIPE',
-    'DRI_TARGET',
 ])
 
 env.Prepend(LIBS = [
diff --git a/src/gallium/targets/dri/target.c b/src/gallium/targets/dri/target.c
index 32a11ef6281..16a958bdf67 100644
--- a/src/gallium/targets/dri/target.c
+++ b/src/gallium/targets/dri/target.c
@@ -1,2 +1,164 @@
 #include "target-helpers/inline_drm_helper.h"
 #include "target-helpers/inline_sw_helper.h"
+
+#include "dri_screen.h"
+
+#if defined(GALLIUM_SOFTPIPE)
+
+const __DRIextension **__driDriverGetExtensions_swrast(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_swrast(void)
+{
+   globalDriverAPI = &galliumsw_driver_api;
+   return galliumsw_driver_extensions;
+}
+
+#if defined(HAVE_LIBDRM)
+
+const __DRIextension **__driDriverGetExtensions_kms_swrast(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_kms_swrast(void)
+{
+   globalDriverAPI = &dri_kms_driver_api;
+   return galliumdrm_driver_extensions;
+}
+
+#endif
+#endif
+
+#if defined(GALLIUM_I915)
+
+const __DRIextension **__driDriverGetExtensions_i915(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_i915(void)
+{
+   globalDriverAPI = &galliumdrm_driver_api;
+   return galliumdrm_driver_extensions;
+}
+#endif
+
+#if defined(GALLIUM_ILO)
+
+const __DRIextension **__driDriverGetExtensions_i965(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void)
+{
+   globalDriverAPI = &galliumdrm_driver_api;
+   return galliumdrm_driver_extensions;
+}
+#endif
+
+#if defined(GALLIUM_NOUVEAU)
+
+const __DRIextension **__driDriverGetExtensions_nouveau(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_nouveau(void)
+{
+   globalDriverAPI = &galliumdrm_driver_api;
+   return galliumdrm_driver_extensions;
+}
+#endif
+
+#if defined(GALLIUM_R300)
+
+const __DRIextension **__driDriverGetExtensions_r300(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_r300(void)
+{
+   globalDriverAPI = &galliumdrm_driver_api;
+   return galliumdrm_driver_extensions;
+}
+#endif
+
+#if defined(GALLIUM_R600)
+
+const __DRIextension **__driDriverGetExtensions_r600(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_r600(void)
+{
+   globalDriverAPI = &galliumdrm_driver_api;
+   return galliumdrm_driver_extensions;
+}
+#endif
+
+#if defined(GALLIUM_RADEONSI)
+
+const __DRIextension **__driDriverGetExtensions_radeonsi(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_radeonsi(void)
+{
+   globalDriverAPI = &galliumdrm_driver_api;
+   return galliumdrm_driver_extensions;
+}
+#endif
+
+#if defined(GALLIUM_VMWGFX)
+
+const __DRIextension **__driDriverGetExtensions_vmwgfx(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_vmwgfx(void)
+{
+   globalDriverAPI = &galliumdrm_driver_api;
+   return galliumdrm_driver_extensions;
+}
+#endif
+
+#if defined(GALLIUM_FREEDRENO)
+
+const __DRIextension **__driDriverGetExtensions_msm(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_msm(void)
+{
+   globalDriverAPI = &galliumdrm_driver_api;
+   return galliumdrm_driver_extensions;
+}
+
+const __DRIextension **__driDriverGetExtensions_kgsl(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_kgsl(void)
+{
+   globalDriverAPI = &galliumdrm_driver_api;
+   return galliumdrm_driver_extensions;
+}
+#endif
+
+#if defined(GALLIUM_VIRGL)
+
+const __DRIextension **__driDriverGetExtensions_virtio_gpu(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_virtio_gpu(void)
+{
+   globalDriverAPI = &galliumdrm_driver_api;
+   return galliumdrm_driver_extensions;
+}
+#endif
+
+#if defined(GALLIUM_VC4)
+
+const __DRIextension **__driDriverGetExtensions_vc4(void);
+
+PUBLIC const __DRIextension **__driDriverGetExtensions_vc4(void)
+{
+   globalDriverAPI = &galliumdrm_driver_api;
+   return galliumdrm_driver_extensions;
+}
+
+#if defined(USE_VC4_SIMULATOR)
+const __DRIextension **__driDriverGetExtensions_i965(void);
+
+/**
+ * When building using the simulator (on x86), we advertise ourselves as the
+ * i965 driver so that you can just make a directory with a link from
+ * i965_dri.so to the built vc4_dri.so, and point LIBGL_DRIVERS_PATH to that
+ * on your i965-using host to run the driver under simulation.
+ *
+ * This is, of course, incompatible with building with the ilo driver, but you
+ * shouldn't be building that anyway.
+ */
+PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void)
+{
+   globalDriverAPI = &galliumdrm_driver_api;
+   return galliumdrm_driver_extensions;
+}
+#endif
+#endif

From 17d3a5f8579cf103ffd7002052c700ffe9819152 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Sun, 11 Oct 2015 13:50:56 +0100
Subject: [PATCH 254/335] target-helpers: add a non-inline drm_helper.h

Unlike the inline ones, here we'd want to have an extern definition of
the functions. This is required as with follow-up commits, we'll
gradually start using the static pipe-loader, with the latter needing
the symbols.

These are direct copies from the inline version.

v2:
 - rebase on top of virgl support
 - add "driver missing" printfs (Nicolai)

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 .../auxiliary/target-helpers/drm_helper.h     | 275 ++++++++++++++++++
 1 file changed, 275 insertions(+)
 create mode 100644 src/gallium/auxiliary/target-helpers/drm_helper.h

diff --git a/src/gallium/auxiliary/target-helpers/drm_helper.h b/src/gallium/auxiliary/target-helpers/drm_helper.h
new file mode 100644
index 00000000000..73a80b6c1dc
--- /dev/null
+++ b/src/gallium/auxiliary/target-helpers/drm_helper.h
@@ -0,0 +1,275 @@
+#ifndef DRM_HELPER_H
+#define DRM_HELPER_H
+
+#include <stdio.h>
+#include "target-helpers/inline_debug_helper.h"
+#include "target-helpers/drm_helper_public.h"
+
+#ifdef GALLIUM_I915
+#include "i915/drm/i915_drm_public.h"
+#include "i915/i915_public.h"
+
+struct pipe_screen *
+pipe_i915_create_screen(int fd)
+{
+   struct i915_winsys *iws;
+   struct pipe_screen *screen;
+
+   iws = i915_drm_winsys_create(fd);
+   if (!iws)
+      return NULL;
+
+   screen = i915_screen_create(iws);
+   return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_i915_create_screen(int fd)
+{
+   fprintf(stderr, "i915g: driver missing\n");
+   return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_ILO
+#include "intel/drm/intel_drm_public.h"
+#include "ilo/ilo_public.h"
+
+struct pipe_screen *
+pipe_ilo_create_screen(int fd)
+{
+   struct intel_winsys *iws;
+   struct pipe_screen *screen;
+
+   iws = intel_winsys_create_for_fd(fd);
+   if (!iws)
+      return NULL;
+
+   screen = ilo_screen_create(iws);
+   return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_ilo_create_screen(int fd)
+{
+   fprintf(stderr, "ilo: driver missing\n");
+   return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_NOUVEAU
+#include "nouveau/drm/nouveau_drm_public.h"
+
+struct pipe_screen *
+pipe_nouveau_create_screen(int fd)
+{
+   struct pipe_screen *screen;
+
+   screen = nouveau_drm_screen_create(fd);
+   return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_nouveau_create_screen(int fd)
+{
+   fprintf(stderr, "nouveau: driver missing\n");
+   return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_R300
+#include "radeon/radeon_winsys.h"
+#include "radeon/drm/radeon_drm_public.h"
+#include "r300/r300_public.h"
+
+struct pipe_screen *
+pipe_r300_create_screen(int fd)
+{
+   struct radeon_winsys *rw;
+
+   rw = radeon_drm_winsys_create(fd, r300_screen_create);
+   return rw ? debug_screen_wrap(rw->screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_r300_create_screen(int fd)
+{
+   fprintf(stderr, "r300: driver missing\n");
+   return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_R600
+#include "radeon/radeon_winsys.h"
+#include "radeon/drm/radeon_drm_public.h"
+#include "r600/r600_public.h"
+
+struct pipe_screen *
+pipe_r600_create_screen(int fd)
+{
+   struct radeon_winsys *rw;
+
+   rw = radeon_drm_winsys_create(fd, r600_screen_create);
+   return rw ? debug_screen_wrap(rw->screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_r600_create_screen(int fd)
+{
+   fprintf(stderr, "r600: driver missing\n");
+   return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_RADEONSI
+#include "radeon/radeon_winsys.h"
+#include "radeon/drm/radeon_drm_public.h"
+#include "amdgpu/drm/amdgpu_public.h"
+#include "radeonsi/si_public.h"
+
+struct pipe_screen *
+pipe_radeonsi_create_screen(int fd)
+{
+   struct radeon_winsys *rw;
+
+   /* First, try amdgpu. */
+   rw = amdgpu_winsys_create(fd, radeonsi_screen_create);
+
+   if (!rw)
+      rw = radeon_drm_winsys_create(fd, radeonsi_screen_create);
+
+   return rw ? debug_screen_wrap(rw->screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_radeonsi_create_screen(int fd)
+{
+   fprintf(stderr, "radeonsi: driver missing\n");
+   return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_VMWGFX
+#include "svga/drm/svga_drm_public.h"
+#include "svga/svga_public.h"
+
+struct pipe_screen *
+pipe_vmwgfx_create_screen(int fd)
+{
+   struct svga_winsys_screen *sws;
+   struct pipe_screen *screen;
+
+   sws = svga_drm_winsys_screen_create(fd);
+   if (!sws)
+      return NULL;
+
+   screen = svga_screen_create(sws);
+   return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_vmwgfx_create_screen(int fd)
+{
+   fprintf(stderr, "svga: driver missing\n");
+   return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_FREEDRENO
+#include "freedreno/drm/freedreno_drm_public.h"
+
+struct pipe_screen *
+pipe_freedreno_create_screen(int fd)
+{
+   struct pipe_screen *screen;
+
+   screen = fd_drm_screen_create(fd);
+   return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_freedreno_create_screen(int fd)
+{
+   fprintf(stderr, "freedreno: driver missing\n");
+   return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_VIRGL
+#include "virgl/drm/virgl_drm_public.h"
+#include "virgl/virgl_public.h"
+
+static struct pipe_screen *
+pipe_virgl_create_screen(int fd)
+{
+   struct virgl_winsys *vws;
+   struct pipe_screen *screen;
+
+   vws = virgl_drm_winsys_create(fd);
+   if (!vws)
+      return NULL;
+
+   screen = virgl_create_screen(vws);
+   return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_virgl_create_screen(int fd)
+{
+   fprintf(stderr, "virgl: driver missing\n");
+   return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_VC4
+#include "vc4/drm/vc4_drm_public.h"
+
+struct pipe_screen *
+pipe_vc4_create_screen(int fd)
+{
+   struct pipe_screen *screen;
+
+   screen = vc4_drm_screen_create(fd);
+   return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_vc4_create_screen(int fd)
+{
+   fprintf(stderr, "vc4: driver missing\n");
+   return NULL;
+}
+
+#endif
+
+
+#endif /* DRM_HELPER_H */

From 7999e6ddba38016964cbc33b98b4658e900bcd3c Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 14 Oct 2015 18:11:11 +0100
Subject: [PATCH 255/335] pipe-loader: don't mix code and variable declarations

We cannot use this C99 feature here quite yet, as the code needs to be
built with MSVC prior to 2013.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
index e7804d34e79..b5dfc56f49d 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -238,8 +238,9 @@ pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev)
 
    for (i = DRM_RENDER_NODE_MIN_MINOR, j = 0;
         i <= DRM_RENDER_NODE_MAX_MINOR; i++) {
-      fd = open_drm_render_node_minor(i);
       struct pipe_loader_device *dev;
+
+      fd = open_drm_render_node_minor(i);
       if (fd < 0)
          continue;
 

From 234b03cc2378fbb4375fc2635fe3eeab85840d38 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 14 Oct 2015 23:32:58 +0100
Subject: [PATCH 256/335] pipe-loader: add preliminary scons support

Add a 'static' pipe-loader build, which will be used with follow-up
commits.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/SConscript                        |  1 +
 src/gallium/auxiliary/pipe-loader/Makefile.am |  2 ++
 src/gallium/auxiliary/pipe-loader/SConscript  | 34 +++++++++++++++++++
 3 files changed, 37 insertions(+)
 create mode 100644 src/gallium/auxiliary/pipe-loader/SConscript

diff --git a/src/gallium/SConscript b/src/gallium/SConscript
index fa5fa6e8734..0c3a3742c16 100644
--- a/src/gallium/SConscript
+++ b/src/gallium/SConscript
@@ -5,6 +5,7 @@ Import('env')
 #
 
 SConscript('auxiliary/SConscript')
+SConscript('auxiliary/pipe-loader/SConscript')
 
 #
 # Drivers
diff --git a/src/gallium/auxiliary/pipe-loader/Makefile.am b/src/gallium/auxiliary/pipe-loader/Makefile.am
index 6a4a667ab0f..f661897fdf7 100644
--- a/src/gallium/auxiliary/pipe-loader/Makefile.am
+++ b/src/gallium/auxiliary/pipe-loader/Makefile.am
@@ -44,3 +44,5 @@ libpipe_loader_dynamic_la_LIBADD = \
 	$(top_builddir)/src/loader/libloader.la
 
 endif
+
+EXTRA_DIST = SConscript
diff --git a/src/gallium/auxiliary/pipe-loader/SConscript b/src/gallium/auxiliary/pipe-loader/SConscript
new file mode 100644
index 00000000000..393b6021bee
--- /dev/null
+++ b/src/gallium/auxiliary/pipe-loader/SConscript
@@ -0,0 +1,34 @@
+Import('*')
+
+env = env.Clone()
+
+env.MSVC2008Compat()
+
+env.Append(CPPPATH = [
+    '#/src/loader',
+    '#/src/gallium/winsys',
+])
+
+env.Append(CPPDEFINES = [
+    ('HAVE_PIPE_LOADER_DRI', '1'),
+    ('DROP_PIPE_LOADER_MISC', '1'),
+    ('GALLIUM_STATIC_TARGETS', '1'),
+])
+
+source = env.ParseSourceList('Makefile.sources', 'COMMON_SOURCES')
+
+#if HAVE_LIBDRM
+source += env.ParseSourceList('Makefile.sources', 'DRM_SOURCES')
+
+env.PkgUseModules('DRM')
+env.Append(LIBS = [libloader])
+#endif
+
+pipe_loader = env.ConvenienceLibrary(
+    target = 'pipe_loader',
+    source = source,
+)
+
+env.Alias('pipe_loader', pipe_loader)
+
+Export('pipe_loader')

From c4d337146ad1b880834214510f773f10d9ea6556 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 14 Oct 2015 23:44:17 +0100
Subject: [PATCH 257/335] pipe-loader: add preliminary Android support

Add a 'static' pipe-loader build, which will be used with follow-up
commits.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Chih-Wei Huang <cwhuang@linux.org.tw>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/Android.mk                       |  1 +
 src/gallium/auxiliary/pipe-loader/Android.mk | 49 ++++++++++++++++++++
 2 files changed, 50 insertions(+)
 create mode 100644 src/gallium/auxiliary/pipe-loader/Android.mk

diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk
index 39e064e9538..b406d4a5480 100644
--- a/src/gallium/Android.mk
+++ b/src/gallium/Android.mk
@@ -27,6 +27,7 @@ GALLIUM_TOP := $(call my-dir)
 GALLIUM_COMMON_MK := $(GALLIUM_TOP)/Android.common.mk
 
 SUBDIRS := auxiliary
+SUBDIRS += auxiliary/pipe-loader
 
 #
 # Gallium drivers and their respective winsys
diff --git a/src/gallium/auxiliary/pipe-loader/Android.mk b/src/gallium/auxiliary/pipe-loader/Android.mk
new file mode 100644
index 00000000000..27893137a1a
--- /dev/null
+++ b/src/gallium/auxiliary/pipe-loader/Android.mk
@@ -0,0 +1,49 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2015 Emil Velikov <emil.l.velikov@gmail.com>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# NOTE: Currently we build only a 'static' pipe-loader
+LOCAL_PATH := $(call my-dir)
+
+# get COMMON_SOURCES and DRM_SOURCES
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+
+LOCAL_CFLAGS := \
+	-DHAVE_PIPE_LOADER_DRI \
+	-DDROP_PIPE_LOADER_MISC \
+	-DGALLIUM_STATIC_TARGETS
+
+LOCAL_SRC_FILES := $(COMMON_SOURCES)
+
+LOCAL_MODULE := libmesa_pipe_loader
+
+ifneq ($(filter-out swrast,$(MESA_GPU_DRIVERS)),)
+LOCAL_CFLAGS += -DHAVE_LIBDRM
+LOCAL_SRC_FILES += $(DRM_SOURCES)
+
+LOCAL_SHARED_LIBRARIES := libdrm
+LOCAL_STATIC_LIBRARIES := libmesa_loader
+endif
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)

From 23fb11455b415238fb9e378fa0ab51f4b6cefd7a Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 14 Oct 2015 23:48:15 +0100
Subject: [PATCH 258/335] {st,targets}/dri: use static/dynamic pipe-loader

Convert DRI to use only the pipe-loader interface.

With drisw_create_screen and kms_swrast_create_screen replaced by their
pipe-loader equivalent, we can now drop them.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 .../target-helpers/inline_drm_helper.h        | 24 ----------------
 .../target-helpers/inline_sw_helper.h         | 28 -------------------
 src/gallium/drivers/softpipe/Automake.inc     |  3 --
 src/gallium/state_trackers/dri/Android.mk     |  3 --
 src/gallium/state_trackers/dri/Makefile.am    |  5 ----
 src/gallium/state_trackers/dri/SConscript     |  4 ---
 src/gallium/state_trackers/dri/dri2.c         | 20 +++----------
 src/gallium/state_trackers/dri/dri_screen.c   |  2 --
 src/gallium/state_trackers/dri/drisw.c        | 12 ++++----
 src/gallium/targets/dri/Android.mk            |  1 +
 src/gallium/targets/dri/Makefile.am           |  7 +++--
 src/gallium/targets/dri/SConscript            |  1 +
 src/gallium/targets/dri/target.c              |  3 +-
 13 files changed, 19 insertions(+), 94 deletions(-)

diff --git a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
index 55f636339b3..c57fbac7f35 100644
--- a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
@@ -4,9 +4,6 @@
 #include "state_tracker/drm_driver.h"
 #include "target-helpers/inline_debug_helper.h"
 #include "loader.h"
-#if defined(DRI_TARGET)
-#include "dri_screen.h"
-#endif
 
 #if GALLIUM_SOFTPIPE
 #include "target-helpers/inline_sw_helper.h"
@@ -68,27 +65,6 @@ static char* driver_name = NULL;
 
 /* XXX: We need to teardown the winsys if *screen_create() fails. */
 
-#if defined(GALLIUM_SOFTPIPE)
-#if defined(DRI_TARGET)
-#if defined(HAVE_LIBDRM)
-
-struct pipe_screen *
-kms_swrast_create_screen(int fd)
-{
-   struct sw_winsys *sws;
-   struct pipe_screen *screen;
-
-   sws = kms_dri_create_winsys(fd);
-   if (!sws)
-      return NULL;
-
-   screen = sw_screen_create(sws);
-   return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-#endif
-#endif
-
 #if defined(GALLIUM_I915)
 
 static struct pipe_screen *
diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
index 16937bc6a53..a9ab16f2b54 100644
--- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
@@ -69,32 +69,4 @@ sw_screen_create(struct sw_winsys *winsys)
    return sw_screen_create_named(winsys, driver);
 }
 
-#if defined(GALLIUM_SOFTPIPE)
-#if defined(DRI_TARGET)
-#include "target-helpers/inline_debug_helper.h"
-#include "sw/dri/dri_sw_winsys.h"
-
-inline struct pipe_screen *
-drisw_create_screen(struct drisw_loader_funcs *lf)
-{
-   struct sw_winsys *winsys = NULL;
-   struct pipe_screen *screen = NULL;
-
-   winsys = dri_create_sw_winsys(lf);
-   if (winsys == NULL)
-      return NULL;
-
-   screen = sw_screen_create(winsys);
-   if (screen == NULL) {
-      winsys->destroy(winsys);
-      return NULL;
-   }
-
-   screen = debug_screen_wrap(screen);
-   return screen;
-}
-#endif // DRI_TARGET
-#endif // GALLIUM_SOFTPIPE
-
-
 #endif
diff --git a/src/gallium/drivers/softpipe/Automake.inc b/src/gallium/drivers/softpipe/Automake.inc
index 5cedcef9772..bd3c2eead16 100644
--- a/src/gallium/drivers/softpipe/Automake.inc
+++ b/src/gallium/drivers/softpipe/Automake.inc
@@ -3,13 +3,10 @@ if HAVE_GALLIUM_SOFTPIPE
 TARGET_DRIVERS += swrast
 TARGET_CPPFLAGS += -DGALLIUM_SOFTPIPE
 TARGET_LIB_DEPS += \
-	$(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la \
 	$(top_builddir)/src/gallium/drivers/softpipe/libsoftpipe.la
 
 if HAVE_DRISW_KMS
 TARGET_DRIVERS += kms_swrast
-TARGET_LIB_DEPS += \
-	$(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la
 
 endif
 endif
diff --git a/src/gallium/state_trackers/dri/Android.mk b/src/gallium/state_trackers/dri/Android.mk
index 43f0de9b464..f0eb18dcacf 100644
--- a/src/gallium/state_trackers/dri/Android.mk
+++ b/src/gallium/state_trackers/dri/Android.mk
@@ -29,9 +29,6 @@ include $(CLEAR_VARS)
 
 LOCAL_SRC_FILES := $(common_SOURCES)
 
-LOCAL_CFLAGS := \
-	-DGALLIUM_STATIC_TARGETS=1 \
-
 LOCAL_C_INCLUDES := \
 	$(MESA_TOP)/src/mapi \
 	$(MESA_TOP)/src/mesa \
diff --git a/src/gallium/state_trackers/dri/Makefile.am b/src/gallium/state_trackers/dri/Makefile.am
index 102b84390bb..74bccaa6416 100644
--- a/src/gallium/state_trackers/dri/Makefile.am
+++ b/src/gallium/state_trackers/dri/Makefile.am
@@ -34,15 +34,10 @@ AM_CPPFLAGS = \
 	$(LIBDRM_CFLAGS) \
 	$(VISIBILITY_CFLAGS)
 
-if HAVE_GALLIUM_STATIC_TARGETS
-AM_CPPFLAGS += \
-	-DGALLIUM_STATIC_TARGETS=1
-
 if HAVE_GALLIUM_SOFTPIPE
 AM_CPPFLAGS += \
 	-DGALLIUM_SOFTPIPE
 endif # HAVE_GALLIUM_SOFTPIPE
-endif # HAVE_GALLIUM_STATIC_TARGETS
 
 noinst_LTLIBRARIES = libdri.la
 libdri_la_SOURCES = $(common_SOURCES)
diff --git a/src/gallium/state_trackers/dri/SConscript b/src/gallium/state_trackers/dri/SConscript
index 657300baf13..fa48fb8a0d7 100644
--- a/src/gallium/state_trackers/dri/SConscript
+++ b/src/gallium/state_trackers/dri/SConscript
@@ -15,10 +15,6 @@ env.Append(CPPPATH = [
     xmlpool_options.dir.dir, # Dir to generated xmlpool/options.h
 ])
 
-env.Append(CPPDEFINES = [
-    ('GALLIUM_STATIC_TARGETS', '1'),
-])
-
 sources = env.ParseSourceList('Makefile.sources', 'common_SOURCES')
 
 # XXX: if HAVE_DRISW
diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c
index a11f3b8d21c..62173750381 100644
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -1457,19 +1457,12 @@ dri2_init_screen(__DRIscreen * sPriv)
 
    sPriv->driverPrivate = (void *)screen;
 
-#if GALLIUM_STATIC_TARGETS
-   pscreen = dd_create_screen(screen->fd);
-
-   throttle_ret = dd_configuration(DRM_CONF_THROTTLE);
-   dmabuf_ret = dd_configuration(DRM_CONF_SHARE_FD);
-#else
    if (pipe_loader_drm_probe_fd(&screen->dev, screen->fd)) {
       pscreen = pipe_loader_create_screen(screen->dev);
 
       throttle_ret = pipe_loader_configuration(screen->dev, DRM_CONF_THROTTLE);
       dmabuf_ret = pipe_loader_configuration(screen->dev, DRM_CONF_SHARE_FD);
    }
-#endif // GALLIUM_STATIC_TARGETS
 
    if (throttle_ret && throttle_ret->val.val_int != -1) {
       screen->throttling_enabled = TRUE;
@@ -1495,11 +1488,7 @@ dri2_init_screen(__DRIscreen * sPriv)
 
    /* dri_init_screen_helper checks pscreen for us */
 
-#if GALLIUM_STATIC_TARGETS
-   configs = dri_init_screen_helper(screen, pscreen, dd_driver_name());
-#else
    configs = dri_init_screen_helper(screen, pscreen, screen->dev->driver_name);
-#endif // GALLIUM_STATIC_TARGETS
    if (!configs)
       goto fail;
 
@@ -1511,10 +1500,8 @@ dri2_init_screen(__DRIscreen * sPriv)
    return configs;
 fail:
    dri_destroy_screen_helper(screen);
-#if !GALLIUM_STATIC_TARGETS
    if (screen->dev)
       pipe_loader_release(&screen->dev, 1);
-#endif // !GALLIUM_STATIC_TARGETS
    FREE(screen);
    return NULL;
 }
@@ -1527,7 +1514,6 @@ fail:
 static const __DRIconfig **
 dri_kms_init_screen(__DRIscreen * sPriv)
 {
-#if GALLIUM_STATIC_TARGETS
 #if defined(GALLIUM_SOFTPIPE)
    const __DRIconfig **configs;
    struct dri_screen *screen;
@@ -1543,7 +1529,8 @@ dri_kms_init_screen(__DRIscreen * sPriv)
 
    sPriv->driverPrivate = (void *)screen;
 
-   pscreen = kms_swrast_create_screen(screen->fd);
+   if (pipe_loader_sw_probe_kms(&screen->dev, screen->fd))
+      pscreen = pipe_loader_create_screen(screen->dev);
 
    if (drmGetCap(sPriv->fd, DRM_CAP_PRIME, &cap) == 0 &&
           (cap & DRM_PRIME_CAP_IMPORT)) {
@@ -1566,9 +1553,10 @@ dri_kms_init_screen(__DRIscreen * sPriv)
    return configs;
 fail:
    dri_destroy_screen_helper(screen);
+   if (screen->dev)
+      pipe_loader_release(&screen->dev, 1);
    FREE(screen);
 #endif // GALLIUM_SOFTPIPE
-#endif // GALLIUM_STATIC_TARGETS
    return NULL;
 }
 
diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c
index c4c2d9c8fb1..cf0f26554d3 100644
--- a/src/gallium/state_trackers/dri/dri_screen.c
+++ b/src/gallium/state_trackers/dri/dri_screen.c
@@ -390,9 +390,7 @@ dri_destroy_screen(__DRIscreen * sPriv)
 
    dri_destroy_screen_helper(screen);
 
-#if !GALLIUM_STATIC_TARGETS
    pipe_loader_release(&screen->dev, 1);
-#endif // !GALLIUM_STATIC_TARGETS
 
    free(screen);
    sPriv->driverPrivate = NULL;
diff --git a/src/gallium/state_trackers/dri/drisw.c b/src/gallium/state_trackers/dri/drisw.c
index 1b24f4896ea..06fa9427667 100644
--- a/src/gallium/state_trackers/dri/drisw.c
+++ b/src/gallium/state_trackers/dri/drisw.c
@@ -34,14 +34,12 @@
  * for createImage/destroyImage similar to DRI2 getBuffers.
  */
 
-/* XXX: Temporary hack, until we get rid of drisw_create_screen() */
-#define DRI_TARGET
-
 #include "util/u_format.h"
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
 #include "util/u_box.h"
 #include "pipe/p_context.h"
+#include "pipe-loader/pipe_loader.h"
 #include "state_tracker/drisw_api.h"
 #include "state_tracker/st_context.h"
 
@@ -385,7 +383,7 @@ drisw_init_screen(__DRIscreen * sPriv)
 {
    const __DRIconfig **configs;
    struct dri_screen *screen;
-   struct pipe_screen *pscreen;
+   struct pipe_screen *pscreen = NULL;
 
    screen = CALLOC_STRUCT(dri_screen);
    if (!screen)
@@ -399,7 +397,9 @@ drisw_init_screen(__DRIscreen * sPriv)
    sPriv->driverPrivate = (void *)screen;
    sPriv->extensions = drisw_screen_extensions;
 
-   pscreen = drisw_create_screen(&drisw_lf);
+   if (pipe_loader_sw_probe_dri(&screen->dev, &drisw_lf))
+      pscreen = pipe_loader_create_screen(screen->dev);
+
    /* dri_init_screen_helper checks pscreen for us */
 
    configs = dri_init_screen_helper(screen, pscreen, "swrast");
@@ -409,6 +409,8 @@ drisw_init_screen(__DRIscreen * sPriv)
    return configs;
 fail:
    dri_destroy_screen_helper(screen);
+   if (screen->dev)
+      pipe_loader_release(&screen->dev, 1);
    FREE(screen);
    return NULL;
 }
diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk
index 89b420fa7fe..2d9610ee9ab 100644
--- a/src/gallium/targets/dri/Android.mk
+++ b/src/gallium/targets/dri/Android.mk
@@ -108,6 +108,7 @@ LOCAL_STATIC_LIBRARIES := \
 	libmesa_dri_common \
 	libmesa_megadriver_stub \
 	libmesa_gallium \
+	libmesa_pipe_loader \
 	libmesa_util \
 	libmesa_loader \
 
diff --git a/src/gallium/targets/dri/Makefile.am b/src/gallium/targets/dri/Makefile.am
index b05441f8a9f..2666524fbfe 100644
--- a/src/gallium/targets/dri/Makefile.am
+++ b/src/gallium/targets/dri/Makefile.am
@@ -64,7 +64,7 @@ EXTRA_DIST = \
 
 TARGET_DRIVERS =
 TARGET_CPPFLAGS =
-TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la
+TARGET_LIB_DEPS =
 
 include $(top_srcdir)/src/gallium/drivers/i915/Automake.inc
 
@@ -91,7 +91,10 @@ if HAVE_GALLIUM_STATIC_TARGETS
 
 gallium_dri_la_SOURCES += target.c
 gallium_dri_la_CPPFLAGS = $(AM_CPPFLAGS) $(TARGET_CPPFLAGS)
-gallium_dri_la_LIBADD += $(TARGET_LIB_DEPS) \
+gallium_dri_la_LIBADD += \
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \
+	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
+	$(TARGET_LIB_DEPS) \
 	$(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON)
 
 else # HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/targets/dri/SConscript b/src/gallium/targets/dri/SConscript
index 8d28924cb04..b4516598675 100644
--- a/src/gallium/targets/dri/SConscript
+++ b/src/gallium/targets/dri/SConscript
@@ -38,6 +38,7 @@ env.Prepend(LIBS = [
     svga,
     ws_dri,
     softpipe,
+    pipe_loader,
     libloader,
     mesautil,
     mesa,
diff --git a/src/gallium/targets/dri/target.c b/src/gallium/targets/dri/target.c
index 16a958bdf67..d6fbd01b88f 100644
--- a/src/gallium/targets/dri/target.c
+++ b/src/gallium/targets/dri/target.c
@@ -1,5 +1,4 @@
-#include "target-helpers/inline_drm_helper.h"
-#include "target-helpers/inline_sw_helper.h"
+#include "target-helpers/drm_helper.h"
 
 #include "dri_screen.h"
 

From 1eb6e8a23cc29081105336dc2a1cbf6bb4c2603d Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Fri, 20 Nov 2015 18:02:12 +0000
Subject: [PATCH 259/335] {auxiliary,targets}/vl: use static/dynamic
 pipe-loader

Analogous to previous commit.

v2: rebase on top of vl_winsys_drm.c addition

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/auxiliary/Makefile.am        |  6 ------
 src/gallium/auxiliary/vl/vl_winsys_dri.c |  8 --------
 src/gallium/auxiliary/vl/vl_winsys_drm.c | 11 +----------
 src/gallium/targets/omx/Makefile.am      |  7 +++++--
 src/gallium/targets/omx/target.c         |  2 +-
 src/gallium/targets/va/Makefile.am       |  7 +++++--
 src/gallium/targets/va/target.c          |  2 +-
 src/gallium/targets/vdpau/Makefile.am    |  7 +++++--
 src/gallium/targets/vdpau/target.c       |  2 +-
 src/gallium/targets/xvmc/Makefile.am     |  7 +++++--
 src/gallium/targets/xvmc/target.c        |  2 +-
 11 files changed, 25 insertions(+), 36 deletions(-)

diff --git a/src/gallium/auxiliary/Makefile.am b/src/gallium/auxiliary/Makefile.am
index 7ff8972f8c0..ee296ceda33 100644
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -64,12 +64,6 @@ COMMON_VL_CFLAGS = \
 	$(DRI2PROTO_CFLAGS) \
 	$(LIBDRM_CFLAGS)
 
-if HAVE_GALLIUM_STATIC_TARGETS
-COMMON_VL_CFLAGS += \
-	-DGALLIUM_STATIC_TARGETS=1
-
-endif # HAVE_GALLIUM_STATIC_TARGETS
-
 noinst_LTLIBRARIES += libgalliumvl.la
 
 libgalliumvl_la_CFLAGS = \
diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri.c b/src/gallium/auxiliary/vl/vl_winsys_dri.c
index fb16adc966c..c4f9295271f 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_dri.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri.c
@@ -402,12 +402,8 @@ vl_dri2_screen_create(Display *display, int screen)
    if (authenticate == NULL || !authenticate->authenticated)
       goto free_authenticate;
 
-#if GALLIUM_STATIC_TARGETS
-   scrn->base.pscreen = dd_create_screen(fd);
-#else
    if (pipe_loader_drm_probe_fd(&scrn->base.dev, fd))
       scrn->base.pscreen = pipe_loader_create_screen(scrn->base.dev);
-#endif // GALLIUM_STATIC_TARGETS
 
    if (!scrn->base.pscreen)
       goto release_pipe;
@@ -430,10 +426,8 @@ vl_dri2_screen_create(Display *display, int screen)
    return &scrn->base;
 
 release_pipe:
-#if !GALLIUM_STATIC_TARGETS
    if (scrn->base.dev)
       pipe_loader_release(&scrn->base.dev, 1);
-#endif // !GALLIUM_STATIC_TARGETS
 free_authenticate:
    free(authenticate);
 free_connect:
@@ -462,8 +456,6 @@ vl_dri2_screen_destroy(struct vl_screen *vscreen)
 
    vl_dri2_destroy_drawable(scrn);
    scrn->base.pscreen->destroy(scrn->base.pscreen);
-#if !GALLIUM_STATIC_TARGETS
    pipe_loader_release(&scrn->base.dev, 1);
-#endif // !GALLIUM_STATIC_TARGETS
    FREE(scrn);
 }
diff --git a/src/gallium/auxiliary/vl/vl_winsys_drm.c b/src/gallium/auxiliary/vl/vl_winsys_drm.c
index f4e8306b67c..f993e2c7727 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_drm.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_drm.c
@@ -46,12 +46,8 @@ vl_drm_screen_create(int fd)
    if (!vscreen)
       return NULL;
 
-#if GALLIUM_STATIC_TARGETS
-   vscreen->pscreen = dd_create_screen(fd);
-#else
    if (pipe_loader_drm_probe_fd(&vscreen->dev, dup(fd)))
       vscreen->pscreen = pipe_loader_create_screen(vscreen->dev);
-#endif
 
    if (!vscreen->pscreen)
       goto error;
@@ -65,10 +61,9 @@ vl_drm_screen_create(int fd)
    return vscreen;
 
 error:
-#if !GALLIUM_STATIC_TARGETS
    if (vscreen->dev)
       pipe_loader_release(&vscreen->dev, 1);
-#endif // !GALLIUM_STATIC_TARGETS
+
    FREE(vscreen);
    return NULL;
 }
@@ -79,10 +74,6 @@ vl_drm_screen_destroy(struct vl_screen *vscreen)
    assert(vscreen);
 
    vscreen->pscreen->destroy(vscreen->pscreen);
-
-#if !GALLIUM_STATIC_TARGETS
    pipe_loader_release(&vscreen->dev, 1);
-#endif
-
    FREE(vscreen);
 }
diff --git a/src/gallium/targets/omx/Makefile.am b/src/gallium/targets/omx/Makefile.am
index f9c0842179d..3bdb9eb7e61 100644
--- a/src/gallium/targets/omx/Makefile.am
+++ b/src/gallium/targets/omx/Makefile.am
@@ -40,7 +40,7 @@ if HAVE_GALLIUM_STATIC_TARGETS
 
 TARGET_DRIVERS =
 TARGET_CPPFLAGS =
-TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la
+TARGET_LIB_DEPS =
 
 
 include $(top_srcdir)/src/gallium/drivers/nouveau/Automake.inc
@@ -50,7 +50,10 @@ include $(top_srcdir)/src/gallium/drivers/radeonsi/Automake.inc
 
 libomx_mesa_la_SOURCES += target.c
 libomx_mesa_la_CPPFLAGS = $(TARGET_CPPFLAGS)
-libomx_mesa_la_LIBADD += $(TARGET_LIB_DEPS) \
+libomx_mesa_la_LIBADD += \
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \
+	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
+	$(TARGET_LIB_DEPS) \
 	$(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON)
 
 else # HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/targets/omx/target.c b/src/gallium/targets/omx/target.c
index fde4a4a7dcf..42b1346d341 100644
--- a/src/gallium/targets/omx/target.c
+++ b/src/gallium/targets/omx/target.c
@@ -1 +1 @@
-#include "target-helpers/inline_drm_helper.h"
+#include "target-helpers/drm_helper.h"
diff --git a/src/gallium/targets/va/Makefile.am b/src/gallium/targets/va/Makefile.am
index 17b9ae3e822..733e7acb455 100644
--- a/src/gallium/targets/va/Makefile.am
+++ b/src/gallium/targets/va/Makefile.am
@@ -40,14 +40,17 @@ if HAVE_GALLIUM_STATIC_TARGETS
 
 TARGET_DRIVERS =
 TARGET_CPPFLAGS =
-TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la
+TARGET_LIB_DEPS =
 
 include $(top_srcdir)/src/gallium/drivers/r600/Automake.inc
 include $(top_srcdir)/src/gallium/drivers/radeonsi/Automake.inc
 
 gallium_drv_video_la_SOURCES += target.c
 gallium_drv_video_la_CPPFLAGS = $(TARGET_CPPFLAGS)
-gallium_drv_video_la_LIBADD += $(TARGET_LIB_DEPS) \
+gallium_drv_video_la_LIBADD += \
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \
+	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
+	$(TARGET_LIB_DEPS) \
 	$(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON)
 
 else # HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/targets/va/target.c b/src/gallium/targets/va/target.c
index fde4a4a7dcf..42b1346d341 100644
--- a/src/gallium/targets/va/target.c
+++ b/src/gallium/targets/va/target.c
@@ -1 +1 @@
-#include "target-helpers/inline_drm_helper.h"
+#include "target-helpers/drm_helper.h"
diff --git a/src/gallium/targets/vdpau/Makefile.am b/src/gallium/targets/vdpau/Makefile.am
index f9fb56069a9..d388f8b5014 100644
--- a/src/gallium/targets/vdpau/Makefile.am
+++ b/src/gallium/targets/vdpau/Makefile.am
@@ -47,7 +47,7 @@ EXTRA_DIST = \
 
 TARGET_DRIVERS =
 TARGET_CPPFLAGS =
-TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la
+TARGET_LIB_DEPS =
 
 include $(top_srcdir)/src/gallium/drivers/nouveau/Automake.inc
 
@@ -59,7 +59,10 @@ if HAVE_GALLIUM_STATIC_TARGETS
 
 libvdpau_gallium_la_SOURCES += target.c
 libvdpau_gallium_la_CPPFLAGS = $(TARGET_CPPFLAGS)
-libvdpau_gallium_la_LIBADD += $(TARGET_LIB_DEPS) \
+libvdpau_gallium_la_LIBADD += \
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \
+	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
+	$(TARGET_LIB_DEPS) \
 	$(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON)
 
 else # HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/targets/vdpau/target.c b/src/gallium/targets/vdpau/target.c
index fde4a4a7dcf..42b1346d341 100644
--- a/src/gallium/targets/vdpau/target.c
+++ b/src/gallium/targets/vdpau/target.c
@@ -1 +1 @@
-#include "target-helpers/inline_drm_helper.h"
+#include "target-helpers/drm_helper.h"
diff --git a/src/gallium/targets/xvmc/Makefile.am b/src/gallium/targets/xvmc/Makefile.am
index 5fcfc88dd53..fdc5f4b7318 100644
--- a/src/gallium/targets/xvmc/Makefile.am
+++ b/src/gallium/targets/xvmc/Makefile.am
@@ -38,7 +38,7 @@ EXTRA_DIST = xvmc.sym
 
 TARGET_DRIVERS =
 TARGET_CPPFLAGS =
-TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la
+TARGET_LIB_DEPS =
 
 include $(top_srcdir)/src/gallium/drivers/nouveau/Automake.inc
 
@@ -48,7 +48,10 @@ if HAVE_GALLIUM_STATIC_TARGETS
 
 libXvMCgallium_la_SOURCES += target.c
 libXvMCgallium_la_CPPFLAGS = $(TARGET_CPPFLAGS)
-libXvMCgallium_la_LIBADD += $(TARGET_LIB_DEPS) \
+libXvMCgallium_la_LIBADD += \
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \
+	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
+	$(TARGET_LIB_DEPS) \
 	$(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON)
 
 else # HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/targets/xvmc/target.c b/src/gallium/targets/xvmc/target.c
index fde4a4a7dcf..42b1346d341 100644
--- a/src/gallium/targets/xvmc/target.c
+++ b/src/gallium/targets/xvmc/target.c
@@ -1 +1 @@
-#include "target-helpers/inline_drm_helper.h"
+#include "target-helpers/drm_helper.h"

From 611ef64ed5ddb3c8bd95c2e0dc2f0959218bf516 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Thu, 15 Oct 2015 00:43:52 +0100
Subject: [PATCH 260/335] {st,targets}/xa: use static/dynamic pipe-loader

Analogous to previous commits.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/state_trackers/xa/Makefile.am  |  5 -----
 src/gallium/state_trackers/xa/xa_tracker.c | 16 ++--------------
 src/gallium/targets/xa/Makefile.am         |  7 +++++--
 src/gallium/targets/xa/target.c            |  2 +-
 4 files changed, 8 insertions(+), 22 deletions(-)

diff --git a/src/gallium/state_trackers/xa/Makefile.am b/src/gallium/state_trackers/xa/Makefile.am
index 0d50c27253b..968778f995c 100644
--- a/src/gallium/state_trackers/xa/Makefile.am
+++ b/src/gallium/state_trackers/xa/Makefile.am
@@ -28,11 +28,6 @@ AM_CFLAGS = \
 	$(GALLIUM_CFLAGS) \
 	$(VISIBILITY_CFLAGS)
 
-if HAVE_GALLIUM_STATIC_TARGETS
-AM_CPPFLAGS = \
-	-DGALLIUM_STATIC_TARGETS=1
-endif
-
 xa_includedir = $(includedir)
 xa_include_HEADERS = \
 	xa_composite.h \
diff --git a/src/gallium/state_trackers/xa/xa_tracker.c b/src/gallium/state_trackers/xa/xa_tracker.c
index 3011598e0d9..faa630c144b 100644
--- a/src/gallium/state_trackers/xa/xa_tracker.c
+++ b/src/gallium/state_trackers/xa/xa_tracker.c
@@ -152,21 +152,13 @@ xa_tracker_create(int drm_fd)
     struct xa_tracker *xa = calloc(1, sizeof(struct xa_tracker));
     enum xa_surface_type stype;
     unsigned int num_formats;
-    int loader_fd;
 
     if (!xa)
 	return NULL;
 
-#if GALLIUM_STATIC_TARGETS
-    xa->screen = dd_create_screen(drm_fd);
-    (void) loader_fd; /* silence unused var warning */
-#else
-    loader_fd = dup(drm_fd);
-    if (loader_fd == -1)
-        return NULL;
-    if (pipe_loader_drm_probe_fd(&xa->dev, loader_fd))
+    if (pipe_loader_drm_probe_fd(&xa->dev, dup(drm_fd)))
 	xa->screen = pipe_loader_create_screen(xa->dev);
-#endif
+
     if (!xa->screen)
 	goto out_no_screen;
 
@@ -214,10 +206,8 @@ xa_tracker_create(int drm_fd)
  out_no_pipe:
     xa->screen->destroy(xa->screen);
  out_no_screen:
-#if !GALLIUM_STATIC_TARGETS
     if (xa->dev)
 	pipe_loader_release(&xa->dev, 1);
-#endif
     free(xa);
     return NULL;
 }
@@ -228,9 +218,7 @@ xa_tracker_destroy(struct xa_tracker *xa)
     free(xa->supported_formats);
     xa_context_destroy(xa->default_ctx);
     xa->screen->destroy(xa->screen);
-#if !GALLIUM_STATIC_TARGETS
     pipe_loader_release(&xa->dev, 1);
-#endif
     free(xa);
 }
 
diff --git a/src/gallium/targets/xa/Makefile.am b/src/gallium/targets/xa/Makefile.am
index 545d17eb4e1..a63fd6903a4 100644
--- a/src/gallium/targets/xa/Makefile.am
+++ b/src/gallium/targets/xa/Makefile.am
@@ -60,7 +60,7 @@ if HAVE_GALLIUM_STATIC_TARGETS
 
 TARGET_DRIVERS =
 TARGET_CPPFLAGS =
-TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la
+TARGET_LIB_DEPS =
 
 include $(top_srcdir)/src/gallium/drivers/i915/Automake.inc
 
@@ -74,7 +74,10 @@ include $(top_srcdir)/src/gallium/drivers/freedreno/Automake.inc
 
 libxatracker_la_SOURCES += target.c
 libxatracker_la_CPPFLAGS = $(TARGET_CPPFLAGS)
-libxatracker_la_LIBADD += $(TARGET_LIB_DEPS)
+libxatracker_la_LIBADD += \
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \
+	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
+	$(TARGET_LIB_DEPS)
 
 else # HAVE_GALLIUM_STATIC_TARGETS
 
diff --git a/src/gallium/targets/xa/target.c b/src/gallium/targets/xa/target.c
index fde4a4a7dcf..42b1346d341 100644
--- a/src/gallium/targets/xa/target.c
+++ b/src/gallium/targets/xa/target.c
@@ -1 +1 @@
-#include "target-helpers/inline_drm_helper.h"
+#include "target-helpers/drm_helper.h"

From dddedbec0ed6a7791a92e40689b564999ca9b7eb Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 14 Oct 2015 16:47:04 +0100
Subject: [PATCH 261/335] {st,targets}/nine: use static/dynamic pipe-loader

Analogous to previous commits.

v2: add the missing winsys libs linkage

Cc: Axel Davy <axel.davy@ens.fr>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/targets/d3dadapter9/Makefile.am | 14 +++++-------
 src/gallium/targets/d3dadapter9/drm.c       | 24 +++------------------
 2 files changed, 8 insertions(+), 30 deletions(-)

diff --git a/src/gallium/targets/d3dadapter9/Makefile.am b/src/gallium/targets/d3dadapter9/Makefile.am
index 776f86bda6d..d1d9829b6c5 100644
--- a/src/gallium/targets/d3dadapter9/Makefile.am
+++ b/src/gallium/targets/d3dadapter9/Makefile.am
@@ -34,12 +34,6 @@ AM_CFLAGS = \
 	$(GALLIUM_TARGET_CFLAGS) \
 	$(VISIBILITY_CFLAGS)
 
-if HAVE_GALLIUM_STATIC_TARGETS
-AM_CPPFLAGS = \
-	-DGALLIUM_STATIC_TARGETS=1
-
-endif
-
 ninedir = $(D3D_DRIVER_INSTALL_DIR)
 nine_LTLIBRARIES = d3dadapter9.la
 
@@ -71,7 +65,6 @@ d3dadapter9_la_LIBADD = \
 	$(top_builddir)/src/glsl/libnir.la \
 	$(top_builddir)/src/gallium/state_trackers/nine/libninetracker.la \
 	$(top_builddir)/src/util/libmesautil.la \
-	$(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \
 	$(EXPAT_LIBS) \
 	$(GALLIUM_COMMON_LIB_DEPS)
 
@@ -80,7 +73,7 @@ EXTRA_DIST = d3dadapter9.sym
 
 TARGET_DRIVERS =
 TARGET_CPPFLAGS =
-TARGET_LIB_DEPS = $(top_builddir)/src/loader/libloader.la
+TARGET_LIB_DEPS =
 
 include $(top_srcdir)/src/gallium/drivers/i915/Automake.inc
 
@@ -104,7 +97,10 @@ include $(top_srcdir)/src/gallium/drivers/llvmpipe/Automake.inc
 if HAVE_GALLIUM_STATIC_TARGETS
 
 d3dadapter9_la_CPPFLAGS = $(AM_CPPFLAGS) $(TARGET_CPPFLAGS)
-d3dadapter9_la_LIBADD += $(TARGET_LIB_DEPS) \
+d3dadapter9_la_LIBADD += \
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \
+	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
+	$(TARGET_LIB_DEPS) \
 	$(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON)
 
 else # HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c
index 78896cb34fd..c890191124e 100644
--- a/src/gallium/targets/d3dadapter9/drm.c
+++ b/src/gallium/targets/d3dadapter9/drm.c
@@ -29,8 +29,7 @@
 #include "pipe/p_screen.h"
 #include "pipe/p_state.h"
 
-#include "target-helpers/inline_drm_helper.h"
-#include "target-helpers/inline_wrapper_sw_helper.h"
+#include "target-helpers/drm_helper.h"
 #include "state_tracker/drm_driver.h"
 
 #include "d3dadapter/d3dadapter9.h"
@@ -91,12 +90,10 @@ drm_destroy( struct d3dadapter9_context *ctx )
     else if (ctx->hal)
         ctx->hal->destroy(ctx->hal);
 
-#if !GALLIUM_STATIC_TARGETS
     if (drm->swdev)
         pipe_loader_release(&drm->swdev, 1);
     if (drm->dev)
         pipe_loader_release(&drm->dev, 1);
-#endif
 
     close(drm->fd);
     FREE(ctx);
@@ -223,10 +220,6 @@ drm_create_adapter( int fd,
     ctx->fd = fd;
     ctx->base.linear_framebuffer = !!different_device;
 
-#if GALLIUM_STATIC_TARGETS
-    ctx->base.hal = dd_create_screen(fd);
-#else
-    /* use pipe-loader to dlopen appropriate drm driver */
     if (!pipe_loader_drm_probe_fd(&ctx->dev, fd)) {
         ERR("Failed to probe drm fd %d.\n", fd);
         FREE(ctx);
@@ -234,22 +227,15 @@ drm_create_adapter( int fd,
         return D3DERR_DRIVERINTERNALERROR;
     }
 
-    /* use pipe-loader to create a drm screen (hal) */
     ctx->base.hal = pipe_loader_create_screen(ctx->dev);
-#endif
     if (!ctx->base.hal) {
         ERR("Unable to load requested driver.\n");
         drm_destroy(&ctx->base);
         return D3DERR_DRIVERINTERNALERROR;
     }
 
-#if GALLIUM_STATIC_TARGETS
-    dmabuf_ret = dd_configuration(DRM_CONF_SHARE_FD);
-    throttle_ret = dd_configuration(DRM_CONF_THROTTLE);
-#else
     dmabuf_ret = pipe_loader_configuration(ctx->dev, DRM_CONF_SHARE_FD);
     throttle_ret = pipe_loader_configuration(ctx->dev, DRM_CONF_THROTTLE);
-#endif // GALLIUM_STATIC_TARGETS
     if (!dmabuf_ret || !dmabuf_ret->val.val_bool) {
         ERR("The driver is not capable of dma-buf sharing."
             "Abandon to load nine state tracker\n");
@@ -296,14 +282,10 @@ drm_create_adapter( int fd,
     driDestroyOptionCache(&userInitOptions);
     driDestroyOptionInfo(&defaultInitOptions);
 
-#if GALLIUM_STATIC_TARGETS
-    ctx->base.ref = sw_screen_wrap(ctx->base.hal);
-#else
     /* wrap it to create a software screen that can share resources */
-    if (pipe_loader_sw_probe_wrapped(&ctx->swdev, ctx->base.hal)) {
+    if (pipe_loader_sw_probe_wrapped(&ctx->swdev, ctx->base.hal))
         ctx->base.ref = pipe_loader_create_screen(ctx->swdev);
-    }
-#endif
+
     if (!ctx->base.ref) {
         ERR("Couldn't wrap drm screen to swrast screen. Software devices "
             "will be unavailable.\n");

From b7f5c2ee48a8e386a891996ca83eb56fdc21229e Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Thu, 19 Nov 2015 19:47:02 +0000
Subject: [PATCH 262/335] target-helpers: remove inline_drm_helper.h

As of the earlier commits, all the targets use the non-inline version.
Remove the corresponding function prototypes/declarations as well.

v2: rebase on top of virgl support.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 .../target-helpers/inline_drm_helper.h        | 371 ------------------
 .../include/state_tracker/drm_driver.h        |   6 -
 2 files changed, 377 deletions(-)
 delete mode 100644 src/gallium/auxiliary/target-helpers/inline_drm_helper.h

diff --git a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
deleted file mode 100644
index c57fbac7f35..00000000000
--- a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
+++ /dev/null
@@ -1,371 +0,0 @@
-#ifndef INLINE_DRM_HELPER_H
-#define INLINE_DRM_HELPER_H
-
-#include "state_tracker/drm_driver.h"
-#include "target-helpers/inline_debug_helper.h"
-#include "loader.h"
-
-#if GALLIUM_SOFTPIPE
-#include "target-helpers/inline_sw_helper.h"
-#include "sw/kms-dri/kms_dri_sw_winsys.h"
-#endif
-
-#if GALLIUM_I915
-#include "i915/drm/i915_drm_public.h"
-#include "i915/i915_public.h"
-#endif
-
-#if GALLIUM_ILO
-#include "intel/drm/intel_drm_public.h"
-#include "ilo/ilo_public.h"
-#endif
-
-#if GALLIUM_NOUVEAU
-#include "nouveau/drm/nouveau_drm_public.h"
-#endif
-
-#if GALLIUM_R300
-#include "radeon/radeon_winsys.h"
-#include "radeon/drm/radeon_drm_public.h"
-#include "r300/r300_public.h"
-#endif
-
-#if GALLIUM_R600
-#include "radeon/radeon_winsys.h"
-#include "radeon/drm/radeon_drm_public.h"
-#include "r600/r600_public.h"
-#endif
-
-#if GALLIUM_RADEONSI
-#include "radeon/radeon_winsys.h"
-#include "radeon/drm/radeon_drm_public.h"
-#include "amdgpu/drm/amdgpu_public.h"
-#include "radeonsi/si_public.h"
-#endif
-
-#if GALLIUM_VMWGFX
-#include "svga/drm/svga_drm_public.h"
-#include "svga/svga_public.h"
-#endif
-
-#if GALLIUM_FREEDRENO
-#include "freedreno/drm/freedreno_drm_public.h"
-#endif
-
-#if GALLIUM_VC4
-#include "vc4/drm/vc4_drm_public.h"
-#endif
-
-#if GALLIUM_VIRGL
-#include "virgl/drm/virgl_drm_public.h"
-#include "virgl/virgl_public.h"
-#endif
-
-static char* driver_name = NULL;
-
-/* XXX: We need to teardown the winsys if *screen_create() fails. */
-
-#if defined(GALLIUM_I915)
-
-static struct pipe_screen *
-pipe_i915_create_screen(int fd)
-{
-   struct i915_winsys *iws;
-   struct pipe_screen *screen;
-
-   iws = i915_drm_winsys_create(fd);
-   if (!iws)
-      return NULL;
-
-   screen = i915_screen_create(iws);
-   return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_ILO)
-
-static struct pipe_screen *
-pipe_ilo_create_screen(int fd)
-{
-   struct intel_winsys *iws;
-   struct pipe_screen *screen;
-
-   iws = intel_winsys_create_for_fd(fd);
-   if (!iws)
-      return NULL;
-
-   screen = ilo_screen_create(iws);
-   return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_NOUVEAU)
-
-static struct pipe_screen *
-pipe_nouveau_create_screen(int fd)
-{
-   struct pipe_screen *screen;
-
-   screen = nouveau_drm_screen_create(fd);
-   return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_R300)
-
-static struct pipe_screen *
-pipe_r300_create_screen(int fd)
-{
-   struct radeon_winsys *rw;
-
-   rw = radeon_drm_winsys_create(fd, r300_screen_create);
-   return rw ? debug_screen_wrap(rw->screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_R600)
-
-static struct pipe_screen *
-pipe_r600_create_screen(int fd)
-{
-   struct radeon_winsys *rw;
-
-   rw = radeon_drm_winsys_create(fd, r600_screen_create);
-   return rw ? debug_screen_wrap(rw->screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_RADEONSI)
-
-static struct pipe_screen *
-pipe_radeonsi_create_screen(int fd)
-{
-   struct radeon_winsys *rw;
-
-   /* First, try amdgpu. */
-   rw = amdgpu_winsys_create(fd, radeonsi_screen_create);
-
-   if (!rw)
-      rw = radeon_drm_winsys_create(fd, radeonsi_screen_create);
-
-   return rw ? debug_screen_wrap(rw->screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_VMWGFX)
-
-static struct pipe_screen *
-pipe_vmwgfx_create_screen(int fd)
-{
-   struct svga_winsys_screen *sws;
-   struct pipe_screen *screen;
-
-   sws = svga_drm_winsys_screen_create(fd);
-   if (!sws)
-      return NULL;
-
-   screen = svga_screen_create(sws);
-   return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_FREEDRENO)
-
-static struct pipe_screen *
-pipe_freedreno_create_screen(int fd)
-{
-   struct pipe_screen *screen;
-
-   screen = fd_drm_screen_create(fd);
-   return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_VIRGL)
-
-static struct pipe_screen *
-pipe_virgl_create_screen(int fd)
-{
-   struct virgl_winsys *vws;
-   struct pipe_screen *screen;
-
-   vws = virgl_drm_winsys_create(fd);
-   if (!vws)
-      return NULL;
-
-   screen = virgl_create_screen(vws);
-   return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_VC4)
-
-static struct pipe_screen *
-pipe_vc4_create_screen(int fd)
-{
-   struct pipe_screen *screen;
-
-   screen = vc4_drm_screen_create(fd);
-   return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-
-inline struct pipe_screen *
-dd_create_screen(int fd)
-{
-   driver_name = loader_get_driver_for_fd(fd, _LOADER_GALLIUM);
-   if (!driver_name)
-      return NULL;
-
-#if defined(GALLIUM_I915)
-   if (strcmp(driver_name, "i915") == 0)
-      return pipe_i915_create_screen(fd);
-   else
-#endif
-#if defined(GALLIUM_ILO)
-   if (strcmp(driver_name, "i965") == 0)
-      return pipe_ilo_create_screen(fd);
-   else
-#endif
-#if defined(GALLIUM_NOUVEAU)
-   if (strcmp(driver_name, "nouveau") == 0)
-      return pipe_nouveau_create_screen(fd);
-   else
-#endif
-#if defined(GALLIUM_R300)
-   if (strcmp(driver_name, "r300") == 0)
-      return pipe_r300_create_screen(fd);
-   else
-#endif
-#if defined(GALLIUM_R600)
-   if (strcmp(driver_name, "r600") == 0)
-      return pipe_r600_create_screen(fd);
-   else
-#endif
-#if defined(GALLIUM_RADEONSI)
-   if (strcmp(driver_name, "radeonsi") == 0)
-      return pipe_radeonsi_create_screen(fd);
-   else
-#endif
-#if defined(GALLIUM_VMWGFX)
-   if (strcmp(driver_name, "vmwgfx") == 0)
-      return pipe_vmwgfx_create_screen(fd);
-   else
-#endif
-#if defined(GALLIUM_FREEDRENO)
-   if ((strcmp(driver_name, "kgsl") == 0) || (strcmp(driver_name, "msm") == 0))
-      return pipe_freedreno_create_screen(fd);
-   else
-#endif
-#if defined(GALLIUM_VIRGL)
-   if ((strcmp(driver_name, "virtio_gpu") == 0))
-      return pipe_virgl_create_screen(fd);
-   else
-#endif
-#if defined(GALLIUM_VC4)
-   if (strcmp(driver_name, "vc4") == 0)
-      return pipe_vc4_create_screen(fd);
-   else
-#if defined(USE_VC4_SIMULATOR)
-   if (strcmp(driver_name, "i965") == 0)
-      return pipe_vc4_create_screen(fd);
-   else
-#endif
-#endif
-      return NULL;
-}
-
-inline const char *
-dd_driver_name(void)
-{
-   return driver_name;
-}
-
-static const struct drm_conf_ret throttle_ret = {
-   DRM_CONF_INT,
-   {2},
-};
-
-static const struct drm_conf_ret share_fd_ret = {
-   DRM_CONF_BOOL,
-   {true},
-};
-
-static inline const struct drm_conf_ret *
-configuration_query(enum drm_conf conf)
-{
-   switch (conf) {
-   case DRM_CONF_THROTTLE:
-      return &throttle_ret;
-   case DRM_CONF_SHARE_FD:
-      return &share_fd_ret;
-   default:
-      break;
-   }
-   return NULL;
-}
-
-inline const struct drm_conf_ret *
-dd_configuration(enum drm_conf conf)
-{
-   if (!driver_name)
-      return NULL;
-
-#if defined(GALLIUM_I915)
-   if (strcmp(driver_name, "i915") == 0)
-      return configuration_query(conf);
-   else
-#endif
-#if defined(GALLIUM_ILO)
-   if (strcmp(driver_name, "i965") == 0)
-      return configuration_query(conf);
-   else
-#endif
-#if defined(GALLIUM_NOUVEAU)
-   if (strcmp(driver_name, "nouveau") == 0)
-      return configuration_query(conf);
-   else
-#endif
-#if defined(GALLIUM_R300)
-   if (strcmp(driver_name, "r300") == 0)
-      return configuration_query(conf);
-   else
-#endif
-#if defined(GALLIUM_R600)
-   if (strcmp(driver_name, "r600") == 0)
-      return configuration_query(conf);
-   else
-#endif
-#if defined(GALLIUM_RADEONSI)
-   if (strcmp(driver_name, "radeonsi") == 0)
-      return configuration_query(conf);
-   else
-#endif
-#if defined(GALLIUM_VMWGFX)
-   if (strcmp(driver_name, "vmwgfx") == 0)
-      return configuration_query(conf);
-   else
-#endif
-#if defined(GALLIUM_FREEDRENO)
-   if ((strcmp(driver_name, "kgsl") == 0) || (strcmp(driver_name, "msm") == 0))
-      return configuration_query(conf);
-   else
-#endif
-#if defined(GALLIUM_VIRGL)
-   if ((strcmp(driver_name, "virtio_gpu") == 0))
-      return configuration_query(conf);
-   else
-#endif
-#if defined(GALLIUM_VC4)
-   if (strcmp(driver_name, "vc4") == 0)
-      return configuration_query(conf);
-   else
-#if defined(USE_VC4_SIMULATOR)
-   if (strcmp(driver_name, "i965") == 0)
-      return configuration_query(conf);
-   else
-#endif
-#endif
-      return NULL;
-}
-#endif /* INLINE_DRM_HELPER_H */
diff --git a/src/gallium/include/state_tracker/drm_driver.h b/src/gallium/include/state_tracker/drm_driver.h
index 740c4bbe1a6..959a7625e30 100644
--- a/src/gallium/include/state_tracker/drm_driver.h
+++ b/src/gallium/include/state_tracker/drm_driver.h
@@ -117,10 +117,4 @@ struct drm_driver_descriptor driver_descriptor = {             \
    .configuration = (conf),				       \
 };
 
-extern struct pipe_screen *dd_create_screen(int fd);
-
-extern const char *dd_driver_name(void);
-
-extern const struct drm_conf_ret *dd_configuration(enum drm_conf conf);
-
 #endif

From 13bccee87d6344ec7932b42de3fe3a2d10d30d96 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Thu, 15 Oct 2015 00:45:13 +0100
Subject: [PATCH 263/335] st/dri: Don't close the drm fd on failure

Ported from an identically named commit in st/xa

commit 35cf3831d71770211f29da6608313dc1f6213d7b
Author: Thomas Hellstrom <thellstrom@vmware.com>
Date:   Thu Jul 3 02:07:36 2014 -0700

    st/xa: Don't close the drm fd on failure v2

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/state_trackers/dri/dri2.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c
index 62173750381..5b7be64ba5a 100644
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -1457,7 +1457,7 @@ dri2_init_screen(__DRIscreen * sPriv)
 
    sPriv->driverPrivate = (void *)screen;
 
-   if (pipe_loader_drm_probe_fd(&screen->dev, screen->fd)) {
+   if (pipe_loader_drm_probe_fd(&screen->dev, dup(screen->fd))) {
       pscreen = pipe_loader_create_screen(screen->dev);
 
       throttle_ret = pipe_loader_configuration(screen->dev, DRM_CONF_THROTTLE);
@@ -1529,7 +1529,7 @@ dri_kms_init_screen(__DRIscreen * sPriv)
 
    sPriv->driverPrivate = (void *)screen;
 
-   if (pipe_loader_sw_probe_kms(&screen->dev, screen->fd))
+   if (pipe_loader_sw_probe_kms(&screen->dev, dup(screen->fd)))
       pscreen = pipe_loader_create_screen(screen->dev);
 
    if (drmGetCap(sPriv->fd, DRM_CAP_PRIME, &cap) == 0 &&

From e43a771dfa5b99899a4a09cdd2989afc25eef17c Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Thu, 15 Oct 2015 00:46:26 +0100
Subject: [PATCH 264/335] st/dri: NULL check the pscreen earlier

We delay the null check only to jump through hoops to work around that.
Check early to make our lives easier.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/state_trackers/dri/dri2.c       | 22 +++++++++++----------
 src/gallium/state_trackers/dri/dri_screen.c |  5 -----
 src/gallium/state_trackers/dri/drisw.c      |  3 ++-
 3 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c
index 5b7be64ba5a..beb0866c83f 100644
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -1444,8 +1444,8 @@ dri2_init_screen(__DRIscreen * sPriv)
    const __DRIconfig **configs;
    struct dri_screen *screen;
    struct pipe_screen *pscreen = NULL;
-   const struct drm_conf_ret *throttle_ret = NULL;
-   const struct drm_conf_ret *dmabuf_ret = NULL;
+   const struct drm_conf_ret *throttle_ret;
+   const struct drm_conf_ret *dmabuf_ret;
 
    screen = CALLOC_STRUCT(dri_screen);
    if (!screen)
@@ -1457,12 +1457,14 @@ dri2_init_screen(__DRIscreen * sPriv)
 
    sPriv->driverPrivate = (void *)screen;
 
-   if (pipe_loader_drm_probe_fd(&screen->dev, dup(screen->fd))) {
+   if (pipe_loader_drm_probe_fd(&screen->dev, dup(screen->fd)))
       pscreen = pipe_loader_create_screen(screen->dev);
 
-      throttle_ret = pipe_loader_configuration(screen->dev, DRM_CONF_THROTTLE);
-      dmabuf_ret = pipe_loader_configuration(screen->dev, DRM_CONF_SHARE_FD);
-   }
+   if (!pscreen)
+       goto fail;
+
+   throttle_ret = pipe_loader_configuration(screen->dev, DRM_CONF_THROTTLE);
+   dmabuf_ret = pipe_loader_configuration(screen->dev, DRM_CONF_SHARE_FD);
 
    if (throttle_ret && throttle_ret->val.val_int != -1) {
       screen->throttling_enabled = TRUE;
@@ -1479,15 +1481,13 @@ dri2_init_screen(__DRIscreen * sPriv)
       }
    }
 
-   if (pscreen && pscreen->get_param(pscreen, PIPE_CAP_DEVICE_RESET_STATUS_QUERY)) {
+   if (pscreen->get_param(pscreen, PIPE_CAP_DEVICE_RESET_STATUS_QUERY)) {
       sPriv->extensions = dri_robust_screen_extensions;
       screen->has_reset_status_query = true;
    }
    else
       sPriv->extensions = dri_screen_extensions;
 
-   /* dri_init_screen_helper checks pscreen for us */
-
    configs = dri_init_screen_helper(screen, pscreen, screen->dev->driver_name);
    if (!configs)
       goto fail;
@@ -1532,6 +1532,9 @@ dri_kms_init_screen(__DRIscreen * sPriv)
    if (pipe_loader_sw_probe_kms(&screen->dev, dup(screen->fd)))
       pscreen = pipe_loader_create_screen(screen->dev);
 
+   if (!pscreen)
+       goto fail;
+
    if (drmGetCap(sPriv->fd, DRM_CAP_PRIME, &cap) == 0 &&
           (cap & DRM_PRIME_CAP_IMPORT)) {
       dri2ImageExtension.createImageFromFds = dri2_from_fds;
@@ -1540,7 +1543,6 @@ dri_kms_init_screen(__DRIscreen * sPriv)
 
    sPriv->extensions = dri_screen_extensions;
 
-   /* dri_init_screen_helper checks pscreen for us */
    configs = dri_init_screen_helper(screen, pscreen, "swrast");
    if (!configs)
       goto fail;
diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c
index cf0f26554d3..2ac55c88926 100644
--- a/src/gallium/state_trackers/dri/dri_screen.c
+++ b/src/gallium/state_trackers/dri/dri_screen.c
@@ -414,11 +414,6 @@ dri_init_screen_helper(struct dri_screen *screen,
                        const char* driver_name)
 {
    screen->base.screen = pscreen;
-   if (!screen->base.screen) {
-      debug_printf("%s: failed to create pipe_screen\n", __FUNCTION__);
-      return NULL;
-   }
-
    screen->base.get_egl_image = dri_get_egl_image;
    screen->base.get_param = dri_get_param;
 
diff --git a/src/gallium/state_trackers/dri/drisw.c b/src/gallium/state_trackers/dri/drisw.c
index 06fa9427667..b85a73c57d2 100644
--- a/src/gallium/state_trackers/dri/drisw.c
+++ b/src/gallium/state_trackers/dri/drisw.c
@@ -400,7 +400,8 @@ drisw_init_screen(__DRIscreen * sPriv)
    if (pipe_loader_sw_probe_dri(&screen->dev, &drisw_lf))
       pscreen = pipe_loader_create_screen(screen->dev);
 
-   /* dri_init_screen_helper checks pscreen for us */
+   if (!pscreen)
+      goto fail;
 
    configs = dri_init_screen_helper(screen, pscreen, "swrast");
    if (!configs)

From f8a1665542d2bee829ab0c1bbde533f32d983e96 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 14 Oct 2015 15:37:03 +0100
Subject: [PATCH 265/335] auxiliary/vl: Don't close the drm fd on failure

Ported from an identically named commit in st/xa

commit 35cf3831d71770211f29da6608313dc1f6213d7b
Author: Thomas Hellstrom <thellstrom@vmware.com>
Date:   Thu Jul 3 02:07:36 2014 -0700

    st/xa: Don't close the drm fd on failure v2

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/auxiliary/vl/vl_winsys_dri.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri.c b/src/gallium/auxiliary/vl/vl_winsys_dri.c
index c4f9295271f..ae0d4cdee1b 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_dri.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri.c
@@ -402,7 +402,7 @@ vl_dri2_screen_create(Display *display, int screen)
    if (authenticate == NULL || !authenticate->authenticated)
       goto free_authenticate;
 
-   if (pipe_loader_drm_probe_fd(&scrn->base.dev, fd))
+   if (pipe_loader_drm_probe_fd(&scrn->base.dev, dup(fd)))
       scrn->base.pscreen = pipe_loader_create_screen(scrn->base.dev);
 
    if (!scrn->base.pscreen)

From 09422507817d21e989004bc93e7447f37cffc9ab Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 14 Oct 2015 16:49:59 +0100
Subject: [PATCH 266/335] targets/nine: add note about fd ownership

v2:
 - move autotools hunk into correct patch
 - correct the note based on Axel's feedback

Cc: Axel Davy <axel.davy@ens.fr>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/targets/d3dadapter9/drm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c
index c890191124e..84cfb943eaf 100644
--- a/src/gallium/targets/d3dadapter9/drm.c
+++ b/src/gallium/targets/d3dadapter9/drm.c
@@ -216,6 +216,8 @@ drm_create_adapter( int fd,
 
     ctx->base.destroy = drm_destroy;
 
+    /* Although the fd is provided from external source, mesa/nine
+     * takes ownership of it. */
     fd = loader_get_user_preferred_fd(fd, &different_device);
     ctx->fd = fd;
     ctx->base.linear_framebuffer = !!different_device;

From 42dde5aa24f4aed176baefb9b72c4184026196ed Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Wed, 14 Oct 2015 16:52:07 +0100
Subject: [PATCH 267/335] targets/nine: add note about messy header inclusion
 order

Cc: Axel Davy <axel.davy@ens.fr>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/targets/d3dadapter9/drm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c
index 84cfb943eaf..00e58e0ed16 100644
--- a/src/gallium/targets/d3dadapter9/drm.c
+++ b/src/gallium/targets/d3dadapter9/drm.c
@@ -20,6 +20,7 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
+/* XXX: header order is slightly screwy here */
 #include "loader.h"
 
 #include "adapter9.h"

From 8943a562e26091b13295af734f3034077fc825e1 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Thu, 15 Oct 2015 19:29:00 +0100
Subject: [PATCH 268/335] targets/nine: remove unused static functions

Dead code since commit 8f50614910c40366d94964fe2c5da5772aff2f96

Cc: Axel Davy <axel.davy@ens.fr>
Cc: Tiziano Bacocco <tizbac2@gmail.com>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/targets/d3dadapter9/drm.c | 36 ---------------------------
 1 file changed, 36 deletions(-)

diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c
index 00e58e0ed16..ad712db05eb 100644
--- a/src/gallium/targets/d3dadapter9/drm.c
+++ b/src/gallium/targets/d3dadapter9/drm.c
@@ -100,42 +100,6 @@ drm_destroy( struct d3dadapter9_context *ctx )
     FREE(ctx);
 }
 
-/* read a DWORD in the form 0xnnnnnnnn, which is how sysfs pci id stuff is
- * formatted. */
-static inline DWORD
-read_file_dword( const char *name )
-{
-    char buf[32];
-    int fd, r;
-
-    fd = open(name, O_RDONLY);
-    if (fd < 0) {
-        DBG("Unable to get PCI information from `%s'\n", name);
-        return 0;
-    }
-
-    r = read(fd, buf, 32);
-    close(fd);
-
-    return (r > 0) ? (DWORD)strtol(buf, NULL, 0) : 0;
-}
-
-/* sysfs doesn't expose the revision as its own file, so this function grabs a
- * dword at an offset in the raw PCI header. The reason this isn't used for all
- * data is that the kernel will make corrections but not expose them in the raw
- * header bytes. */
-static inline DWORD
-read_config_dword( int fd,
-                   unsigned offset )
-{
-    DWORD r = 0;
-
-    if (lseek(fd, offset, SEEK_SET) != offset) { return 0; }
-    if (read(fd, &r, 4) != 4) { return 0; }
-
-    return r;
-}
-
 static inline void
 get_bus_info( int fd,
               DWORD *vendorid,

From 623f64efc1630fa6c287d4de107430835f9a5fa3 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Fri, 16 Oct 2015 20:34:52 +0100
Subject: [PATCH 269/335] util: use RTLD_LOCAL with util_dl_open()

Otherwise we risk things blowing up due to conflicting symbols.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/auxiliary/util/u_dl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/util/u_dl.c b/src/gallium/auxiliary/util/u_dl.c
index aca435d6cad..9b97d8dc4b9 100644
--- a/src/gallium/auxiliary/util/u_dl.c
+++ b/src/gallium/auxiliary/util/u_dl.c
@@ -45,7 +45,7 @@ struct util_dl_library *
 util_dl_open(const char *filename)
 {
 #if defined(PIPE_OS_UNIX)
-   return (struct util_dl_library *)dlopen(filename, RTLD_LAZY | RTLD_GLOBAL);
+   return (struct util_dl_library *)dlopen(filename, RTLD_LAZY | RTLD_LOCAL);
 #elif defined(PIPE_OS_WINDOWS)
    return (struct util_dl_library *)LoadLibraryA(filename);
 #else

From 428146522b3836d8a9b8131d1e56c372c49679a8 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Sat, 21 Nov 2015 14:10:08 +0000
Subject: [PATCH 270/335] docs: add 11.2.0-devel release notes template, bump
 version

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
---
 VERSION                   |  2 +-
 docs/relnotes/11.2.0.html | 60 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+), 1 deletion(-)
 create mode 100644 docs/relnotes/11.2.0.html

diff --git a/VERSION b/VERSION
index 8a1e8e32743..96cb83954d3 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-11.1.0-devel
+11.2.0-devel
diff --git a/docs/relnotes/11.2.0.html b/docs/relnotes/11.2.0.html
new file mode 100644
index 00000000000..8e00d16d64b
--- /dev/null
+++ b/docs/relnotes/11.2.0.html
@@ -0,0 +1,60 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.2.0 Release Notes / TBD</h1>
+
+<p>
+Mesa 11.2.0 is a new development release.
+People who are concerned with stability and reliability should stick
+with a previous release or wait for Mesa 11.2.1.
+</p>
+<p>
+Mesa 11.2.0 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD.
+</pre>
+
+
+<h2>New features</h2>
+
+<p>
+Note: some of the new features are only available with certain drivers.
+</p>
+
+<ul>
+TBD.
+</ul>
+
+<h2>Bug fixes</h2>
+
+TBD.
+
+<h2>Changes</h2>
+
+TBD.
+
+</div>
+</body>
+</html>

From ecb0dcd34c9bb31b240a213cd17c236b224cd290 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 20 Nov 2015 21:20:00 -0500
Subject: [PATCH 271/335] freedreno/a4xx: only align slices in non-layer_first
 textures

When layer is the container, slices are tightly packed inside of each
layer. We don't need any additional alignment. On a3xx, each slice
contains all the layers, so having alignment makes sense.

This fixes a whole slew of array-related piglits, including texelFetch
and tex-miplevel-selection varieties.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: mesa-stable@lists.freedesktop.org
---
 src/gallium/drivers/freedreno/freedreno_resource.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index 43b818f4014..c8e2779d390 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -551,7 +551,7 @@ fd_resource_create(struct pipe_screen *pscreen,
 	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
 	struct pipe_resource *prsc = &rsc->base.b;
 	enum pipe_format format = tmpl->format;
-	uint32_t size;
+	uint32_t size, alignment;
 
 	DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
 			"nr_samples=%u, usage=%u, bind=%x, flags=%x",
@@ -583,6 +583,7 @@ fd_resource_create(struct pipe_screen *pscreen,
 
 	assert(rsc->cpp);
 
+	alignment = slice_alignment(pscreen, tmpl);
 	if (is_a4xx(fd_screen(pscreen))) {
 		switch (tmpl->target) {
 		case PIPE_TEXTURE_3D:
@@ -590,11 +591,12 @@ fd_resource_create(struct pipe_screen *pscreen,
 			break;
 		default:
 			rsc->layer_first = true;
+			alignment = 1;
 			break;
 		}
 	}
 
-	size = setup_slices(rsc, slice_alignment(pscreen, tmpl), format);
+	size = setup_slices(rsc, alignment, format);
 
 	if (rsc->layer_first) {
 		rsc->layer_size = align(size, 4096);

From 740eb63aa78a48bae5248b72f023d725ed82d1b3 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 20 Nov 2015 21:34:20 -0500
Subject: [PATCH 272/335] freedreno/a4xx: fix 3d texture setup

Same fix as on a3xx - set the second (tiny) layer size bitfield to the
smallest level's size so that the hw knows not to minify beyond that.

This fixes texelFetch sampler3D piglits.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: mesa-stable@lists.freedesktop.org
---
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c    | 2 +-
 src/gallium/drivers/freedreno/a4xx/fd4_texture.c | 6 +++++-
 src/gallium/drivers/freedreno/a4xx/fd4_texture.h | 2 +-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 99d1602d74b..8d3112fde3c 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -194,7 +194,7 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
 			if (view->base.texture) {
 				struct fd_resource *rsc = fd_resource(view->base.texture);
 				uint32_t offset = fd_resource_offset(rsc, start, 0);
-				OUT_RELOC(ring, rsc->bo, offset, view->textconst4, 0);
+				OUT_RELOC(ring, rsc->bo, offset, view->texconst4, 0);
 			} else {
 				OUT_RING(ring, 0x00000000);
 			}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index 75b083b8ca0..598f1e19116 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -214,6 +214,7 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 	struct fd_resource *rsc = fd_resource(prsc);
 	unsigned lvl = fd_sampler_first_level(cso);
 	unsigned miplevels = fd_sampler_last_level(cso) - lvl;
+	uint32_t sz2 = 0;
 
 	if (!so)
 		return NULL;
@@ -259,7 +260,10 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 	case PIPE_TEXTURE_3D:
 		so->texconst3 =
 			A4XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) |
-			A4XX_TEX_CONST_3_LAYERSZ(rsc->slices[0].size0);
+			A4XX_TEX_CONST_3_LAYERSZ(rsc->slices[lvl].size0);
+		while (lvl < cso->u.tex.last_level && sz2 != rsc->slices[lvl+1].size0)
+			sz2 = rsc->slices[++lvl].size0;
+		so->texconst4 = A4XX_TEX_CONST_4_LAYERSZ(sz2);
 		break;
 	default:
 		so->texconst3 = 0x00000000;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
index 31955770a85..d74d88701a8 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
@@ -51,7 +51,7 @@ fd4_sampler_stateobj(struct pipe_sampler_state *samp)
 
 struct fd4_pipe_sampler_view {
 	struct pipe_sampler_view base;
-	uint32_t texconst0, texconst1, texconst2, texconst3, textconst4;
+	uint32_t texconst0, texconst1, texconst2, texconst3, texconst4;
 };
 
 static inline struct fd4_pipe_sampler_view *

From 7426d9581afaa572bbf16b9eb69214743615c42d Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 20 Nov 2015 21:49:28 -0500
Subject: [PATCH 273/335] freedreno/a4xx: add 11_11_10_float vertex type
 support

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h   | 1 +
 src/gallium/drivers/freedreno/a4xx/fd4_format.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index 1192fc3c5fe..e05b7ee0f85 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -112,6 +112,7 @@ enum a4xx_vtx_fmt {
 	VFMT4_32_32_FIXED = 10,
 	VFMT4_32_32_32_FIXED = 11,
 	VFMT4_32_32_32_32_FIXED = 12,
+	VFMT4_11_11_10_FLOAT = 13,
 	VFMT4_16_SINT = 16,
 	VFMT4_16_16_SINT = 17,
 	VFMT4_16_16_16_SINT = 18,
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
index eeaa0a2b847..dceb3b98c75 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -208,7 +208,7 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
 	V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT,  NONE,              WZYX),
 	V_(B10G10R10A2_SSCALED, 10_10_10_2_SINT,  NONE,              WXYZ),
 
-	_T(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX),
+	VT(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX),
 	_T(R9G9B9E5_FLOAT,  9_9_9_E5_FLOAT, NONE,            WZYX),
 
 	_T(Z24X8_UNORM,       X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),

From f54c89f13e6c4759854d0b40b2b9bbac04dde5be Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 20 Nov 2015 21:01:28 -0500
Subject: [PATCH 274/335] freedreno/a4xx: set fetchsize in mem2gmem texture
 restore

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 8d3112fde3c..2d41cfe95b3 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -286,7 +286,8 @@ fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs,
 							PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
 			OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(bufs[i]->width) |
 					A4XX_TEX_CONST_1_HEIGHT(bufs[i]->height));
-			OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp));
+			OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) |
+					A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(format)));
 			OUT_RING(ring, 0x00000000);
 			OUT_RELOC(ring, rsc->bo, offset, 0, 0);
 			OUT_RING(ring, 0x00000000);

From 801b55c2eef53597972d84774faab76cb906164f Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 20 Nov 2015 22:55:28 -0500
Subject: [PATCH 275/335] freedreno/a4xx: enable ARB_base_instance support

We already pass in start_instance in fd4_draw. Expose the extension.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/freedreno/freedreno_screen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index adb0982132d..7bffc8f68c2 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -163,7 +163,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_TEXTURE_MULTISAMPLE:
 	case PIPE_CAP_TEXTURE_BARRIER:
 	case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
-	case PIPE_CAP_START_INSTANCE:
 	case PIPE_CAP_COMPUTE:
 		return 0;
 
@@ -196,6 +195,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 
 	case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
 	case PIPE_CAP_CUBE_MAP_ARRAY:
+	case PIPE_CAP_START_INSTANCE:
 		return is_a4xx(screen);
 
 	case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:

From 4c170d9e1d5b4e3e642b0241fd1e33948e60df9b Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 21 Nov 2015 00:02:36 -0500
Subject: [PATCH 276/335] freedreno/a4xx: fix independent blend

This fixes the ext_draw_buffers2 and arb_draw_buffers_blend tests.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: mesa-stable@lists.freedesktop.org
---
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h  | 2 +-
 src/gallium/drivers/freedreno/a4xx/fd4_blend.c | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index e05b7ee0f85..bd9b0a46bc1 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -628,7 +628,7 @@ static inline uint32_t A4XX_RB_FS_OUTPUT_ENABLE_BLEND(uint32_t val)
 {
 	return ((val) << A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT) & A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK;
 }
-#define A4XX_RB_FS_OUTPUT_FAST_CLEAR				0x00000100
+#define A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND			0x00000100
 #define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK			0xffff0000
 #define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT			16
 static inline uint32_t A4XX_RB_FS_OUTPUT_SAMPLE_MASK(uint32_t val)
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
index 98a96c131c5..f19702280e0 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
@@ -137,7 +137,8 @@ fd4_blend_state_create(struct pipe_context *pctx,
 			so->rb_mrt[i].buf_info |= A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
 	}
 
-	so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend);
+	so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend) |
+		COND(cso->independent_blend_enable, A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND);
 
 	return so;
 }

From 22aeb0c5684cec11fd8cb4a159b10edbcfe8d6ec Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 21 Nov 2015 01:27:01 -0500
Subject: [PATCH 277/335] freedreno/a4xx: disable blending and alphatest for
 integer rt0

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: mesa-stable@lists.freedesktop.org
---
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 2d41cfe95b3..60069aeedc5 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -498,11 +498,16 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		OUT_RINGP(ring, val, &fd4_context(ctx)->rbrc_patches);
 	}
 
-	if (dirty & FD_DIRTY_ZSA) {
+	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) {
 		struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
+		struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+		uint32_t rb_alpha_control = zsa->rb_alpha_control;
+
+		if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])))
+			rb_alpha_control &= ~A4XX_RB_ALPHA_CONTROL_ALPHA_TEST;
 
 		OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
-		OUT_RING(ring, zsa->rb_alpha_control);
+		OUT_RING(ring, rb_alpha_control);
 
 		OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
 		OUT_RING(ring, zsa->rb_stencil_control);
@@ -629,10 +634,16 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
 			enum pipe_format format = pipe_surface_format(
 					ctx->framebuffer.cbufs[i]);
+			bool is_int = util_format_is_pure_integer(format);
 			bool has_alpha = util_format_has_alpha(format);
 			uint32_t control = blend->rb_mrt[i].control;
 			uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha;
 
+			if (is_int) {
+				control &= A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
+				control |= A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
+			}
+
 			if (has_alpha) {
 				blend_control |= blend->rb_mrt[i].blend_control_rgb;
 			} else {

From 02afbd247620bd51a5b1661ced9b01a865136484 Mon Sep 17 00:00:00 2001
From: Jose Fonseca <jfonseca@vmware.com>
Date: Sat, 21 Nov 2015 21:19:57 +0000
Subject: [PATCH 278/335] scons: Conditionally use DRM module on pipe-loader.

Fixes non Linux builds.

Trivial.
---
 src/gallium/auxiliary/pipe-loader/SConscript | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/pipe-loader/SConscript b/src/gallium/auxiliary/pipe-loader/SConscript
index 393b6021bee..c611fb892f8 100644
--- a/src/gallium/auxiliary/pipe-loader/SConscript
+++ b/src/gallium/auxiliary/pipe-loader/SConscript
@@ -17,12 +17,11 @@ env.Append(CPPDEFINES = [
 
 source = env.ParseSourceList('Makefile.sources', 'COMMON_SOURCES')
 
-#if HAVE_LIBDRM
-source += env.ParseSourceList('Makefile.sources', 'DRM_SOURCES')
+if env['HAVE_DRM']:
+    source += env.ParseSourceList('Makefile.sources', 'DRM_SOURCES')
 
-env.PkgUseModules('DRM')
-env.Append(LIBS = [libloader])
-#endif
+    env.PkgUseModules('DRM')
+    env.Append(LIBS = [libloader])
 
 pipe_loader = env.ConvenienceLibrary(
     target = 'pipe_loader',

From 4befd82a649e926e64bc2c17cf362a84d5be42e6 Mon Sep 17 00:00:00 2001
From: Jose Fonseca <jfonseca@vmware.com>
Date: Sat, 21 Nov 2015 23:03:20 +0000
Subject: [PATCH 279/335] pipe-loader: Fix PATH_MAX define on MSVC.

---
 src/gallium/auxiliary/pipe-loader/pipe_loader.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.c b/src/gallium/auxiliary/pipe-loader/pipe_loader.c
index 40df2167797..aef996c4617 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.c
@@ -32,6 +32,11 @@
 #include "util/u_string.h"
 #include "util/u_dl.h"
 
+#ifdef _MSC_VER
+#include <stdlib.h>
+#define PATH_MAX _MAX_PATH
+#endif
+
 #define MODULE_PREFIX "pipe_"
 
 static int (*backends[])(struct pipe_loader_device **, int) = {

From 81544f231ad6eba1c7eb8b89273c59eb53a25879 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 20 Nov 2015 18:52:58 -0800
Subject: [PATCH 280/335] vc4: Fix point size lookup.

I think I may have regressed this in the NIR conversion.  TGSI-to-NIR is
putting the PSIZ in the .x channel, not .w, so we were grabbing some
garbage for point size, which ended up meaning just not drawing points.

Fixes glean pointAtten and pointsprite.
---
 src/gallium/drivers/vc4/vc4_program.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 52317bd02af..197577b6c20 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1171,7 +1171,7 @@ emit_point_size_write(struct vc4_compile *c)
         struct qreg point_size;
 
         if (c->output_point_size_index != -1)
-                point_size = c->outputs[c->output_point_size_index + 3];
+                point_size = c->outputs[c->output_point_size_index];
         else
                 point_size = qir_uniform_f(c, 1.0);
 

From 4cff16bc3a84569da05e672c8226931678aa62c0 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 20 Nov 2015 20:42:12 -0800
Subject: [PATCH 281/335] vc4: Use nir_channel() to simplify all of our
 nir_swizzle() cases.

---
 src/gallium/drivers/vc4/vc4_nir_lower_blend.c | 4 ++--
 src/gallium/drivers/vc4/vc4_nir_lower_io.c    | 7 +++----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
index 373c9e12d11..0672a92226f 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -509,8 +509,8 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
         nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
         nir_ssa_def *src_color[4], *unpacked_dst_color[4];
         for (unsigned i = 0; i < 4; i++) {
-                src_color[i] = nir_swizzle(b, intr->src[0].ssa, &i, 1, false);
-                unpacked_dst_color[i] = nir_swizzle(b, dst_vec4, &i, 1, false);
+                src_color[i] = nir_channel(b, intr->src[0].ssa, i);
+                unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
         }
 
         vc4_nir_emit_alpha_test_discard(c, b, src_color[3]);
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
index 7ea263afb68..1afe52a63f4 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -84,7 +84,7 @@ vc4_nir_unpack_16u(nir_builder *b, nir_ssa_def *src, unsigned chan)
 static nir_ssa_def *
 vc4_nir_unpack_8f(nir_builder *b, nir_ssa_def *src, unsigned chan)
 {
-        return nir_swizzle(b, nir_unpack_unorm_4x8(b, src), &chan, 1, false);
+        return nir_channel(b, nir_unpack_unorm_4x8(b, src), chan);
 }
 
 static nir_ssa_def *
@@ -326,9 +326,8 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
                 intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
 
                 assert(intr->src[0].is_ssa);
-                intr_comp->src[0] = nir_src_for_ssa(nir_swizzle(b,
-                                                                intr->src[0].ssa,
-                                                                &i, 1, false));
+                intr_comp->src[0] =
+                        nir_src_for_ssa(nir_channel(b, intr->src[0].ssa, i));
                 nir_builder_instr_insert(b, &intr_comp->instr);
         }
 

From 86fc97da0627cd4a81c2e190d2e157571eead111 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 13 Nov 2015 13:29:16 -0800
Subject: [PATCH 282/335] i965: Fix num_uniforms count for scalar GS.

I noticed that brw_vs.c does this.

I believe the point is that nir->num_uniforms is either counted in
scalar components (in scalar mode), or vec4 slots (in vector mode).
But we want param_count to be in scalar components regardless, so
we have to scale up in vector mode.

We don't have to scale up in scalar mode, though.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/mesa/drivers/dri/i965/brw_gs.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index ad5b242a3ab..149b43ba055 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -75,7 +75,9 @@ brw_codegen_gs_prog(struct brw_context *brw,
     * every uniform is a float which gets padded to the size of a vec4.
     */
    struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
-   int param_count = gp->program.Base.nir->num_uniforms * 4;
+   int param_count = gp->program.Base.nir->num_uniforms;
+   if (!compiler->scalar_stage[MESA_SHADER_GEOMETRY])
+      param_count *= 4;
 
    prog_data.base.base.param =
       rzalloc_array(NULL, const gl_constant_value *, param_count);

From 05eed0eca71f8c35f315b3b0d7de30f3ff1878b4 Mon Sep 17 00:00:00 2001
From: Igor Gnatenko <i.gnatenko.brain@gmail.com>
Date: Sun, 22 Nov 2015 10:12:09 +0100
Subject: [PATCH 283/335] virgl: pipe_virgl_create_screen is not static

Cc: mesa-stable@lists.freedesktop.org
Fixes: 17d3a5f8579 "target-helpers: add a non-inline drm_helper.h"
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93063
Signed-off-by: Igor Gnatenko <i.gnatenko.brain@gmail.com>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/gallium/auxiliary/target-helpers/drm_helper.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/target-helpers/drm_helper.h b/src/gallium/auxiliary/target-helpers/drm_helper.h
index 73a80b6c1dc..332b1cba984 100644
--- a/src/gallium/auxiliary/target-helpers/drm_helper.h
+++ b/src/gallium/auxiliary/target-helpers/drm_helper.h
@@ -223,7 +223,7 @@ pipe_freedreno_create_screen(int fd)
 #include "virgl/drm/virgl_drm_public.h"
 #include "virgl/virgl_public.h"
 
-static struct pipe_screen *
+struct pipe_screen *
 pipe_virgl_create_screen(int fd)
 {
    struct virgl_winsys *vws;

From 21d43fe51ab5bcbc89ad5c61a51d3517c4243298 Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Fri, 20 Nov 2015 16:15:04 -0800
Subject: [PATCH 284/335] mesa/extensions: Enable overriding permanently
 enabled extensions

Provide the ability to prevent any permanently enabled extension
from appearing in the string returned by glGetString[i]().

Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Tested-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/main/extensions.c | 64 ++++++++++++++------------------------
 1 file changed, 24 insertions(+), 40 deletions(-)

diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 01cfdf1a4ec..fa50cb68cca 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -40,7 +40,6 @@
 struct gl_extensions _mesa_extension_override_enables;
 struct gl_extensions _mesa_extension_override_disables;
 static char *extra_extensions = NULL;
-static char *cant_disable_extensions = NULL;
 
 
 /**
@@ -68,29 +67,30 @@ const struct mesa_extension _mesa_extension_table[] = {
 #undef EXT
 };
 
+static bool disabled_extensions[ARRAY_SIZE(_mesa_extension_table)];
 
 /**
  * Given an extension name, lookup up the corresponding member of struct
- * gl_extensions and return that member's offset (in bytes).  If the name is
- * not found in the \c _mesa_extension_table, return 0.
+ * gl_extensions and return that member's index.  If the name is
+ * not found in the \c _mesa_extension_table, return -1.
  *
  * \param name Name of extension.
- * \return Offset of member in struct gl_extensions.
+ * \return Index of member in struct gl_extensions.
  */
-static size_t
-name_to_offset(const char* name)
+static int
+name_to_index(const char* name)
 {
    unsigned i;
 
    if (name == 0)
-      return 0;
+      return -1;
 
    for (i = 0; i < ARRAY_SIZE(_mesa_extension_table); ++i) {
       if (strcmp(name, _mesa_extension_table[i].name) == 0)
-	 return _mesa_extension_table[i].offset;
+	 return i;
    }
 
-   return 0;
+   return -1;
 }
 
 /**
@@ -206,11 +206,11 @@ _mesa_enable_sw_extensions(struct gl_context *ctx)
  * \return offset of extensions withint `ext' or 0 if extension is not known
  */
 static size_t
-set_extension(struct gl_extensions *ext, const char *name, GLboolean state)
+set_extension(struct gl_extensions *ext, int i, GLboolean state)
 {
    size_t offset;
 
-   offset = name_to_offset(name);
+   offset = i < 0 ? 0 : _mesa_extension_table[i].offset;
    if (offset != 0 && (offset != o(dummy_true) || state != GL_FALSE)) {
       ((GLboolean *) ext)[offset] = state;
    }
@@ -240,12 +240,6 @@ get_extension_override( struct gl_context *ctx )
 {
    override_extensions_in_context(ctx);
 
-   if (cant_disable_extensions != NULL) {
-      _mesa_problem(ctx,
-                    "Trying to disable permanently enabled extensions: %s",
-	            cant_disable_extensions);
-   }
-
    if (extra_extensions == NULL) {
       return calloc(1, sizeof(char));
    } else {
@@ -257,7 +251,7 @@ get_extension_override( struct gl_context *ctx )
 
 
 /**
- * \brief Free extra_extensions and cant_disable_extensions strings
+ * \brief Free extra_extensions string
  *
  * These strings are allocated early during the first context creation by
  * _mesa_one_time_init_extension_overrides.
@@ -266,7 +260,6 @@ static void
 free_unknown_extensions_strings(void)
 {
    free(extra_extensions);
-   free(cant_disable_extensions);
 }
 
 
@@ -295,22 +288,20 @@ _mesa_one_time_init_extension_overrides(void)
 
    /* extra_exts: List of unrecognized extensions. */
    extra_extensions = calloc(ALIGN(strlen(env_const) + 2, 4), sizeof(char));
-   cant_disable_extensions = calloc(ALIGN(strlen(env_const) + 2, 4), sizeof(char));
 
    /* Copy env_const because strtok() is destructive. */
    env = strdup(env_const);
 
    if (env == NULL ||
-       extra_extensions == NULL ||
-       cant_disable_extensions == NULL) {
+       extra_extensions == NULL) {
       free(env);
       free(extra_extensions);
-      free(cant_disable_extensions);
       return;
    }
 
    for (ext = strtok(env, " "); ext != NULL; ext = strtok(NULL, " ")) {
       int enable;
+      int i;
       bool recognized;
       switch (ext[0]) {
       case '+':
@@ -326,7 +317,8 @@ _mesa_one_time_init_extension_overrides(void)
          break;
       }
 
-      offset = set_extension(&_mesa_extension_override_enables, ext, enable);
+      i = name_to_index(ext);
+      offset = set_extension(&_mesa_extension_override_enables, i, enable);
       if (offset != 0 && (offset != o(dummy_true) || enable != GL_FALSE)) {
          ((GLboolean *) &_mesa_extension_override_disables)[offset] = !enable;
          recognized = true;
@@ -334,14 +326,12 @@ _mesa_one_time_init_extension_overrides(void)
          recognized = false;
       }
 
-      if (!recognized) {
-         if (enable) {
-            strcat(extra_extensions, ext);
-            strcat(extra_extensions, " ");
-         } else if (offset == o(dummy_true)) {
-            strcat(cant_disable_extensions, ext);
-            strcat(cant_disable_extensions, " ");
-         }
+      if (i >= 0)
+         disabled_extensions[i] = !enable;
+
+      if (!recognized && enable) {
+         strcat(extra_extensions, ext);
+         strcat(extra_extensions, " ");
       }
    }
 
@@ -355,13 +345,6 @@ _mesa_one_time_init_extension_overrides(void)
    } else if (extra_extensions[len - 1] == ' ') {
       extra_extensions[len - 1] = '\0';
    }
-   len = strlen(cant_disable_extensions);
-   if (len == 0) {
-      free(cant_disable_extensions);
-      cant_disable_extensions = NULL;
-   } else if (cant_disable_extensions[len - 1] == ' ') {
-      cant_disable_extensions[len - 1] = '\0';
-   }
 }
 
 
@@ -402,7 +385,8 @@ _mesa_extension_supported(const struct gl_context *ctx, extension_index i)
    const bool *base = (bool *) &ctx->Extensions;
    const struct mesa_extension *ext = _mesa_extension_table + i;
 
-   return (ctx->Version >= ext->version[ctx->API]) && base[ext->offset];
+   return !disabled_extensions[i] &&
+          (ctx->Version >= ext->version[ctx->API]) && base[ext->offset];
 }
 
 /**

From d1212abf505a468c9947a66dbf2d59acb4616e42 Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Wed, 28 Oct 2015 14:50:58 -0700
Subject: [PATCH 285/335] mesa/teximage: Fix S3TC regression due to ASTC
 interaction

A prior, literal reading of the ASTC spec led to the prohibition
of some compressed formats being used against the targets:
TEXTURE_CUBE_MAP_ARRAY and TEXTURE_3D. Since the spec does not specify
interactions with other extensions for specific compressed textures,
remove such interactions.

Fixes the following Piglit tests on Gen9:
piglit.spec.arb_direct_state_access.getcompressedtextureimage
piglit.spec.arb_get_texture_sub_image.arb_get_texture_sub_image-getcompressed
piglit.spec.arb_texture_cube_map_array.fbo-generatemipmap-cubemap array s3tc_dxt1
piglit.spec.ext_texture_compression_s3tc.getteximage-targets cube_array s3tc

v2. Don't interact with other specific compressed formats (Ian).

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91927
Suggested-by: Neil Roberts <neil@linux.intel.com>
Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/mesa/main/teximage.c | 43 ++++++++++++++--------------------------
 1 file changed, 15 insertions(+), 28 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index d9453e3a281..ac7599f9fd4 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -1333,21 +1333,6 @@ _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target,
       break;
    case GL_PROXY_TEXTURE_CUBE_MAP_ARRAY:
    case GL_TEXTURE_CUBE_MAP_ARRAY:
-      /* From section 3.8.6, page 146 of OpenGL ES 3.0 spec:
-       *
-       *    "The ETC2/EAC texture compression algorithm supports only
-       *     two-dimensional images. If internalformat is an ETC2/EAC format,
-       *     glCompressedTexImage3D will generate an INVALID_OPERATION error if
-       *     target is not TEXTURE_2D_ARRAY."
-       *
-       * This should also be applicable for glTexStorage3D(). Other available
-       * targets for these functions are: TEXTURE_3D and TEXTURE_CUBE_MAP_ARRAY.
-       */
-      if (layout == MESA_FORMAT_LAYOUT_ETC2 && _mesa_is_gles3(ctx))
-            return write_error(error, GL_INVALID_OPERATION);
-
-      target_can_be_compresed = ctx->Extensions.ARB_texture_cube_map_array;
-
       /* From the KHR_texture_compression_astc_hdr spec:
        *
        *     Add a second new column "3D Tex." which is empty for all non-ASTC
@@ -1368,16 +1353,24 @@ _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target,
        *      8.19 is *not* checked'
        *
        * The instances of <internalformat> above should say <target>.
+       *
+       * ETC2/EAC formats are the only alternative in GLES and thus such errors
+       * have already been handled by normal ETC2/EAC behavior.
        */
 
-      /* Throw an INVALID_OPERATION error if the target is
-       * TEXTURE_CUBE_MAP_ARRAY and the format is not ASTC.
+      /* From section 3.8.6, page 146 of OpenGL ES 3.0 spec:
+       *
+       *    "The ETC2/EAC texture compression algorithm supports only
+       *     two-dimensional images. If internalformat is an ETC2/EAC format,
+       *     glCompressedTexImage3D will generate an INVALID_OPERATION error if
+       *     target is not TEXTURE_2D_ARRAY."
+       *
+       * This should also be applicable for glTexStorage3D(). Other available
+       * targets for these functions are: TEXTURE_3D and TEXTURE_CUBE_MAP_ARRAY.
        */
-      if (target_can_be_compresed &&
-          ctx->Extensions.KHR_texture_compression_astc_ldr &&
-          layout != MESA_FORMAT_LAYOUT_ASTC)
-         return write_error(error, GL_INVALID_OPERATION);
-
+      if (layout == MESA_FORMAT_LAYOUT_ETC2 && _mesa_is_gles3(ctx))
+            return write_error(error, GL_INVALID_OPERATION);
+      target_can_be_compresed = ctx->Extensions.ARB_texture_cube_map_array;
       break;
    case GL_TEXTURE_3D:
       switch (layout) {
@@ -1401,12 +1394,6 @@ _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target,
             return write_error(error, GL_INVALID_OPERATION);
          break;
       default:
-         /* Throw an INVALID_OPERATION error if the target is TEXTURE_3D and
-          * the format is not ASTC.
-          * See comment in switch case GL_TEXTURE_CUBE_MAP_ARRAY for more info.
-          */
-         if (ctx->Extensions.KHR_texture_compression_astc_ldr)
-            return write_error(error, GL_INVALID_OPERATION);
          break;
       }
    default:

From a39eac80fd491abb990b0b77dd5e4adc5b9c53e1 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sat, 21 Nov 2015 13:07:42 -0800
Subject: [PATCH 286/335] vc4: Just put USE_VC4_SIMULATOR in DEFINES.

In the pipe-loader reworks, the define was missed in one of the new
directories where it is used.

Cc: mesa-stable@lists.freedesktop.org
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 configure.ac                         | 4 +++-
 src/gallium/drivers/vc4/Automake.inc | 4 ----
 src/gallium/drivers/vc4/Makefile.am  | 1 -
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/configure.ac b/configure.ac
index 91fdfe5f9cb..120c93e7e73 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2173,7 +2173,9 @@ if test -n "$with_gallium_drivers"; then
             gallium_require_drm_loader
 
             PKG_CHECK_MODULES([SIMPENROSE], [simpenrose],
-                              [USE_VC4_SIMULATOR=yes], [USE_VC4_SIMULATOR=no])
+                              [USE_VC4_SIMULATOR=yes;
+                               DEFINES="$DEFINES -DUSE_VC4_SIMULATOR"],
+                              [USE_VC4_SIMULATOR=no])
             ;;
         xvirgl)
             HAVE_GALLIUM_VIRGL=yes
diff --git a/src/gallium/drivers/vc4/Automake.inc b/src/gallium/drivers/vc4/Automake.inc
index 6fa3e190cac..5664c2ab14e 100644
--- a/src/gallium/drivers/vc4/Automake.inc
+++ b/src/gallium/drivers/vc4/Automake.inc
@@ -6,8 +6,4 @@ TARGET_LIB_DEPS += \
 	$(top_builddir)/src/gallium/winsys/vc4/drm/libvc4drm.la \
 	$(top_builddir)/src/gallium/drivers/vc4/libvc4.la
 
-if USE_VC4_SIMULATOR
-TARGET_CPPFLAGS += -DUSE_VC4_SIMULATOR
-endif
-
 endif
diff --git a/src/gallium/drivers/vc4/Makefile.am b/src/gallium/drivers/vc4/Makefile.am
index f4a57ba3404..a3bf72fc72a 100644
--- a/src/gallium/drivers/vc4/Makefile.am
+++ b/src/gallium/drivers/vc4/Makefile.am
@@ -23,7 +23,6 @@ include Makefile.sources
 include $(top_srcdir)/src/gallium/Automake.inc
 
 if USE_VC4_SIMULATOR
-SIM_CFLAGS = -DUSE_VC4_SIMULATOR=1
 SIM_LDFLAGS = -lsimpenrose
 endif
 

From 1b62a4e885267c374dbbe5d5bb2c36515eee6a95 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sat, 21 Nov 2015 12:52:48 -0800
Subject: [PATCH 287/335] vc4: Take precedence over ilo when in simulator mode.

They're exclusive at build time, but the ilo entry is always present, so
we'd try to use it and fail out.

v2: Add comment in the code, from Emil.

Cc: mesa-stable@lists.freedesktop.org
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 .../auxiliary/pipe-loader/pipe_loader_drm.c   | 20 +++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
index b5dfc56f49d..994a284385c 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -94,6 +94,18 @@ static const struct drm_driver_descriptor driver_descriptors[] = {
         .create_screen = pipe_i915_create_screen,
         .configuration = configuration_query,
     },
+#ifdef USE_VC4_SIMULATOR
+    /* VC4 simulator and ILO (i965) are mutually exclusive (error at
+     * configure). As the latter is unconditionally added, keep this one above
+     * it.
+     */
+    {
+        .name = "i965",
+        .driver_name = "vc4",
+        .create_screen = pipe_vc4_create_screen,
+        .configuration = configuration_query,
+    },
+#endif
     {
         .name = "i965",
         .driver_name = "i915",
@@ -154,14 +166,6 @@ static const struct drm_driver_descriptor driver_descriptors[] = {
         .create_screen = pipe_vc4_create_screen,
         .configuration = configuration_query,
     },
-#ifdef USE_VC4_SIMULATOR
-    {
-        .name = "i965",
-        .driver_name = "vc4",
-        .create_screen = pipe_vc4_create_screen,
-        .configuration = configuration_query,
-    },
-#endif
 };
 #endif
 

From 079f713754a9e5d7802b655d54320bd37f24fbfa Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sun, 22 Nov 2015 20:58:56 -0500
Subject: [PATCH 288/335] nouveau: use the buffer usage to determine placement
 when no binding

With ARB_direct_state_access, buffers can be created without any binding
hints at all. We still need to allocate these buffers to VRAM or GART,
as we don't have logic down the line to place them into GPU-mappable
space. Ideally we'd be able to shift these things around based on usage.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92438
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "11.0 11.1" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/nouveau/nouveau_buffer.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 68e69beb08f..1695553d793 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -657,8 +657,8 @@ nouveau_buffer_create(struct pipe_screen *pscreen,
    if (buffer->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
                              PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
       buffer->domain = NOUVEAU_BO_GART;
-   } else if (buffer->base.bind &
-              (screen->vidmem_bindings & screen->sysmem_bindings)) {
+   } else if (buffer->base.bind == 0 || (buffer->base.bind &
+              (screen->vidmem_bindings & screen->sysmem_bindings))) {
       switch (buffer->base.usage) {
       case PIPE_USAGE_DEFAULT:
       case PIPE_USAGE_IMMUTABLE:
@@ -685,6 +685,10 @@ nouveau_buffer_create(struct pipe_screen *pscreen,
       if (buffer->base.bind & screen->sysmem_bindings)
          buffer->domain = NOUVEAU_BO_GART;
    }
+   /* There can be very special situations where we want non-gpu-mapped
+    * buffers, but never through this interface.
+    */
+   assert(buffer->domain);
    ret = nouveau_buffer_allocate(screen, buffer, buffer->domain);
 
    if (ret == false)

From ad5f6b03e793b9390e3b9f3eca68bd43f9d809eb Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sun, 22 Nov 2015 21:08:16 -0500
Subject: [PATCH 289/335] nv50,nvc0: properly handle buffer storage
 invalidation on dsa buffer

In case that the buffer has no bind at all, assume it can be a regular
buffer. This can happen on buffers created through the ARB_dsa
interfaces.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "11.0 11.1" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/nouveau/nv50/nv50_context.c | 15 ++++++++-------
 src/gallium/drivers/nouveau/nvc0/nvc0_context.c | 17 +++++++++--------
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index f645a4d4e6b..4874b77b1e1 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -168,9 +168,10 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
                                  int ref)
 {
    struct nv50_context *nv50 = nv50_context(&ctx->pipe);
+   unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER;
    unsigned s, i;
 
-   if (res->bind & PIPE_BIND_RENDER_TARGET) {
+   if (bind & PIPE_BIND_RENDER_TARGET) {
       assert(nv50->framebuffer.nr_cbufs <= PIPE_MAX_COLOR_BUFS);
       for (i = 0; i < nv50->framebuffer.nr_cbufs; ++i) {
          if (nv50->framebuffer.cbufs[i] &&
@@ -182,7 +183,7 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
          }
       }
    }
-   if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
+   if (bind & PIPE_BIND_DEPTH_STENCIL) {
       if (nv50->framebuffer.zsbuf &&
           nv50->framebuffer.zsbuf->texture == res) {
          nv50->dirty |= NV50_NEW_FRAMEBUFFER;
@@ -192,11 +193,11 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
       }
    }
 
-   if (res->bind & (PIPE_BIND_VERTEX_BUFFER |
-                    PIPE_BIND_INDEX_BUFFER |
-                    PIPE_BIND_CONSTANT_BUFFER |
-                    PIPE_BIND_STREAM_OUTPUT |
-                    PIPE_BIND_SAMPLER_VIEW)) {
+   if (bind & (PIPE_BIND_VERTEX_BUFFER |
+               PIPE_BIND_INDEX_BUFFER |
+               PIPE_BIND_CONSTANT_BUFFER |
+               PIPE_BIND_STREAM_OUTPUT |
+               PIPE_BIND_SAMPLER_VIEW)) {
 
       assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);
       for (i = 0; i < nv50->num_vtxbufs; ++i) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index 82ed5a1864e..162661ff2a7 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -180,9 +180,10 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
                                  int ref)
 {
    struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe);
+   unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER;
    unsigned s, i;
 
-   if (res->bind & PIPE_BIND_RENDER_TARGET) {
+   if (bind & PIPE_BIND_RENDER_TARGET) {
       for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) {
          if (nvc0->framebuffer.cbufs[i] &&
              nvc0->framebuffer.cbufs[i]->texture == res) {
@@ -193,7 +194,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
          }
       }
    }
-   if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
+   if (bind & PIPE_BIND_DEPTH_STENCIL) {
       if (nvc0->framebuffer.zsbuf &&
           nvc0->framebuffer.zsbuf->texture == res) {
          nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
@@ -203,12 +204,12 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
       }
    }
 
-   if (res->bind & (PIPE_BIND_VERTEX_BUFFER |
-                    PIPE_BIND_INDEX_BUFFER |
-                    PIPE_BIND_CONSTANT_BUFFER |
-                    PIPE_BIND_STREAM_OUTPUT |
-                    PIPE_BIND_COMMAND_ARGS_BUFFER |
-                    PIPE_BIND_SAMPLER_VIEW)) {
+   if (bind & (PIPE_BIND_VERTEX_BUFFER |
+               PIPE_BIND_INDEX_BUFFER |
+               PIPE_BIND_CONSTANT_BUFFER |
+               PIPE_BIND_STREAM_OUTPUT |
+               PIPE_BIND_COMMAND_ARGS_BUFFER |
+               PIPE_BIND_SAMPLER_VIEW)) {
       for (i = 0; i < nvc0->num_vtxbufs; ++i) {
          if (nvc0->vtxbuf[i].buffer == res) {
             nvc0->dirty |= NVC0_NEW_ARRAYS;

From 6463d36394bf95f73cfe3ba6bdf900da431e4e55 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Mon, 23 Nov 2015 10:07:30 +1100
Subject: [PATCH 290/335] glsl: fix max binding validation for uniform blocks

Regression as of 64710db66461e

We can't use the type returned by get_interface_type() as
the interface type has arrays removed.

Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
---
 src/glsl/ast_to_hir.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index df6dd9b4759..81cde73798a 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -6962,8 +6962,8 @@ ast_interface_block::hir(exec_list *instructions,
          delete var;
       } else {
          if (this->layout.flags.q.explicit_binding) {
-            apply_explicit_binding(state, &loc, var,
-                                   var->get_interface_type(), &this->layout);
+            apply_explicit_binding(state, &loc, var, var->type,
+                                   &this->layout);
          }
 
          var->data.stream = qual_stream;

From 4deb118d06e96731f3481daa72c201d7258bfbbb Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 18 Apr 2015 15:00:45 -0400
Subject: [PATCH 291/335] nv50/ir: fix (un)spilling of 3-wide results

There are no 96-bit load/store operations, so we have to split them up
into 32-bit parts, with a split/merge around them.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90348
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "11.0 11.1" <mesa-stable@lists.freedesktop.org>
---
 .../drivers/nouveau/codegen/nv50_ir_ra.cpp    | 46 +++++++++++++++++--
 1 file changed, 42 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 7859c8e79bd..41d2cc9167c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -1573,10 +1573,28 @@ SpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval)
 
    Instruction *st;
    if (slot->reg.file == FILE_MEMORY_LOCAL) {
-      st = new_Instruction(func, OP_STORE, ty);
-      st->setSrc(0, slot);
-      st->setSrc(1, lval);
       lval->noSpill = 1;
+      if (ty != TYPE_B96) {
+         st = new_Instruction(func, OP_STORE, ty);
+         st->setSrc(0, slot);
+         st->setSrc(1, lval);
+      } else {
+         st = new_Instruction(func, OP_SPLIT, ty);
+         st->setSrc(0, lval);
+         for (int d = 0; d < lval->reg.size / 4; ++d)
+            st->setDef(d, new_LValue(func, FILE_GPR));
+
+         for (int d = lval->reg.size / 4 - 1; d >= 0; --d) {
+            Value *tmp = cloneShallow(func, slot);
+            tmp->reg.size = 4;
+            tmp->reg.data.offset += 4 * d;
+
+            Instruction *s = new_Instruction(func, OP_STORE, TYPE_U32);
+            s->setSrc(0, tmp);
+            s->setSrc(1, st->getDef(d));
+            defi->bb->insertAfter(defi, s);
+         }
+      }
    } else {
       st = new_Instruction(func, OP_CVT, ty);
       st->setDef(0, slot);
@@ -1596,7 +1614,27 @@ SpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot)
    Instruction *ld;
    if (slot->reg.file == FILE_MEMORY_LOCAL) {
       lval->noSpill = 1;
-      ld = new_Instruction(func, OP_LOAD, ty);
+      if (ty != TYPE_B96) {
+         ld = new_Instruction(func, OP_LOAD, ty);
+      } else {
+         ld = new_Instruction(func, OP_MERGE, ty);
+         for (int d = 0; d < lval->reg.size / 4; ++d) {
+            Value *tmp = cloneShallow(func, slot);
+            LValue *val;
+            tmp->reg.size = 4;
+            tmp->reg.data.offset += 4 * d;
+
+            Instruction *l = new_Instruction(func, OP_LOAD, TYPE_U32);
+            l->setDef(0, (val = new_LValue(func, FILE_GPR)));
+            l->setSrc(0, tmp);
+            usei->bb->insertBefore(usei, l);
+            ld->setSrc(d, val);
+            val->noSpill = 1;
+         }
+         ld->setDef(0, lval);
+         usei->bb->insertBefore(usei, ld);
+         return lval;
+      }
    } else {
       ld = new_Instruction(func, OP_CVT, ty);
    }

From 718b9f52dd9ba780decf5bb59f5100cf590393a0 Mon Sep 17 00:00:00 2001
From: Connor Abbott <connor.w.abbott@intel.com>
Date: Wed, 5 Aug 2015 09:41:18 -0700
Subject: [PATCH 292/335] i965/fs: print non-1 strides when dumping
 instructions

v2:
  - Simplify code (Iago)

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index e9c990d4308..7376f951fa8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -4621,6 +4621,8 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
    case IMM:
       unreachable("not reached");
    }
+   if (inst->dst.stride != 1)
+      fprintf(file, "<%u>", inst->dst.stride);
    fprintf(file, ":%s, ", brw_reg_type_letters(inst->dst.type));
 
    for (int i = 0; i < inst->sources; i++) {
@@ -4708,6 +4710,16 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
          fprintf(file, "|");
 
       if (inst->src[i].file != IMM) {
+         unsigned stride;
+         if (inst->src[i].file == ARF || inst->src[i].file == FIXED_GRF) {
+            unsigned hstride = inst->src[i].hstride;
+            stride = (hstride == 0 ? 0 : (1 << (hstride - 1)));
+         } else {
+            stride = inst->src[i].stride;
+         }
+         if (stride != 1)
+            fprintf(file, "<%u>", stride);
+
          fprintf(file, ":%s", brw_reg_type_letters(inst->src[i].type));
       }
 

From b1a83b5d1b677faf650a41cd2c152b4d1cd18f84 Mon Sep 17 00:00:00 2001
From: Connor Abbott <connor.w.abbott@intel.com>
Date: Mon, 3 Aug 2015 14:38:12 -0700
Subject: [PATCH 293/335] i965: fix 64-bit immediates in brw_inst(_set)_bits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If we tried to get/set something that was exactly 64 bits, we would
try to do (1 << 64) - 1 to calculate the mask which doesn't give us all
1's like we want.

v2 (Iago)
 - Replace ~0 by ~0ull
 - Removed unnecessary parentheses

v3 (Kristian)
 - Avoid the conditional

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_inst.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_inst.h b/src/mesa/drivers/dri/i965/brw_inst.h
index 4ed95c473cd..b2afe17f950 100644
--- a/src/mesa/drivers/dri/i965/brw_inst.h
+++ b/src/mesa/drivers/dri/i965/brw_inst.h
@@ -694,7 +694,7 @@ brw_inst_bits(const brw_inst *inst, unsigned high, unsigned low)
    high %= 64;
    low %= 64;
 
-   const uint64_t mask = (1ull << (high - low + 1)) - 1;
+   const uint64_t mask = (~0ul >> (64 - (high - low + 1)));
 
    return (inst->data[word] >> low) & mask;
 }
@@ -713,7 +713,7 @@ brw_inst_set_bits(brw_inst *inst, unsigned high, unsigned low, uint64_t value)
    high %= 64;
    low %= 64;
 
-   const uint64_t mask = ((1ull << (high - low + 1)) - 1) << low;
+   const uint64_t mask = (~0ul >> (64 - (high - low + 1))) << low;
 
    /* Make sure the supplied value actually fits in the given bitfield. */
    assert((value & (mask >> low)) == value);

From 70171a9c89ebd885f30bd432452ee35099b6874a Mon Sep 17 00:00:00 2001
From: Connor Abbott <connor.w.abbott@intel.com>
Date: Tue, 11 Aug 2015 14:25:36 -0700
Subject: [PATCH 294/335] i965/fs: respect force_sechalf/force_writemask_all in
 CSE

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 3c40fcd4fd2..3b65a382dc8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -210,6 +210,8 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate)
       copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size);
    } else {
       copy = bld.MOV(inst->dst, src);
+      copy->force_sechalf = inst->force_sechalf;
+      copy->force_writemask_all = inst->force_writemask_all;
       copy->src[0].negate = negate;
    }
    assert(copy->regs_written == written);

From 95ac3b1daeaa7d40d49fa2e0bdef46346c2996d5 Mon Sep 17 00:00:00 2001
From: Connor Abbott <connor.w.abbott@intel.com>
Date: Tue, 11 Aug 2015 16:16:42 -0700
Subject: [PATCH 295/335] i965/fs: don't propagate cmod when the exec sizes
 differ

This can happen when the source of the compare was split by the SIMD
lowering pass. Potentially, we could allow the case where the exec size
of scan_inst is larger, and scan_inst has the right quarter selected,
but doing that seems a little more risky.

v2: Merge the bail condition into the previous if/break block (Matt)

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
index 8fdc959f992..7c01f1e3d62 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
@@ -90,7 +90,8 @@ opt_cmod_propagation_local(bblock_t *block)
       foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
          if (scan_inst->overwrites_reg(inst->src[0])) {
             if (scan_inst->is_partial_write() ||
-                scan_inst->dst.reg_offset != inst->src[0].reg_offset)
+                scan_inst->dst.reg_offset != inst->src[0].reg_offset ||
+                scan_inst->exec_size != inst->exec_size)
                break;
 
             /* CMP's result is the same regardless of dest type. */

From d982922b184930a4ceed1d97b772cce5c371865d Mon Sep 17 00:00:00 2001
From: Connor Abbott <connor.w.abbott@intel.com>
Date: Fri, 14 Aug 2015 12:00:13 -0700
Subject: [PATCH 296/335] i965/fs: add stride restrictions for copy propagation

There are various restrictions on what the hstride can be that depend on
the Gen, and now that we're using hstride == 2 for packing/unpacking
doubles, we're going to run into these restrictions a lot more often.
Pull them out into a separate function, and move the one restriction we
checked previously into it.

Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 .../dri/i965/brw_fs_copy_propagation.cpp      | 56 ++++++++++++++++++-
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 426ea57d8f9..62ae9abede7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -275,6 +275,59 @@ is_logic_op(enum opcode opcode)
            opcode == BRW_OPCODE_NOT);
 }
 
+static bool
+can_take_stride(fs_inst *inst, unsigned arg, unsigned stride,
+                const brw_device_info *devinfo)
+{
+   if (stride > 4)
+      return false;
+
+   /* 3-source instructions can only be Align16, which restricts what strides
+    * they can take. They can only take a stride of 1 (the usual case), or 0
+    * with a special "repctrl" bit. But the repctrl bit doesn't work for
+    * 64-bit datatypes, so if the source type is 64-bit then only a stride of
+    * 1 is allowed. From the Broadwell PRM, Volume 7 "3D Media GPGPU", page
+    * 944:
+    *
+    *    This is applicable to 32b datatypes and 16b datatype. 64b datatypes
+    *    cannot use the replicate control.
+    */
+   if (inst->is_3src()) {
+      if (type_sz(inst->src[arg].type) > 4)
+         return stride == 1;
+      else
+         return stride == 1 || stride == 0;
+   }
+
+   /* From the Broadwell PRM, Volume 2a "Command Reference - Instructions",
+    * page 391 ("Extended Math Function"):
+    *
+    *     The following restrictions apply for align1 mode: Scalar source is
+    *     supported. Source and destination horizontal stride must be the
+    *     same.
+    *
+    * From the Haswell PRM Volume 2b "Command Reference - Instructions", page
+    * 134 ("Extended Math Function"):
+    *
+    *    Scalar source is supported. Source and destination horizontal stride
+    *    must be 1.
+    *
+    * and similar language exists for IVB and SNB. Pre-SNB, math instructions
+    * are sends, so the sources are moved to MRF's and there are no
+    * restrictions.
+    */
+   if (inst->is_math()) {
+      if (devinfo->gen == 6 || devinfo->gen == 7) {
+         assert(inst->dst.stride == 1);
+         return stride == 1 || stride == 0;
+      } else if (devinfo->gen >= 8) {
+         return stride == inst->dst.stride || stride == 0;
+      }
+   }
+
+   return true;
+}
+
 bool
 fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
 {
@@ -326,7 +379,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
    /* Bail if the result of composing both strides would exceed the
     * hardware limit.
     */
-   if (entry->src.stride * inst->src[arg].stride > 4)
+   if (!can_take_stride(inst, arg, entry->src.stride * inst->src[arg].stride,
+                        devinfo))
       return false;
 
    /* Bail if the instruction type is larger than the execution type of the

From fb93dd7aa8f2cac520bbbd3fc2af807bd2573480 Mon Sep 17 00:00:00 2001
From: Connor Abbott <connor.w.abbott@intel.com>
Date: Mon, 3 Aug 2015 15:04:13 -0700
Subject: [PATCH 297/335] nir/builder: only read meaningful channels in
 nir_swizzle()

This way the caller doesn't have to initialize all 4 channels when they
aren't using them.

v2: Fix signed/unsigned comparison warning (Iago)

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/glsl/nir/nir_builder.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h
index d09f929405b..b909f483579 100644
--- a/src/glsl/nir/nir_builder.h
+++ b/src/glsl/nir/nir_builder.h
@@ -242,7 +242,7 @@ nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4],
 {
    nir_alu_src alu_src = { NIR_SRC_INIT };
    alu_src.src = nir_src_for_ssa(src);
-   for (int i = 0; i < 4; i++)
+   for (unsigned i = 0; i < num_components; i++)
       alu_src.swizzle[i] = swiz[i];
 
    return use_fmov ? nir_fmov_alu(build, alu_src, num_components) :

From f58813842bcece3498f55ec5d582466ccff92a5e Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Fri, 15 May 2015 09:14:47 -0700
Subject: [PATCH 298/335] nir: s/nir_type_unsigned/nir_type_uint

v2: do the same in tgsi_to_nir (Samuel)

v3: added missing cases after rebase (Iago)

v4: Add a blank space after '#' in one of the comments (Matt)

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/gallium/auxiliary/nir/tgsi_to_nir.c       |  2 +-
 .../drivers/freedreno/ir3/ir3_compiler_nir.c  |  2 +-
 src/glsl/nir/glsl_to_nir.cpp                  |  2 +-
 src/glsl/nir/nir.h                            |  2 +-
 src/glsl/nir/nir_constant_expressions.py      |  2 +-
 src/glsl/nir/nir_opcodes.py                   | 78 +++++++++----------
 src/glsl/nir/nir_search.c                     |  4 +-
 src/mesa/drivers/dri/i965/brw_nir.c           |  4 +-
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp    |  2 +-
 9 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 1da00b2a650..ce1da102777 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -295,7 +295,7 @@ ttn_emit_declaration(struct ttn_compile *c)
          type = nir_type_int;
          break;
       case TGSI_RETURN_TYPE_UINT:
-         type = nir_type_unsigned;
+         type = nir_type_uint;
          break;
       case TGSI_RETURN_TYPE_FLOAT:
       default:
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 0f5c7e901ba..25e84121d0c 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1718,7 +1718,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
 	case nir_type_int:
 		type = TYPE_S32;
 		break;
-	case nir_type_unsigned:
+	case nir_type_uint:
 	case nir_type_bool:
 		type = TYPE_U32;
 		break;
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 18ef4909049..45d045cd4d6 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -1832,7 +1832,7 @@ nir_visitor::visit(ir_texture *ir)
       break;
    case GLSL_TYPE_BOOL:
    case GLSL_TYPE_UINT:
-      instr->dest_type = nir_type_unsigned;
+      instr->dest_type = nir_type_uint;
       break;
    default:
       unreachable("not reached");
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 087b4537c09..b4be145e5ec 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -634,7 +634,7 @@ typedef enum {
    nir_type_invalid = 0, /* Not a valid type */
    nir_type_float,
    nir_type_int,
-   nir_type_unsigned,
+   nir_type_uint,
    nir_type_bool
 } nir_alu_type;
 
diff --git a/src/glsl/nir/nir_constant_expressions.py b/src/glsl/nir/nir_constant_expressions.py
index 2ba8554645d..b16ef503c92 100644
--- a/src/glsl/nir/nir_constant_expressions.py
+++ b/src/glsl/nir/nir_constant_expressions.py
@@ -213,7 +213,7 @@ unpack_half_1x16(uint16_t u)
 }
 
 /* Some typed vector structures to make things like src0.y work */
-% for type in ["float", "int", "unsigned", "bool"]:
+% for type in ["float", "int", "uint", "bool"]:
 struct ${type}_vec {
    ${type} x;
    ${type} y;
diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
index 729f695cf9c..37d3dfc4588 100644
--- a/src/glsl/nir/nir_opcodes.py
+++ b/src/glsl/nir/nir_opcodes.py
@@ -91,7 +91,7 @@ class Opcode(object):
 tfloat = "float"
 tint = "int"
 tbool = "bool"
-tunsigned = "unsigned"
+tuint = "uint"
 
 commutative = "commutative "
 associative = "associative "
@@ -156,7 +156,7 @@ unop("fsqrt", tfloat, "sqrtf(src0)")
 unop("fexp2", tfloat, "exp2f(src0)")
 unop("flog2", tfloat, "log2f(src0)")
 unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion.
-unop_convert("f2u", tfloat, tunsigned, "src0") # Float-to-unsigned conversion
+unop_convert("f2u", tfloat, tuint, "src0") # Float-to-unsigned conversion
 unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion.
 # Float-to-boolean conversion
 unop_convert("f2b", tfloat, tbool, "src0 != 0.0f")
@@ -165,7 +165,7 @@ unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f")
 # Int-to-boolean conversion
 unop_convert("i2b", tint, tbool, "src0 != 0")
 unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion
-unop_convert("u2f", tunsigned, tfloat, "src0") #Unsigned-to-float conversion.
+unop_convert("u2f", tuint, tfloat, "src0") # Unsigned-to-float conversion.
 
 unop_reduce("bany", 1, tbool, tbool, "{src}", "{src0} || {src1}", "{src}")
 unop_reduce("ball", 1, tbool, tbool, "{src}", "{src0} && {src1}", "{src}")
@@ -205,13 +205,13 @@ unop("fddy_coarse", tfloat, "0.0f")
 # Floating point pack and unpack operations.
 
 def pack_2x16(fmt):
-   unop_horiz("pack_" + fmt + "_2x16", 1, tunsigned, 2, tfloat, """
+   unop_horiz("pack_" + fmt + "_2x16", 1, tuint, 2, tfloat, """
 dst.x = (uint32_t) pack_fmt_1x16(src0.x);
 dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16;
 """.replace("fmt", fmt))
 
 def pack_4x8(fmt):
-   unop_horiz("pack_" + fmt + "_4x8", 1, tunsigned, 4, tfloat, """
+   unop_horiz("pack_" + fmt + "_4x8", 1, tuint, 4, tfloat, """
 dst.x = (uint32_t) pack_fmt_1x8(src0.x);
 dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8;
 dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16;
@@ -219,13 +219,13 @@ dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24;
 """.replace("fmt", fmt))
 
 def unpack_2x16(fmt):
-   unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tunsigned, """
+   unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tuint, """
 dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff));
 dst.y = unpack_fmt_1x16((uint16_t)(src0.x << 16));
 """.replace("fmt", fmt))
 
 def unpack_4x8(fmt):
-   unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tunsigned, """
+   unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tuint, """
 dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff));
 dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff));
 dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff));
@@ -248,22 +248,22 @@ unpack_2x16("half")
 # Lowered floating point unpacking operations.
 
 
-unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tunsigned,
+unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tuint,
            "unpack_half_1x16((uint16_t)(src0.x & 0xffff))")
-unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tunsigned,
+unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tuint,
            "unpack_half_1x16((uint16_t)(src0.x >> 16))")
 
 
 # Bit operations, part of ARB_gpu_shader5.
 
 
-unop("bitfield_reverse", tunsigned, """
+unop("bitfield_reverse", tuint, """
 /* we're not winning any awards for speed here, but that's ok */
 dst = 0;
 for (unsigned bit = 0; bit < 32; bit++)
    dst |= ((src0 >> bit) & 1) << (31 - bit);
 """)
-unop("bit_count", tunsigned, """
+unop("bit_count", tuint, """
 dst = 0;
 for (unsigned bit = 0; bit < 32; bit++) {
    if ((src0 >> bit) & 1)
@@ -271,7 +271,7 @@ for (unsigned bit = 0; bit < 32; bit++) {
 }
 """)
 
-unop_convert("ufind_msb", tunsigned, tint, """
+unop_convert("ufind_msb", tuint, tint, """
 dst = -1;
 for (int bit = 31; bit > 0; bit--) {
    if ((src0 >> bit) & 1) {
@@ -358,25 +358,25 @@ binop("imul", tint, commutative + associative, "src0 * src1")
 binop("imul_high", tint, commutative,
       "(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)")
 # high 32-bits of unsigned integer multiply
-binop("umul_high", tunsigned, commutative,
+binop("umul_high", tuint, commutative,
       "(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)")
 
 binop("fdiv", tfloat, "", "src0 / src1")
 binop("idiv", tint, "", "src0 / src1")
-binop("udiv", tunsigned, "", "src0 / src1")
+binop("udiv", tuint, "", "src0 / src1")
 
 # returns a boolean representing the carry resulting from the addition of
 # the two unsigned arguments.
 
-binop_convert("uadd_carry", tbool, tunsigned, commutative, "src0 + src1 < src0")
+binop_convert("uadd_carry", tbool, tuint, commutative, "src0 + src1 < src0")
 
 # returns a boolean representing the borrow resulting from the subtraction
 # of the two unsigned arguments.
 
-binop_convert("usub_borrow", tbool, tunsigned, "", "src1 < src0")
+binop_convert("usub_borrow", tbool, tuint, "", "src1 < src0")
 
 binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)")
-binop("umod", tunsigned, "", "src1 == 0 ? 0 : src0 % src1")
+binop("umod", tuint, "", "src1 == 0 ? 0 : src0 % src1")
 
 #
 # Comparisons
@@ -393,8 +393,8 @@ binop_compare("ilt", tint, "", "src0 < src1")
 binop_compare("ige", tint, "", "src0 >= src1")
 binop_compare("ieq", tint, commutative, "src0 == src1")
 binop_compare("ine", tint, commutative, "src0 != src1")
-binop_compare("ult", tunsigned, "", "src0 < src1")
-binop_compare("uge", tunsigned, "", "src0 >= src1")
+binop_compare("ult", tuint, "", "src0 < src1")
+binop_compare("uge", tuint, "", "src0 >= src1")
 
 # integer-aware GLSL-style comparisons that compare floats and ints
 
@@ -425,7 +425,7 @@ binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not E
 
 binop("ishl", tint, "", "src0 << src1")
 binop("ishr", tint, "", "src0 >> src1")
-binop("ushr", tunsigned, "", "src0 >> src1")
+binop("ushr", tuint, "", "src0 >> src1")
 
 # bitwise logic operators
 #
@@ -433,9 +433,9 @@ binop("ushr", tunsigned, "", "src0 >> src1")
 # integers.
 
 
-binop("iand", tunsigned, commutative + associative, "src0 & src1")
-binop("ior", tunsigned, commutative + associative, "src0 | src1")
-binop("ixor", tunsigned, commutative + associative, "src0 ^ src1")
+binop("iand", tuint, commutative + associative, "src0 & src1")
+binop("ior", tuint, commutative + associative, "src0 | src1")
+binop("ixor", tuint, commutative + associative, "src0 ^ src1")
 
 
 # floating point logic operators
@@ -463,10 +463,10 @@ opcode("fdph_replicated", 4, tfloat, [3, 4], [tfloat, tfloat], "",
 
 binop("fmin", tfloat, "", "fminf(src0, src1)")
 binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
-binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : src1")
+binop("umin", tuint, commutative + associative, "src1 > src0 ? src0 : src1")
 binop("fmax", tfloat, "", "fmaxf(src0, src1)")
 binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
-binop("umax", tunsigned, commutative + associative, "src1 > src0 ? src1 : src0")
+binop("umax", tuint, commutative + associative, "src1 > src0 ? src1 : src0")
 
 # Saturated vector add for 4 8bit ints.
 binop("usadd_4x8", tint, commutative + associative, """
@@ -515,10 +515,10 @@ for (int i = 0; i < 32; i += 8) {
 
 binop("fpow", tfloat, "", "powf(src0, src1)")
 
-binop_horiz("pack_half_2x16_split", 1, tunsigned, 1, tfloat, 1, tfloat,
+binop_horiz("pack_half_2x16_split", 1, tuint, 1, tfloat, 1, tfloat,
             "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)")
 
-binop_convert("bfm", tunsigned, tint, "", """
+binop_convert("bfm", tuint, tint, "", """
 int offset = src0, bits = src1;
 if (offset < 0 || bits < 0 || offset + bits > 32)
    dst = 0; /* undefined per the spec */
@@ -535,7 +535,7 @@ if (!isnormal(dst))
 
 # Combines the first component of each input to make a 2-component vector.
 
-binop_horiz("vec2", 2, tunsigned, 1, tunsigned, 1, tunsigned, """
+binop_horiz("vec2", 2, tuint, 1, tuint, 1, tuint, """
 dst.x = src0.x;
 dst.y = src1.x;
 """)
@@ -543,9 +543,9 @@ dst.y = src1.x;
 def triop(name, ty, const_expr):
    opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], "", const_expr)
 def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr):
-   opcode(name, output_size, tunsigned,
+   opcode(name, output_size, tuint,
    [src1_size, src2_size, src3_size],
-   [tunsigned, tunsigned, tunsigned], "", const_expr)
+   [tuint, tuint, tuint], "", const_expr)
 
 triop("ffma", tfloat, "src0 * src1 + src2")
 
@@ -559,10 +559,10 @@ triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
 
 
 triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2")
-opcode("bcsel", 0, tunsigned, [0, 0, 0],
-      [tbool, tunsigned, tunsigned], "", "src0 ? src1 : src2")
+opcode("bcsel", 0, tuint, [0, 0, 0],
+      [tbool, tuint, tuint], "", "src0 ? src1 : src2")
 
-triop("bfi", tunsigned, """
+triop("bfi", tuint, """
 unsigned mask = src0, insert = src1, base = src2;
 if (mask == 0) {
    dst = base;
@@ -576,8 +576,8 @@ if (mask == 0) {
 }
 """)
 
-opcode("ubitfield_extract", 0, tunsigned,
-       [0, 1, 1], [tunsigned, tint, tint], "", """
+opcode("ubitfield_extract", 0, tuint,
+       [0, 1, 1], [tuint, tint, tint], "", """
 unsigned base = src0;
 int offset = src1.x, bits = src2.x;
 if (bits == 0) {
@@ -611,13 +611,13 @@ dst.z = src2.x;
 
 def quadop_horiz(name, output_size, src1_size, src2_size, src3_size,
                  src4_size, const_expr):
-   opcode(name, output_size, tunsigned,
+   opcode(name, output_size, tuint,
           [src1_size, src2_size, src3_size, src4_size],
-          [tunsigned, tunsigned, tunsigned, tunsigned],
+          [tuint, tuint, tuint, tuint],
           "", const_expr)
 
-opcode("bitfield_insert", 0, tunsigned, [0, 0, 1, 1],
-       [tunsigned, tunsigned, tint, tint], "", """
+opcode("bitfield_insert", 0, tuint, [0, 0, 1, 1],
+       [tuint, tuint, tint, tint], "", """
 unsigned base = src0, insert = src1;
 int offset = src2.x, bits = src3.x;
 if (bits == 0) {
diff --git a/src/glsl/nir/nir_search.c b/src/glsl/nir/nir_search.c
index bb154407914..56d7e8162f3 100644
--- a/src/glsl/nir/nir_search.c
+++ b/src/glsl/nir/nir_search.c
@@ -166,7 +166,7 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
          }
          return true;
       case nir_type_int:
-      case nir_type_unsigned:
+      case nir_type_uint:
       case nir_type_bool:
          for (unsigned i = 0; i < num_components; ++i) {
             if (load->value.i[new_swizzle[i]] != const_val->data.i)
@@ -310,7 +310,7 @@ construct_value(const nir_search_value *value, nir_alu_type type,
          load->def.name = ralloc_asprintf(mem_ctx, "%d", c->data.i);
          load->value.i[0] = c->data.i;
          break;
-      case nir_type_unsigned:
+      case nir_type_uint:
       case nir_type_bool:
          load->value.u[0] = c->data.u;
          break;
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index bd91254f5bf..973a20c0b4e 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -364,7 +364,7 @@ enum brw_reg_type
 brw_type_for_nir_type(nir_alu_type type)
 {
    switch (type) {
-   case nir_type_unsigned:
+   case nir_type_uint:
       return BRW_REGISTER_TYPE_UD;
    case nir_type_bool:
    case nir_type_int:
@@ -391,7 +391,7 @@ brw_glsl_base_type_for_nir_type(nir_alu_type type)
    case nir_type_int:
       return GLSL_TYPE_INT;
 
-   case nir_type_unsigned:
+   case nir_type_uint:
       return GLSL_TYPE_UINT;
 
    default:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 7e17a6a6246..8d2ebfb7c89 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1561,7 +1561,7 @@ glsl_type_for_nir_alu_type(nir_alu_type alu_type,
       return glsl_type::vec(components);
    case nir_type_int:
       return glsl_type::ivec(components);
-   case nir_type_unsigned:
+   case nir_type_uint:
       return glsl_type::uvec(components);
    case nir_type_bool:
       return glsl_type::bvec(components);

From 2010de4015c96f241e81012b395cb4254091f0bb Mon Sep 17 00:00:00 2001
From: Neil Roberts <neil@linux.intel.com>
Date: Wed, 4 Nov 2015 15:52:06 +0100
Subject: [PATCH 299/335] i965: Handle lum, intensity and missing components in
 the fast clear

It looks like the sampler hardware doesn't take into account the
surface format when sampling a cleared color after a fast clear has
been done. So for example if you clear a GL_RED surface to 1,1,1,1
then the sampling instructions will return 1,1,1,1 instead of 1,0,0,1.
This patch makes it override the color that is programmed in the
surface state in order to swizzle for luminance and intensity as well
as overriding the missing components.

Fixes the ext_framebuffer_multisample-fast-clear Piglit test.

v2: Handle luminance and intensity formats
Reviewed-by: Ben Widawsky <benjamin.widawsky@intel.com>
---
 .../drivers/dri/i965/brw_meta_fast_clear.c    | 34 +++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index 499daba3b00..1f8bfdfa492 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -380,13 +380,43 @@ set_fast_clear_color(struct brw_context *brw,
                      struct intel_mipmap_tree *mt,
                      const union gl_color_union *color)
 {
+   union gl_color_union override_color = *color;
+
+   /* The sampler doesn't look at the format of the surface when the fast
+    * clear color is used so we need to implement luminance, intensity and
+    * missing components manually.
+    */
+   switch (_mesa_get_format_base_format(mt->format)) {
+   case GL_INTENSITY:
+      override_color.ui[3] = override_color.ui[0];
+      /* fall through */
+   case GL_LUMINANCE:
+   case GL_LUMINANCE_ALPHA:
+      override_color.ui[1] = override_color.ui[0];
+      override_color.ui[2] = override_color.ui[0];
+      break;
+   default:
+      for (int i = 0; i < 3; i++) {
+         if (!_mesa_format_has_color_component(mt->format, i))
+            override_color.ui[i] = 0;
+      }
+      break;
+   }
+
+   if (!_mesa_format_has_color_component(mt->format, 3)) {
+      if (_mesa_is_format_integer_color(mt->format))
+         override_color.ui[3] = 1;
+      else
+         override_color.f[3] = 1.0f;
+   }
+
    if (brw->gen >= 9) {
-      mt->gen9_fast_clear_color = *color;
+      mt->gen9_fast_clear_color = override_color;
    } else {
       mt->fast_clear_color_value = 0;
       for (int i = 0; i < 4; i++) {
          /* Testing for non-0 works for integer and float colors */
-         if (color->f[i] != 0.0f) {
+         if (override_color.f[i] != 0.0f) {
              mt->fast_clear_color_value |=
                 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
          }

From 8a6d4765880a2c9000970b2b6e584291090b1dc3 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Sat, 21 Nov 2015 19:43:09 +0000
Subject: [PATCH 300/335] pipe-loader: link against libloader regardless of
 libdrm presence

Whether or not the loader has libdrm support is up to the loader itself.
Anyone using the loader should just include it whenever they depend on
it.

Cc: mesa-stable@lists.freedesktop.org
Fixes: 0f39f9cb7ad "pipe-loader: add a dummy 'static' pipe-loader"
Reported-by: Jon TURNEY <jon.turney@dronecode.org.uk>
Tested-by: Jon TURNEY <jon.turney@dronecode.org.uk>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/gallium/auxiliary/pipe-loader/Makefile.am | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/pipe-loader/Makefile.am b/src/gallium/auxiliary/pipe-loader/Makefile.am
index f661897fdf7..8039a957b1b 100644
--- a/src/gallium/auxiliary/pipe-loader/Makefile.am
+++ b/src/gallium/auxiliary/pipe-loader/Makefile.am
@@ -37,12 +37,12 @@ libpipe_loader_static_la_SOURCES += \
 libpipe_loader_dynamic_la_SOURCES += \
 	$(DRM_SOURCES)
 
+endif
+
 libpipe_loader_static_la_LIBADD = \
 	$(top_builddir)/src/loader/libloader.la
 
 libpipe_loader_dynamic_la_LIBADD = \
 	$(top_builddir)/src/loader/libloader.la
 
-endif
-
 EXTRA_DIST = SConscript

From b9b0a1f58e41ba4027a16300393835dc8c632f50 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Sun, 22 Nov 2015 22:05:00 +0000
Subject: [PATCH 301/335] loader: unconditionally add AM_CPPFLAGS to
 libloader_la_CPPFLAGS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It seems that, due to the conditional, autotools gets confused and
forgets to add AM_CPPFLAGS when building libloader (when HAVE_DRICOMMON
is not set).

Cc: mesa-stable@lists.freedesktop.org
Fixes: 5a79e0a8e37 "automake: loader: rework the CPPFLAGS"
Reported-by: Pali Rohár <pali.rohar@gmail.com>
Tested-by: Pali Rohár <pali.rohar@gmail.com>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/loader/Makefile.am | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/loader/Makefile.am b/src/loader/Makefile.am
index 5021120c96d..9ca17540d54 100644
--- a/src/loader/Makefile.am
+++ b/src/loader/Makefile.am
@@ -34,12 +34,12 @@ AM_CPPFLAGS = \
 	$(LIBDRM_CFLAGS) \
 	$(LIBUDEV_CFLAGS)
 
+libloader_la_CPPFLAGS = $(AM_CPPFLAGS)
 libloader_la_SOURCES = $(LOADER_C_FILES)
 libloader_la_LIBADD =
 
 if HAVE_DRICOMMON
-libloader_la_CPPFLAGS = \
-	$(AM_CPPFLAGS) \
+libloader_la_CPPFLAGS += \
 	-I$(top_srcdir)/src/mesa/drivers/dri/common/ \
 	-I$(top_builddir)/src/mesa/drivers/dri/common/ \
 	-I$(top_srcdir)/src/mesa/ \

From b89d1b2ccf16c0bee3708560b69e99b6dbb212f0 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Sun, 22 Nov 2015 22:05:01 +0000
Subject: [PATCH 302/335] configure.ac: default to disabled dri3 when
 --disable-dri is set
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Not too long ago, the dri3 code lived in src/glx, which itself was
guarded by HAVE_DRI_GLX. As the name suggests, we didn't descend into
that folder when dri was disabled, and thus we missed that dri3 does not
consider/honour --enable-dri.

Cc: mesa-stable@lists.freedesktop.org
Fixes: 6bd9ba7d074 "loader: Add dri3 helper"
Cc: Pali Rohár <pali.rohar@gmail.com>
Reported-by: Pali Rohár <pali.rohar@gmail.com>
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 configure.ac | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/configure.ac b/configure.ac
index 120c93e7e73..40168715fc6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -767,6 +767,11 @@ linux*)
     dri3_default=no
     ;;
 esac
+
+if test "x$enable_dri" = xno; then
+    dri3_default=no
+fi
+
 AC_ARG_ENABLE([dri3],
     [AS_HELP_STRING([--enable-dri3],
         [enable DRI3 @<:@default=auto@:>@])],

From d4c40f99ab9b8ccf807cea45599231c0072f740b Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 21 Nov 2015 13:56:22 -0500
Subject: [PATCH 303/335] freedreno/a4xx: add polygon mode support

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h     | 15 ++++++++++++++-
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c     |  7 ++++---
 .../drivers/freedreno/a4xx/fd4_rasterizer.c       |  7 +++++++
 .../drivers/freedreno/a4xx/fd4_rasterizer.h       |  1 +
 4 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index bd9b0a46bc1..df59438f106 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -2626,7 +2626,20 @@ static inline uint32_t A4XX_PC_PRIM_VTX_CNTL_VAROUT(uint32_t val)
 #define A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST		0x02000000
 #define A4XX_PC_PRIM_VTX_CNTL_PSIZE				0x04000000
 
-#define REG_A4XX_UNKNOWN_21C5					0x000021c5
+#define REG_A4XX_PC_PRIM_VTX_CNTL2				0x000021c5
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK	0x00000007
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT	0
+static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val)
+{
+	return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK;
+}
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK	0x00000038
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT	3
+static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val)
+{
+	return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK;
+}
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE			0x00000040
 
 #define REG_A4XX_PC_RESTART_INDEX				0x000021c6
 
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 60069aeedc5..ec454b2a10f 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -568,8 +568,9 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 	 */
 	if (emit->info) {
 		const struct pipe_draw_info *info = emit->info;
-		uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer)
-				->pc_prim_vtx_cntl;
+		struct fd4_rasterizer_stateobj *rast =
+			fd4_rasterizer_stateobj(ctx->rasterizer);
+		uint32_t val = rast->pc_prim_vtx_cntl;
 
 		if (info->indexed && info->primitive_restart)
 			val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART;
@@ -585,7 +586,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
 		OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2);
 		OUT_RING(ring, val);
-		OUT_RING(ring, 0x12);     /* XXX UNKNOWN_21C5 */
+		OUT_RING(ring, rast->pc_prim_vtx_cntl2);
 	}
 
 	if (dirty & FD_DIRTY_SCISSOR) {
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
index d894b6b9e09..7456c63febe 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
@@ -77,6 +77,13 @@ fd4_rasterizer_state_create(struct pipe_context *pctx,
 
 	so->gras_su_mode_control =
 			A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2.0);
+	so->pc_prim_vtx_cntl2 =
+		A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) |
+		A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back));
+
+	if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
+		cso->fill_back != PIPE_POLYGON_MODE_FILL)
+		so->pc_prim_vtx_cntl2 |= A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE;
 
 	if (cso->cull_face & PIPE_FACE_FRONT)
 		so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
index 64e81a9983b..b56a04da6a8 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
@@ -42,6 +42,7 @@ struct fd4_rasterizer_stateobj {
 	uint32_t gras_su_mode_control;
 	uint32_t gras_cl_clip_cntl;
 	uint32_t pc_prim_vtx_cntl;
+	uint32_t pc_prim_vtx_cntl2;
 };
 
 static inline struct fd4_rasterizer_stateobj *

From 99f12a3f1a825c53d790f12dae114f388bb6244c Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 21 Nov 2015 10:02:05 -0500
Subject: [PATCH 304/335] freedreno/a4xx: add ARB_texture_buffer_range support

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 14 +++++--
 .../drivers/freedreno/a4xx/fd4_texture.c      | 38 +++++++++++++------
 .../drivers/freedreno/freedreno_screen.c      |  4 +-
 3 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index ec454b2a10f..e488450498e 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -181,11 +181,12 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
 				CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
 		for (i = 0; i < tex->num_textures; i++) {
-			static const struct fd4_pipe_sampler_view dummy_view = {};
+			static const struct fd4_pipe_sampler_view dummy_view = {
+				.base.target = PIPE_TEXTURE_1D,
+			};
 			const struct fd4_pipe_sampler_view *view = tex->textures[i] ?
 					fd4_pipe_sampler_view(tex->textures[i]) :
 					&dummy_view;
-			unsigned start = fd_sampler_first_level(&view->base);
 
 			OUT_RING(ring, view->texconst0);
 			OUT_RING(ring, view->texconst1);
@@ -193,7 +194,14 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
 			OUT_RING(ring, view->texconst3);
 			if (view->base.texture) {
 				struct fd_resource *rsc = fd_resource(view->base.texture);
-				uint32_t offset = fd_resource_offset(rsc, start, 0);
+				unsigned start = fd_sampler_first_level(&view->base);
+				uint32_t offset;
+				if (rsc->base.b.target == PIPE_BUFFER) {
+					offset = view->base.u.buf.first_element *
+						util_format_get_blocksize(view->base.format);
+				} else {
+					offset = fd_resource_offset(rsc, start, 0);
+				}
 				OUT_RELOC(ring, rsc->bo, offset, view->texconst4, 0);
 			} else {
 				OUT_RING(ring, 0x00000000);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index 598f1e19116..a37c64473bd 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -212,8 +212,7 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 {
 	struct fd4_pipe_sampler_view *so = CALLOC_STRUCT(fd4_pipe_sampler_view);
 	struct fd_resource *rsc = fd_resource(prsc);
-	unsigned lvl = fd_sampler_first_level(cso);
-	unsigned miplevels = fd_sampler_last_level(cso) - lvl;
+	unsigned lvl;
 	uint32_t sz2 = 0;
 
 	if (!so)
@@ -228,21 +227,38 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 	so->texconst0 =
 		A4XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
 		A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(cso->format)) |
-		A4XX_TEX_CONST_0_MIPLVLS(miplevels) |
 		fd4_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
 				cso->swizzle_b, cso->swizzle_a);
 
 	if (util_format_is_srgb(cso->format))
 		so->texconst0 |= A4XX_TEX_CONST_0_SRGB;
 
-	so->texconst1 =
-		A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
-		A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
-	so->texconst2 =
-		A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) |
-		A4XX_TEX_CONST_2_PITCH(
-			util_format_get_nblocksx(
-				cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
+	if (prsc->target == PIPE_BUFFER) {
+		unsigned elements = cso->u.buf.last_element -
+			cso->u.buf.first_element + 1;
+		lvl = 0;
+		so->texconst1 =
+			A4XX_TEX_CONST_1_WIDTH(elements) |
+			A4XX_TEX_CONST_1_HEIGHT(1);
+		so->texconst2 =
+			A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) |
+			A4XX_TEX_CONST_2_PITCH(elements * rsc->cpp);
+	} else {
+		unsigned miplevels;
+
+		lvl = fd_sampler_first_level(cso);
+		miplevels = fd_sampler_last_level(cso) - lvl;
+
+		so->texconst0 |= A4XX_TEX_CONST_0_MIPLVLS(miplevels);
+		so->texconst1 =
+			A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
+			A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
+		so->texconst2 =
+			A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) |
+			A4XX_TEX_CONST_2_PITCH(
+					util_format_get_nblocksx(
+							cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
+	}
 
 	switch (prsc->target) {
 	case PIPE_TEXTURE_1D_ARRAY:
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 7bffc8f68c2..0c494d4f4c8 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -183,7 +183,9 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 		return is_a3xx(screen) || is_a4xx(screen);
 
 	case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
-		return is_a3xx(screen) ? 16 : 0;
+		if (is_a3xx(screen)) return 16;
+		if (is_a4xx(screen)) return 32;
+		return 0;
 	case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
 		/* I think 32k on a4xx.. and we could possibly emulate more
 		 * by pretending 2d/rect textures and splitting high bits

From c65bc2e805a1fbcb72b9b77e0d9557ebce3a379a Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 21 Nov 2015 10:28:45 -0500
Subject: [PATCH 305/335] freedreno/a4xx: support 16384 texels in buffer
 texture

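Looks like the width field's bitmask was off by one bit: it covered only
14 bits (0x1fff8000) instead of 15 (0x3fff8000), so a width of 16384
texels did not fit in the field.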

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h    | 2 +-
 src/gallium/drivers/freedreno/freedreno_screen.c | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index df59438f106..5a5323042d6 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -2864,7 +2864,7 @@ static inline uint32_t A4XX_TEX_CONST_1_HEIGHT(uint32_t val)
 {
 	return ((val) << A4XX_TEX_CONST_1_HEIGHT__SHIFT) & A4XX_TEX_CONST_1_HEIGHT__MASK;
 }
-#define A4XX_TEX_CONST_1_WIDTH__MASK				0x1fff8000
+#define A4XX_TEX_CONST_1_WIDTH__MASK				0x3fff8000
 #define A4XX_TEX_CONST_1_WIDTH__SHIFT				15
 static inline uint32_t A4XX_TEX_CONST_1_WIDTH(uint32_t val)
 {
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 0c494d4f4c8..9e51c4e3c4b 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -187,12 +187,11 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 		if (is_a4xx(screen)) return 32;
 		return 0;
 	case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
-		/* I think 32k on a4xx.. and we could possibly emulate more
-		 * by pretending 2d/rect textures and splitting high bits
-		 * of index into 2nd dimension..
+		/* We could possibly emulate more by pretending 2d/rect textures and
+		 * splitting high bits of index into 2nd dimension..
 		 */
 		if (is_a3xx(screen)) return 8192;
-		if (is_a4xx(screen)) return 16383;
+		if (is_a4xx(screen)) return 16384;
 		return 0;
 
 	case PIPE_CAP_TEXTURE_FLOAT_LINEAR:

From 1c7d0a6aa4f5cb38af7e281e1e5437cd1a20f781 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 21 Nov 2015 10:44:00 -0500
Subject: [PATCH 306/335] gallium/util: remove the fake format helpers for bptc
 and etc2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This was a silly hack that kept growing and growing. Instead, just write
NULLs for those functions. No need to have helpers that just assert(0)
when you call them.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
---
 src/gallium/auxiliary/Makefile.sources       |  2 -
 src/gallium/auxiliary/util/u_format_fake.c   | 37 -----------
 src/gallium/auxiliary/util/u_format_fake.h   | 66 --------------------
 src/gallium/auxiliary/util/u_format_table.py | 10 ++-
 4 files changed, 7 insertions(+), 108 deletions(-)
 delete mode 100644 src/gallium/auxiliary/util/u_format_fake.c
 delete mode 100644 src/gallium/auxiliary/util/u_format_fake.h

diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index 82ef5ecfce4..61601920a94 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -219,8 +219,6 @@ C_SOURCES := \
 	util/u_format.h \
 	util/u_format_etc.c \
 	util/u_format_etc.h \
-	util/u_format_fake.c \
-	util/u_format_fake.h \
 	util/u_format_latc.c \
 	util/u_format_latc.h \
 	util/u_format_other.c \
diff --git a/src/gallium/auxiliary/util/u_format_fake.c b/src/gallium/auxiliary/util/u_format_fake.c
deleted file mode 100644
index 77e896d27bd..00000000000
--- a/src/gallium/auxiliary/util/u_format_fake.c
+++ /dev/null
@@ -1,37 +0,0 @@
-#include "u_format.h"
-#include "u_format_fake.h"
-
-#define fake(format) \
-void \
-util_format_##format##_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) {assert(0);} \
-\
-void \
-util_format_##format##_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) {assert(0);} \
-\
-void \
-util_format_##format##_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) {assert(0);} \
-\
-void \
-util_format_##format##_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) {assert(0);} \
-\
-void \
-util_format_##format##_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) {assert(0);} \
-\
-void \
-util_format_##format##_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) {assert(0);}
-
-fake(bptc_rgba_unorm)
-fake(bptc_srgba)
-fake(bptc_rgb_float)
-fake(bptc_rgb_ufloat)
-
-fake(etc2_rgb8)
-fake(etc2_srgb8)
-fake(etc2_rgb8a1)
-fake(etc2_srgb8a1)
-fake(etc2_rgba8)
-fake(etc2_srgba8)
-fake(etc2_r11_unorm)
-fake(etc2_r11_snorm)
-fake(etc2_rg11_unorm)
-fake(etc2_rg11_snorm)
diff --git a/src/gallium/auxiliary/util/u_format_fake.h b/src/gallium/auxiliary/util/u_format_fake.h
deleted file mode 100644
index e6bfd4e1594..00000000000
--- a/src/gallium/auxiliary/util/u_format_fake.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2011 Red Hat Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- **************************************************************************/
-
-#ifndef U_FORMAT_FAKE_H_
-#define U_FORMAT_FAKE_H_
-
-#define __format_fake(format) \
-void \
-util_format_##format##_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); \
-\
-void \
-util_format_##format##_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); \
-\
-void \
-util_format_##format##_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); \
-\
-void \
-util_format_##format##_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); \
-\
-void \
-util_format_##format##_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); \
-\
-void \
-util_format_##format##_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
-
-__format_fake(bptc_rgba_unorm)
-__format_fake(bptc_srgba)
-__format_fake(bptc_rgb_float)
-__format_fake(bptc_rgb_ufloat)
-
-__format_fake(etc2_rgb8)
-__format_fake(etc2_srgb8)
-__format_fake(etc2_rgb8a1)
-__format_fake(etc2_srgb8a1)
-__format_fake(etc2_rgba8)
-__format_fake(etc2_srgba8)
-__format_fake(etc2_r11_unorm)
-__format_fake(etc2_r11_snorm)
-__format_fake(etc2_rg11_unorm)
-__format_fake(etc2_rg11_snorm)
-
-#endif
diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py
index aceb0caf7e1..1fec0cbfd15 100755
--- a/src/gallium/auxiliary/util/u_format_table.py
+++ b/src/gallium/auxiliary/util/u_format_table.py
@@ -90,7 +90,6 @@ def write_format_table(formats):
     print '#include "u_format_rgtc.h"'
     print '#include "u_format_latc.h"'
     print '#include "u_format_etc.h"'
-    print '#include "u_format_fake.h"'
     print
     
     u_format_pack.generate(formats)
@@ -139,10 +138,15 @@ def write_format_table(formats):
         u_format_pack.print_channels(format, do_channel_array)
         u_format_pack.print_channels(format, do_swizzle_array)
         print "   %s," % (colorspace_map(format.colorspace),)
-        if format.colorspace != ZS and not format.is_pure_color():
+        access = True
+        if format.layout == 'bptc':
+            access = False
+        if format.layout == 'etc' and format.short_name() != 'etc1_rgb8':
+            access = False
+        if format.colorspace != ZS and not format.is_pure_color() and access:
             print "   &util_format_%s_unpack_rgba_8unorm," % format.short_name() 
             print "   &util_format_%s_pack_rgba_8unorm," % format.short_name() 
-            if format.layout == 's3tc' or format.layout == 'rgtc' or format.layout == 'bptc':
+            if format.layout == 's3tc' or format.layout == 'rgtc':
                 print "   &util_format_%s_fetch_rgba_8unorm," % format.short_name()
             else:
                 print "   NULL, /* fetch_rgba_8unorm */" 

From 33339775565154040e0c4ea2e196217dccc08cdf Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 21 Nov 2015 11:23:34 -0500
Subject: [PATCH 307/335] gallium: add ASTC formats
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
---
 src/gallium/auxiliary/util/u_format.csv      | 30 +++++++++
 src/gallium/auxiliary/util/u_format.h        | 65 +++++++++++++++++++-
 src/gallium/auxiliary/util/u_format_pack.py  |  2 +-
 src/gallium/auxiliary/util/u_format_table.py |  2 +-
 src/gallium/include/pipe/p_format.h          | 30 +++++++++
 5 files changed, 126 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv
index d3b77e6b99b..c26d7331d4c 100644
--- a/src/gallium/auxiliary/util/u_format.csv
+++ b/src/gallium/auxiliary/util/u_format.csv
@@ -202,6 +202,36 @@ PIPE_FORMAT_BPTC_SRGBA            , bptc, 4, 4, x128,     ,     ,     , xyzw, sr
 PIPE_FORMAT_BPTC_RGB_FLOAT        , bptc, 4, 4, x128,     ,     ,     , xyz1, rgb
 PIPE_FORMAT_BPTC_RGB_UFLOAT       , bptc, 4, 4, x128,     ,     ,     , xyz1, rgb
 
+PIPE_FORMAT_ASTC_4x4              , astc, 4, 4, x128,     ,     ,     , xyzw, rgb
+PIPE_FORMAT_ASTC_5x4              , astc, 5, 4, x128,     ,     ,     , xyzw, rgb
+PIPE_FORMAT_ASTC_5x5              , astc, 5, 5, x128,     ,     ,     , xyzw, rgb
+PIPE_FORMAT_ASTC_6x5              , astc, 6, 5, x128,     ,     ,     , xyzw, rgb
+PIPE_FORMAT_ASTC_6x6              , astc, 6, 6, x128,     ,     ,     , xyzw, rgb
+PIPE_FORMAT_ASTC_8x5              , astc, 8, 5, x128,     ,     ,     , xyzw, rgb
+PIPE_FORMAT_ASTC_8x6              , astc, 8, 6, x128,     ,     ,     , xyzw, rgb
+PIPE_FORMAT_ASTC_8x8              , astc, 8, 8, x128,     ,     ,     , xyzw, rgb
+PIPE_FORMAT_ASTC_10x5             , astc,10, 5, x128,     ,     ,     , xyzw, rgb
+PIPE_FORMAT_ASTC_10x6             , astc,10, 6, x128,     ,     ,     , xyzw, rgb
+PIPE_FORMAT_ASTC_10x8             , astc,10, 8, x128,     ,     ,     , xyzw, rgb
+PIPE_FORMAT_ASTC_10x10            , astc,10,10, x128,     ,     ,     , xyzw, rgb
+PIPE_FORMAT_ASTC_12x10            , astc,12,10, x128,     ,     ,     , xyzw, rgb
+PIPE_FORMAT_ASTC_12x12            , astc,12,12, x128,     ,     ,     , xyzw, rgb
+
+PIPE_FORMAT_ASTC_4x4_SRGB         , astc, 4, 4, x128,     ,     ,     , xyzw, srgb
+PIPE_FORMAT_ASTC_5x4_SRGB         , astc, 5, 4, x128,     ,     ,     , xyzw, srgb
+PIPE_FORMAT_ASTC_5x5_SRGB         , astc, 5, 5, x128,     ,     ,     , xyzw, srgb
+PIPE_FORMAT_ASTC_6x5_SRGB         , astc, 6, 5, x128,     ,     ,     , xyzw, srgb
+PIPE_FORMAT_ASTC_6x6_SRGB         , astc, 6, 6, x128,     ,     ,     , xyzw, srgb
+PIPE_FORMAT_ASTC_8x5_SRGB         , astc, 8, 5, x128,     ,     ,     , xyzw, srgb
+PIPE_FORMAT_ASTC_8x6_SRGB         , astc, 8, 6, x128,     ,     ,     , xyzw, srgb
+PIPE_FORMAT_ASTC_8x8_SRGB         , astc, 8, 8, x128,     ,     ,     , xyzw, srgb
+PIPE_FORMAT_ASTC_10x5_SRGB        , astc,10, 5, x128,     ,     ,     , xyzw, srgb
+PIPE_FORMAT_ASTC_10x6_SRGB        , astc,10, 6, x128,     ,     ,     , xyzw, srgb
+PIPE_FORMAT_ASTC_10x8_SRGB        , astc,10, 8, x128,     ,     ,     , xyzw, srgb
+PIPE_FORMAT_ASTC_10x10_SRGB       , astc,10,10, x128,     ,     ,     , xyzw, srgb
+PIPE_FORMAT_ASTC_12x10_SRGB       , astc,12,10, x128,     ,     ,     , xyzw, srgb
+PIPE_FORMAT_ASTC_12x12_SRGB       , astc,12,12, x128,     ,     ,     , xyzw, srgb
+
 # Straightforward D3D10-like formats (also used for 
 # vertex buffer element description)
 # 
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index a1b1b28fa41..ffdb864fa83 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -83,10 +83,15 @@ enum util_format_layout {
     */
    UTIL_FORMAT_LAYOUT_BPTC = 7,
 
+   /**
+    * ASTC
+    */
+   UTIL_FORMAT_LAYOUT_ASTC = 8,
+
    /**
     * Everything else that doesn't fit in any of the above layouts.
     */
-   UTIL_FORMAT_LAYOUT_OTHER = 8
+   UTIL_FORMAT_LAYOUT_OTHER = 9
 };
 
 
@@ -481,6 +486,7 @@ util_format_is_compressed(enum pipe_format format)
    case UTIL_FORMAT_LAYOUT_RGTC:
    case UTIL_FORMAT_LAYOUT_ETC:
    case UTIL_FORMAT_LAYOUT_BPTC:
+   case UTIL_FORMAT_LAYOUT_ASTC:
       /* XXX add other formats in the future */
       return TRUE;
    default:
@@ -924,6 +930,35 @@ util_format_srgb(enum pipe_format format)
       return PIPE_FORMAT_B5G6R5_SRGB;
    case PIPE_FORMAT_BPTC_RGBA_UNORM:
       return PIPE_FORMAT_BPTC_SRGBA;
+   case PIPE_FORMAT_ASTC_4x4:
+      return PIPE_FORMAT_ASTC_4x4_SRGB;
+   case PIPE_FORMAT_ASTC_5x4:
+      return PIPE_FORMAT_ASTC_5x4_SRGB;
+   case PIPE_FORMAT_ASTC_5x5:
+      return PIPE_FORMAT_ASTC_5x5_SRGB;
+   case PIPE_FORMAT_ASTC_6x5:
+      return PIPE_FORMAT_ASTC_6x5_SRGB;
+   case PIPE_FORMAT_ASTC_6x6:
+      return PIPE_FORMAT_ASTC_6x6_SRGB;
+   case PIPE_FORMAT_ASTC_8x5:
+      return PIPE_FORMAT_ASTC_8x5_SRGB;
+   case PIPE_FORMAT_ASTC_8x6:
+      return PIPE_FORMAT_ASTC_8x6_SRGB;
+   case PIPE_FORMAT_ASTC_8x8:
+      return PIPE_FORMAT_ASTC_8x8_SRGB;
+   case PIPE_FORMAT_ASTC_10x5:
+      return PIPE_FORMAT_ASTC_10x5_SRGB;
+   case PIPE_FORMAT_ASTC_10x6:
+      return PIPE_FORMAT_ASTC_10x6_SRGB;
+   case PIPE_FORMAT_ASTC_10x8:
+      return PIPE_FORMAT_ASTC_10x8_SRGB;
+   case PIPE_FORMAT_ASTC_10x10:
+      return PIPE_FORMAT_ASTC_10x10_SRGB;
+   case PIPE_FORMAT_ASTC_12x10:
+      return PIPE_FORMAT_ASTC_12x10_SRGB;
+   case PIPE_FORMAT_ASTC_12x12:
+      return PIPE_FORMAT_ASTC_12x12_SRGB;
+
    default:
       return PIPE_FORMAT_NONE;
    }
@@ -971,6 +1006,34 @@ util_format_linear(enum pipe_format format)
       return PIPE_FORMAT_B5G6R5_UNORM;
    case PIPE_FORMAT_BPTC_SRGBA:
       return PIPE_FORMAT_BPTC_RGBA_UNORM;
+   case PIPE_FORMAT_ASTC_4x4_SRGB:
+      return PIPE_FORMAT_ASTC_4x4;
+   case PIPE_FORMAT_ASTC_5x4_SRGB:
+      return PIPE_FORMAT_ASTC_5x4;
+   case PIPE_FORMAT_ASTC_5x5_SRGB:
+      return PIPE_FORMAT_ASTC_5x5;
+   case PIPE_FORMAT_ASTC_6x5_SRGB:
+      return PIPE_FORMAT_ASTC_6x5;
+   case PIPE_FORMAT_ASTC_6x6_SRGB:
+      return PIPE_FORMAT_ASTC_6x6;
+   case PIPE_FORMAT_ASTC_8x5_SRGB:
+      return PIPE_FORMAT_ASTC_8x5;
+   case PIPE_FORMAT_ASTC_8x6_SRGB:
+      return PIPE_FORMAT_ASTC_8x6;
+   case PIPE_FORMAT_ASTC_8x8_SRGB:
+      return PIPE_FORMAT_ASTC_8x8;
+   case PIPE_FORMAT_ASTC_10x5_SRGB:
+      return PIPE_FORMAT_ASTC_10x5;
+   case PIPE_FORMAT_ASTC_10x6_SRGB:
+      return PIPE_FORMAT_ASTC_10x6;
+   case PIPE_FORMAT_ASTC_10x8_SRGB:
+      return PIPE_FORMAT_ASTC_10x8;
+   case PIPE_FORMAT_ASTC_10x10_SRGB:
+      return PIPE_FORMAT_ASTC_10x10;
+   case PIPE_FORMAT_ASTC_12x10_SRGB:
+      return PIPE_FORMAT_ASTC_12x10;
+   case PIPE_FORMAT_ASTC_12x12_SRGB:
+      return PIPE_FORMAT_ASTC_12x12;
    default:
       return format;
    }
diff --git a/src/gallium/auxiliary/util/u_format_pack.py b/src/gallium/auxiliary/util/u_format_pack.py
index fb42de723c4..d4bb1de4cb5 100644
--- a/src/gallium/auxiliary/util/u_format_pack.py
+++ b/src/gallium/auxiliary/util/u_format_pack.py
@@ -686,7 +686,7 @@ def generate_format_fetch(format, dst_channel, dst_native_type, dst_suffix):
 
 
 def is_format_hand_written(format):
-    return format.layout in ('s3tc', 'rgtc', 'etc', 'bptc', 'subsampled', 'other') or format.colorspace == ZS
+    return format.layout in ('s3tc', 'rgtc', 'etc', 'bptc', 'astc', 'subsampled', 'other') or format.colorspace == ZS
 
 
 def generate(formats):
diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py
index 1fec0cbfd15..879d10ff01d 100755
--- a/src/gallium/auxiliary/util/u_format_table.py
+++ b/src/gallium/auxiliary/util/u_format_table.py
@@ -139,7 +139,7 @@ def write_format_table(formats):
         u_format_pack.print_channels(format, do_swizzle_array)
         print "   %s," % (colorspace_map(format.colorspace),)
         access = True
-        if format.layout == 'bptc':
+        if format.layout in ('bptc', 'astc'):
             access = False
         if format.layout == 'etc' and format.short_name() != 'etc1_rgb8':
             access = False
diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h
index 5f0690e5ae6..d9c9f9b5cc2 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -359,6 +359,36 @@ enum pipe_format {
    PIPE_FORMAT_ETC2_RG11_UNORM         = 277,
    PIPE_FORMAT_ETC2_RG11_SNORM         = 278,
 
+   PIPE_FORMAT_ASTC_4x4                = 279,
+   PIPE_FORMAT_ASTC_5x4                = 280,
+   PIPE_FORMAT_ASTC_5x5                = 281,
+   PIPE_FORMAT_ASTC_6x5                = 282,
+   PIPE_FORMAT_ASTC_6x6                = 283,
+   PIPE_FORMAT_ASTC_8x5                = 284,
+   PIPE_FORMAT_ASTC_8x6                = 285,
+   PIPE_FORMAT_ASTC_8x8                = 286,
+   PIPE_FORMAT_ASTC_10x5               = 287,
+   PIPE_FORMAT_ASTC_10x6               = 288,
+   PIPE_FORMAT_ASTC_10x8               = 289,
+   PIPE_FORMAT_ASTC_10x10              = 290,
+   PIPE_FORMAT_ASTC_12x10              = 291,
+   PIPE_FORMAT_ASTC_12x12              = 292,
+
+   PIPE_FORMAT_ASTC_4x4_SRGB           = 293,
+   PIPE_FORMAT_ASTC_5x4_SRGB           = 294,
+   PIPE_FORMAT_ASTC_5x5_SRGB           = 295,
+   PIPE_FORMAT_ASTC_6x5_SRGB           = 296,
+   PIPE_FORMAT_ASTC_6x6_SRGB           = 297,
+   PIPE_FORMAT_ASTC_8x5_SRGB           = 298,
+   PIPE_FORMAT_ASTC_8x6_SRGB           = 299,
+   PIPE_FORMAT_ASTC_8x8_SRGB           = 300,
+   PIPE_FORMAT_ASTC_10x5_SRGB          = 301,
+   PIPE_FORMAT_ASTC_10x6_SRGB          = 302,
+   PIPE_FORMAT_ASTC_10x8_SRGB          = 303,
+   PIPE_FORMAT_ASTC_10x10_SRGB         = 304,
+   PIPE_FORMAT_ASTC_12x10_SRGB         = 305,
+   PIPE_FORMAT_ASTC_12x12_SRGB         = 306,
+
    PIPE_FORMAT_COUNT
 };
 

From 6b21d3c92e5878284c008edb68a0155edfcf89b4 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 21 Nov 2015 12:19:52 -0500
Subject: [PATCH 308/335] st/mesa: add astc support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This doesn't account for the ldr/hdr distinction... that will probably
have to be exposed via a separate cap. When relevant hardware appears,
this can be worked out.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
---
 src/mesa/state_tracker/st_extensions.c |  32 +++-
 src/mesa/state_tracker/st_format.c     | 231 +++++++++++++++++++++++++
 2 files changed, 262 insertions(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 99e96e1f3ae..a2418e28a91 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -343,7 +343,7 @@ struct st_extension_cap_mapping {
 
 struct st_extension_format_mapping {
    int extension_offset[2];
-   enum pipe_format format[8];
+   enum pipe_format format[32];
 
    /* If TRUE, at least one format must be supported for the extensions to be
     * advertised. If FALSE, all the formats must be supported. */
@@ -569,6 +569,36 @@ void st_init_extensions(struct pipe_screen *screen,
           PIPE_FORMAT_BPTC_RGB_FLOAT,
           PIPE_FORMAT_BPTC_RGB_UFLOAT } },
 
+      { { o(KHR_texture_compression_astc_ldr) },
+        { PIPE_FORMAT_ASTC_4x4,
+          PIPE_FORMAT_ASTC_5x4,
+          PIPE_FORMAT_ASTC_5x5,
+          PIPE_FORMAT_ASTC_6x5,
+          PIPE_FORMAT_ASTC_6x6,
+          PIPE_FORMAT_ASTC_8x5,
+          PIPE_FORMAT_ASTC_8x6,
+          PIPE_FORMAT_ASTC_8x8,
+          PIPE_FORMAT_ASTC_10x5,
+          PIPE_FORMAT_ASTC_10x6,
+          PIPE_FORMAT_ASTC_10x8,
+          PIPE_FORMAT_ASTC_10x10,
+          PIPE_FORMAT_ASTC_12x10,
+          PIPE_FORMAT_ASTC_12x12,
+          PIPE_FORMAT_ASTC_4x4_SRGB,
+          PIPE_FORMAT_ASTC_5x4_SRGB,
+          PIPE_FORMAT_ASTC_5x5_SRGB,
+          PIPE_FORMAT_ASTC_6x5_SRGB,
+          PIPE_FORMAT_ASTC_6x6_SRGB,
+          PIPE_FORMAT_ASTC_8x5_SRGB,
+          PIPE_FORMAT_ASTC_8x6_SRGB,
+          PIPE_FORMAT_ASTC_8x8_SRGB,
+          PIPE_FORMAT_ASTC_10x5_SRGB,
+          PIPE_FORMAT_ASTC_10x6_SRGB,
+          PIPE_FORMAT_ASTC_10x8_SRGB,
+          PIPE_FORMAT_ASTC_10x10_SRGB,
+          PIPE_FORMAT_ASTC_12x10_SRGB,
+          PIPE_FORMAT_ASTC_12x12_SRGB } },
+
       { { o(EXT_texture_shared_exponent) },
         { PIPE_FORMAT_R9G9B9E5_FLOAT } },
 
diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c
index 144b7d6f659..2b92bade440 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -465,6 +465,64 @@ st_mesa_format_to_pipe_format(struct st_context *st, mesa_format mesaFormat)
    case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
       return st->has_etc2 ? PIPE_FORMAT_ETC2_SRGB8A1 : PIPE_FORMAT_B8G8R8A8_SRGB;
 
+   case MESA_FORMAT_RGBA_ASTC_4x4:
+      return PIPE_FORMAT_ASTC_4x4;
+   case MESA_FORMAT_RGBA_ASTC_5x4:
+      return PIPE_FORMAT_ASTC_5x4;
+   case MESA_FORMAT_RGBA_ASTC_5x5:
+      return PIPE_FORMAT_ASTC_5x5;
+   case MESA_FORMAT_RGBA_ASTC_6x5:
+      return PIPE_FORMAT_ASTC_6x5;
+   case MESA_FORMAT_RGBA_ASTC_6x6:
+      return PIPE_FORMAT_ASTC_6x6;
+   case MESA_FORMAT_RGBA_ASTC_8x5:
+      return PIPE_FORMAT_ASTC_8x5;
+   case MESA_FORMAT_RGBA_ASTC_8x6:
+      return PIPE_FORMAT_ASTC_8x6;
+   case MESA_FORMAT_RGBA_ASTC_8x8:
+      return PIPE_FORMAT_ASTC_8x8;
+   case MESA_FORMAT_RGBA_ASTC_10x5:
+      return PIPE_FORMAT_ASTC_10x5;
+   case MESA_FORMAT_RGBA_ASTC_10x6:
+      return PIPE_FORMAT_ASTC_10x6;
+   case MESA_FORMAT_RGBA_ASTC_10x8:
+      return PIPE_FORMAT_ASTC_10x8;
+   case MESA_FORMAT_RGBA_ASTC_10x10:
+      return PIPE_FORMAT_ASTC_10x10;
+   case MESA_FORMAT_RGBA_ASTC_12x10:
+      return PIPE_FORMAT_ASTC_12x10;
+   case MESA_FORMAT_RGBA_ASTC_12x12:
+      return PIPE_FORMAT_ASTC_12x12;
+
+   case MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4:
+      return PIPE_FORMAT_ASTC_4x4_SRGB;
+   case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4:
+      return PIPE_FORMAT_ASTC_5x4_SRGB;
+   case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5:
+      return PIPE_FORMAT_ASTC_5x5_SRGB;
+   case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5:
+      return PIPE_FORMAT_ASTC_6x5_SRGB;
+   case MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6:
+      return PIPE_FORMAT_ASTC_6x6_SRGB;
+   case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5:
+      return PIPE_FORMAT_ASTC_8x5_SRGB;
+   case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6:
+      return PIPE_FORMAT_ASTC_8x6_SRGB;
+   case MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8:
+      return PIPE_FORMAT_ASTC_8x8_SRGB;
+   case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5:
+      return PIPE_FORMAT_ASTC_10x5_SRGB;
+   case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6:
+      return PIPE_FORMAT_ASTC_10x6_SRGB;
+   case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8:
+      return PIPE_FORMAT_ASTC_10x8_SRGB;
+   case MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10:
+      return PIPE_FORMAT_ASTC_10x10_SRGB;
+   case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10:
+      return PIPE_FORMAT_ASTC_12x10_SRGB;
+   case MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12:
+      return PIPE_FORMAT_ASTC_12x12_SRGB;
+
    default:
       return PIPE_FORMAT_NONE;
    }
@@ -883,6 +941,64 @@ st_pipe_format_to_mesa_format(enum pipe_format format)
    case PIPE_FORMAT_ETC2_RG11_SNORM:
       return MESA_FORMAT_ETC2_SIGNED_RG11_EAC;
 
+   case PIPE_FORMAT_ASTC_4x4:
+      return MESA_FORMAT_RGBA_ASTC_4x4;
+   case PIPE_FORMAT_ASTC_5x4:
+      return MESA_FORMAT_RGBA_ASTC_5x4;
+   case PIPE_FORMAT_ASTC_5x5:
+      return MESA_FORMAT_RGBA_ASTC_5x5;
+   case PIPE_FORMAT_ASTC_6x5:
+      return MESA_FORMAT_RGBA_ASTC_6x5;
+   case PIPE_FORMAT_ASTC_6x6:
+      return MESA_FORMAT_RGBA_ASTC_6x6;
+   case PIPE_FORMAT_ASTC_8x5:
+      return MESA_FORMAT_RGBA_ASTC_8x5;
+   case PIPE_FORMAT_ASTC_8x6:
+      return MESA_FORMAT_RGBA_ASTC_8x6;
+   case PIPE_FORMAT_ASTC_8x8:
+      return MESA_FORMAT_RGBA_ASTC_8x8;
+   case PIPE_FORMAT_ASTC_10x5:
+      return MESA_FORMAT_RGBA_ASTC_10x5;
+   case PIPE_FORMAT_ASTC_10x6:
+      return MESA_FORMAT_RGBA_ASTC_10x6;
+   case PIPE_FORMAT_ASTC_10x8:
+      return MESA_FORMAT_RGBA_ASTC_10x8;
+   case PIPE_FORMAT_ASTC_10x10:
+      return MESA_FORMAT_RGBA_ASTC_10x10;
+   case PIPE_FORMAT_ASTC_12x10:
+      return MESA_FORMAT_RGBA_ASTC_12x10;
+   case PIPE_FORMAT_ASTC_12x12:
+      return MESA_FORMAT_RGBA_ASTC_12x12;
+
+   case PIPE_FORMAT_ASTC_4x4_SRGB:
+      return MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4;
+   case PIPE_FORMAT_ASTC_5x4_SRGB:
+      return MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4;
+   case PIPE_FORMAT_ASTC_5x5_SRGB:
+      return MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5;
+   case PIPE_FORMAT_ASTC_6x5_SRGB:
+      return MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5;
+   case PIPE_FORMAT_ASTC_6x6_SRGB:
+      return MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6;
+   case PIPE_FORMAT_ASTC_8x5_SRGB:
+      return MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5;
+   case PIPE_FORMAT_ASTC_8x6_SRGB:
+      return MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6;
+   case PIPE_FORMAT_ASTC_8x8_SRGB:
+      return MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8;
+   case PIPE_FORMAT_ASTC_10x5_SRGB:
+      return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5;
+   case PIPE_FORMAT_ASTC_10x6_SRGB:
+      return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6;
+   case PIPE_FORMAT_ASTC_10x8_SRGB:
+      return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8;
+   case PIPE_FORMAT_ASTC_10x10_SRGB:
+      return MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10;
+   case PIPE_FORMAT_ASTC_12x10_SRGB:
+      return MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10;
+   case PIPE_FORMAT_ASTC_12x12_SRGB:
+      return MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12;
+
    default:
       return MESA_FORMAT_NONE;
    }
@@ -1386,6 +1502,121 @@ static const struct format_mapping format_map[] = {
       { PIPE_FORMAT_BPTC_RGB_UFLOAT, 0 },
    },
 
+   /* ASTC */
+   {
+      { GL_COMPRESSED_RGBA_ASTC_4x4_KHR, 0 },
+      { PIPE_FORMAT_ASTC_4x4, 0},
+   },
+   {
+      { GL_COMPRESSED_RGBA_ASTC_5x4_KHR, 0 },
+      { PIPE_FORMAT_ASTC_5x4, 0},
+   },
+   {
+      { GL_COMPRESSED_RGBA_ASTC_5x5_KHR, 0 },
+      { PIPE_FORMAT_ASTC_5x5, 0},
+   },
+   {
+      { GL_COMPRESSED_RGBA_ASTC_6x5_KHR, 0 },
+      { PIPE_FORMAT_ASTC_6x5, 0},
+   },
+   {
+      { GL_COMPRESSED_RGBA_ASTC_6x6_KHR, 0 },
+      { PIPE_FORMAT_ASTC_6x6, 0},
+   },
+   {
+      { GL_COMPRESSED_RGBA_ASTC_8x5_KHR, 0 },
+      { PIPE_FORMAT_ASTC_8x5, 0},
+   },
+   {
+      { GL_COMPRESSED_RGBA_ASTC_8x6_KHR, 0 },
+      { PIPE_FORMAT_ASTC_8x6, 0},
+   },
+   {
+      { GL_COMPRESSED_RGBA_ASTC_8x8_KHR, 0 },
+      { PIPE_FORMAT_ASTC_8x8, 0},
+   },
+   {
+      { GL_COMPRESSED_RGBA_ASTC_10x5_KHR, 0 },
+      { PIPE_FORMAT_ASTC_10x5, 0},
+   },
+   {
+      { GL_COMPRESSED_RGBA_ASTC_10x6_KHR, 0 },
+      { PIPE_FORMAT_ASTC_10x6, 0},
+   },
+   {
+      { GL_COMPRESSED_RGBA_ASTC_10x8_KHR, 0 },
+      { PIPE_FORMAT_ASTC_10x8, 0},
+   },
+   {
+      { GL_COMPRESSED_RGBA_ASTC_10x10_KHR, 0 },
+      { PIPE_FORMAT_ASTC_10x10, 0},
+   },
+   {
+      { GL_COMPRESSED_RGBA_ASTC_12x10_KHR, 0 },
+      { PIPE_FORMAT_ASTC_12x10, 0},
+   },
+   {
+      { GL_COMPRESSED_RGBA_ASTC_12x12_KHR, 0 },
+      { PIPE_FORMAT_ASTC_12x12, 0},
+   },
+
+   {
+      { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR, 0 },
+      { PIPE_FORMAT_ASTC_4x4_SRGB, 0},
+   },
+   {
+      { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR, 0 },
+      { PIPE_FORMAT_ASTC_5x4_SRGB, 0},
+   },
+   {
+      { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR, 0 },
+      { PIPE_FORMAT_ASTC_5x5_SRGB, 0},
+   },
+   {
+      { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR, 0 },
+      { PIPE_FORMAT_ASTC_6x5_SRGB, 0},
+   },
+   {
+      { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR, 0 },
+      { PIPE_FORMAT_ASTC_6x6_SRGB, 0},
+   },
+   {
+      { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR, 0 },
+      { PIPE_FORMAT_ASTC_8x5_SRGB, 0},
+   },
+   {
+      { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR, 0 },
+      { PIPE_FORMAT_ASTC_8x6_SRGB, 0},
+   },
+   {
+      { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR, 0 },
+      { PIPE_FORMAT_ASTC_8x8_SRGB, 0},
+   },
+   {
+      { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR, 0 },
+      { PIPE_FORMAT_ASTC_10x5_SRGB, 0},
+   },
+   {
+      { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR, 0 },
+      { PIPE_FORMAT_ASTC_10x6_SRGB, 0},
+   },
+   {
+      { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, 0 },
+      { PIPE_FORMAT_ASTC_10x8_SRGB, 0},
+   },
+   {
+      { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, 0 },
+      { PIPE_FORMAT_ASTC_10x10_SRGB, 0},
+   },
+   {
+      { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, 0 },
+      { PIPE_FORMAT_ASTC_12x10_SRGB, 0},
+   },
+   {
+      { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, 0 },
+      { PIPE_FORMAT_ASTC_12x12_SRGB, 0},
+   },
+
    /* signed/unsigned integer formats.
     */
    {

From 93905a8df1d982d171ee690f9ec73f90f00fa49f Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 21 Nov 2015 11:49:03 -0500
Subject: [PATCH 309/335] freedreno/a4xx: add astc formats

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 .../drivers/freedreno/a4xx/fd4_format.c       | 33 +++++++++++++++++++
 .../drivers/freedreno/freedreno_resource.c    |  7 +++-
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
index dceb3b98c75..dc126b153cf 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -303,6 +303,36 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
 	_T(LATC1_SNORM, RGTC1_SNORM, NONE, WZYX),
 	_T(LATC2_UNORM, RGTC2_UNORM, NONE, WZYX),
 	_T(LATC2_SNORM, RGTC2_SNORM, NONE, WZYX),
+
+	_T(ASTC_4x4,   ASTC_4x4,   NONE, WZYX),
+	_T(ASTC_5x4,   ASTC_5x4,   NONE, WZYX),
+	_T(ASTC_5x5,   ASTC_5x5,   NONE, WZYX),
+	_T(ASTC_6x5,   ASTC_6x5,   NONE, WZYX),
+	_T(ASTC_6x6,   ASTC_6x6,   NONE, WZYX),
+	_T(ASTC_8x5,   ASTC_8x5,   NONE, WZYX),
+	_T(ASTC_8x6,   ASTC_8x6,   NONE, WZYX),
+	_T(ASTC_8x8,   ASTC_8x8,   NONE, WZYX),
+	_T(ASTC_10x5,  ASTC_10x5,  NONE, WZYX),
+	_T(ASTC_10x6,  ASTC_10x6,  NONE, WZYX),
+	_T(ASTC_10x8,  ASTC_10x8,  NONE, WZYX),
+	_T(ASTC_10x10, ASTC_10x10, NONE, WZYX),
+	_T(ASTC_12x10, ASTC_12x10, NONE, WZYX),
+	_T(ASTC_12x12, ASTC_12x12, NONE, WZYX),
+
+	_T(ASTC_4x4_SRGB,   ASTC_4x4,   NONE, WZYX),
+	_T(ASTC_5x4_SRGB,   ASTC_5x4,   NONE, WZYX),
+	_T(ASTC_5x5_SRGB,   ASTC_5x5,   NONE, WZYX),
+	_T(ASTC_6x5_SRGB,   ASTC_6x5,   NONE, WZYX),
+	_T(ASTC_6x6_SRGB,   ASTC_6x6,   NONE, WZYX),
+	_T(ASTC_8x5_SRGB,   ASTC_8x5,   NONE, WZYX),
+	_T(ASTC_8x6_SRGB,   ASTC_8x6,   NONE, WZYX),
+	_T(ASTC_8x8_SRGB,   ASTC_8x8,   NONE, WZYX),
+	_T(ASTC_10x5_SRGB,  ASTC_10x5,  NONE, WZYX),
+	_T(ASTC_10x6_SRGB,  ASTC_10x6,  NONE, WZYX),
+	_T(ASTC_10x8_SRGB,  ASTC_10x8,  NONE, WZYX),
+	_T(ASTC_10x10_SRGB, ASTC_10x10, NONE, WZYX),
+	_T(ASTC_12x10_SRGB, ASTC_12x10, NONE, WZYX),
+	_T(ASTC_12x12_SRGB, ASTC_12x12, NONE, WZYX),
 };
 
 /* convert pipe format to vertex buffer format: */
@@ -346,6 +376,9 @@ fd4_pipe2fetchsize(enum pipe_format format)
 	if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
 		format = PIPE_FORMAT_Z32_FLOAT;
 
+	if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC)
+		return TFETCH4_16_BYTE;
+
 	switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) {
 	case 8:   return TFETCH4_1_BYTE;
 	case 16:  return TFETCH4_2_BYTE;
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index c8e2779d390..63ca9e30620 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -484,6 +484,7 @@ static uint32_t
 setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
 {
 	struct pipe_resource *prsc = &rsc->base.b;
+	enum util_format_layout layout = util_format_description(format)->layout;
 	uint32_t level, size = 0;
 	uint32_t width = prsc->width0;
 	uint32_t height = prsc->height0;
@@ -497,7 +498,11 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format forma
 		struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
 		uint32_t blocks;
 
-		slice->pitch = width = align(width, 32);
+		if (layout == UTIL_FORMAT_LAYOUT_ASTC)
+			slice->pitch = width =
+				util_align_npot(width, 32 * util_format_get_blockwidth(format));
+		else
+			slice->pitch = width = align(width, 32);
 		slice->offset = size;
 		blocks = util_format_get_nblocks(format, width, height);
 		/* 1d array and 2d array textures must all have the same layer size

From f9549d0a0f31f9e63933bb1833e9793ccc19c902 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 21 Nov 2015 19:26:34 -0500
Subject: [PATCH 310/335] freedreno/a4xx: add ARB_texture_rgb10_a2ui support

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h   | 1 +
 src/gallium/drivers/freedreno/a4xx/fd4_format.c | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index 5a5323042d6..e465334868d 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -165,6 +165,7 @@ enum a4xx_tex_fmt {
 	TFMT4_4_4_4_4_UNORM = 8,
 	TFMT4_X8Z24_UNORM = 71,
 	TFMT4_10_10_10_2_UNORM = 33,
+	TFMT4_10_10_10_2_UINT = 34,
 	TFMT4_A8_UNORM = 3,
 	TFMT4_L8_A8_UNORM = 13,
 	TFMT4_8_UNORM = 4,
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
index dc126b153cf..ace5b3f30b0 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -201,8 +201,8 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
 	_T(B10G10R10X2_UNORM,   10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
 	V_(R10G10B10A2_SNORM,   10_10_10_2_SNORM, NONE,              WZYX),
 	V_(B10G10R10A2_SNORM,   10_10_10_2_SNORM, NONE,              WXYZ),
-	V_(R10G10B10A2_UINT,    10_10_10_2_UINT,  NONE,              WZYX),
-	V_(B10G10R10A2_UINT,    10_10_10_2_UINT,  NONE,              WXYZ),
+	VT(R10G10B10A2_UINT,    10_10_10_2_UINT,  R10G10B10A2_UINT,  WZYX),
+	VT(B10G10R10A2_UINT,    10_10_10_2_UINT,  R10G10B10A2_UINT,  WXYZ),
 	V_(R10G10B10A2_USCALED, 10_10_10_2_UINT,  NONE,              WZYX),
 	V_(B10G10R10A2_USCALED, 10_10_10_2_UINT,  NONE,              WXYZ),
 	V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT,  NONE,              WZYX),

From 1b9992b8038781589ac40db22e44d2da0485cf02 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 21 Nov 2015 20:33:23 -0500
Subject: [PATCH 311/335] freedreno/a4xx: add formats for
 ARB_texture_buffer_object_rgb32 support

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h   | 3 +++
 src/gallium/drivers/freedreno/a4xx/fd4_format.c | 7 ++++---
 src/gallium/drivers/freedreno/a4xx/fd4_screen.c | 2 ++
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index e465334868d..a450379e98d 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -204,6 +204,9 @@ enum a4xx_tex_fmt {
 	TFMT4_32_FLOAT = 43,
 	TFMT4_32_32_FLOAT = 56,
 	TFMT4_32_32_32_32_FLOAT = 63,
+	TFMT4_32_32_32_FLOAT = 59,
+	TFMT4_32_32_32_UINT = 60,
+	TFMT4_32_32_32_SINT = 61,
 	TFMT4_9_9_9_E5_FLOAT = 32,
 	TFMT4_11_11_10_FLOAT = 37,
 	TFMT4_DXT1 = 86,
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
index ace5b3f30b0..c240745cec1 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -250,11 +250,11 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
 	_T(L32A32_SINT,    32_32_SINT,  NONE,        WZYX),
 
 	/* 96-bit */
-	V_(R32G32B32_UINT,    32_32_32_UINT,  NONE, WZYX),
-	V_(R32G32B32_SINT,    32_32_32_SINT,  NONE, WZYX),
+	VT(R32G32B32_UINT,    32_32_32_UINT,  NONE, WZYX),
+	VT(R32G32B32_SINT,    32_32_32_SINT,  NONE, WZYX),
 	V_(R32G32B32_USCALED, 32_32_32_UINT,  NONE, WZYX),
 	V_(R32G32B32_SSCALED, 32_32_32_SINT,  NONE, WZYX),
-	V_(R32G32B32_FLOAT,   32_32_32_FLOAT, NONE, WZYX),
+	VT(R32G32B32_FLOAT,   32_32_32_FLOAT, NONE, WZYX),
 	V_(R32G32B32_FIXED,   32_32_32_FIXED, NONE, WZYX),
 
 	/* 128-bit */
@@ -384,6 +384,7 @@ fd4_pipe2fetchsize(enum pipe_format format)
 	case 16:  return TFETCH4_2_BYTE;
 	case 32:  return TFETCH4_4_BYTE;
 	case 64:  return TFETCH4_8_BYTE;
+	case 96:  return TFETCH4_1_BYTE; /* Does this matter? */
 	case 128: return TFETCH4_16_BYTE;
 	default:
 		debug_printf("Unknown block size for format %s: %d\n",
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
index d8ea414f300..b2a69cca56c 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
@@ -57,6 +57,8 @@ fd4_screen_is_format_supported(struct pipe_screen *pscreen,
 	}
 
 	if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
+			(target == PIPE_BUFFER ||
+			 util_format_get_blocksize(format) != 12) &&
 			(fd4_pipe2tex(format) != ~0)) {
 		retval |= PIPE_BIND_SAMPLER_VIEW;
 	}

From f10bb0ac9ea558efe7b6dccd673eb2b6604119db Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 21 Nov 2015 21:24:48 -0500
Subject: [PATCH 312/335] freedreno/a4xx: add ARB_texture_view support

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c    |  3 ++-
 src/gallium/drivers/freedreno/a4xx/fd4_texture.c | 13 +++++++------
 src/gallium/drivers/freedreno/freedreno_screen.c |  2 +-
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index e488450498e..1df0657357e 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -200,7 +200,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
 					offset = view->base.u.buf.first_element *
 						util_format_get_blocksize(view->base.format);
 				} else {
-					offset = fd_resource_offset(rsc, start, 0);
+					offset = fd_resource_offset(
+							rsc, start, view->base.u.tex.first_layer);
 				}
 				OUT_RELOC(ring, rsc->bo, offset, view->texconst4, 0);
 			} else {
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index a37c64473bd..5217c8a1dc5 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -212,7 +212,7 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 {
 	struct fd4_pipe_sampler_view *so = CALLOC_STRUCT(fd4_pipe_sampler_view);
 	struct fd_resource *rsc = fd_resource(prsc);
-	unsigned lvl;
+	unsigned lvl, layers;
 	uint32_t sz2 = 0;
 
 	if (!so)
@@ -225,7 +225,7 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 	so->base.context = pctx;
 
 	so->texconst0 =
-		A4XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
+		A4XX_TEX_CONST_0_TYPE(tex_type(cso->target)) |
 		A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(cso->format)) |
 		fd4_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
 				cso->swizzle_b, cso->swizzle_a);
@@ -233,7 +233,7 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 	if (util_format_is_srgb(cso->format))
 		so->texconst0 |= A4XX_TEX_CONST_0_SRGB;
 
-	if (prsc->target == PIPE_BUFFER) {
+	if (cso->target == PIPE_BUFFER) {
 		unsigned elements = cso->u.buf.last_element -
 			cso->u.buf.first_element + 1;
 		lvl = 0;
@@ -248,6 +248,7 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 
 		lvl = fd_sampler_first_level(cso);
 		miplevels = fd_sampler_last_level(cso) - lvl;
+		layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1;
 
 		so->texconst0 |= A4XX_TEX_CONST_0_MIPLVLS(miplevels);
 		so->texconst1 =
@@ -260,17 +261,17 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 							cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
 	}
 
-	switch (prsc->target) {
+	switch (cso->target) {
 	case PIPE_TEXTURE_1D_ARRAY:
 	case PIPE_TEXTURE_2D_ARRAY:
 		so->texconst3 =
-			A4XX_TEX_CONST_3_DEPTH(prsc->array_size) |
+			A4XX_TEX_CONST_3_DEPTH(layers) |
 			A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size);
 		break;
 	case PIPE_TEXTURE_CUBE:
 	case PIPE_TEXTURE_CUBE_ARRAY:
 		so->texconst3 =
-			A4XX_TEX_CONST_3_DEPTH(prsc->array_size / 6) |
+			A4XX_TEX_CONST_3_DEPTH(layers / 6) |
 			A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size);
 		break;
 	case PIPE_TEXTURE_3D:
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 9e51c4e3c4b..da7dbc91eb0 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -197,6 +197,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
 	case PIPE_CAP_CUBE_MAP_ARRAY:
 	case PIPE_CAP_START_INSTANCE:
+	case PIPE_CAP_SAMPLER_VIEW_TARGET:
 		return is_a4xx(screen);
 
 	case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
@@ -226,7 +227,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
 	case PIPE_CAP_DRAW_INDIRECT:
 	case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
-	case PIPE_CAP_SAMPLER_VIEW_TARGET:
 	case PIPE_CAP_POLYGON_OFFSET_CLAMP:
 	case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
 	case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:

From 6f17f19b17d6150788e713f92f6a05ef410c4060 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sun, 22 Nov 2015 12:13:46 -0500
Subject: [PATCH 313/335] freedreno/a4xx: only compute texture offset once for
 the view

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c    | 15 ++-------------
 src/gallium/drivers/freedreno/a4xx/fd4_texture.c |  3 +++
 src/gallium/drivers/freedreno/a4xx/fd4_texture.h |  1 +
 3 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 1df0657357e..0e4a8cad8c7 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -181,9 +181,7 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
 				CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
 		for (i = 0; i < tex->num_textures; i++) {
-			static const struct fd4_pipe_sampler_view dummy_view = {
-				.base.target = PIPE_TEXTURE_1D,
-			};
+			static const struct fd4_pipe_sampler_view dummy_view = {};
 			const struct fd4_pipe_sampler_view *view = tex->textures[i] ?
 					fd4_pipe_sampler_view(tex->textures[i]) :
 					&dummy_view;
@@ -194,16 +192,7 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
 			OUT_RING(ring, view->texconst3);
 			if (view->base.texture) {
 				struct fd_resource *rsc = fd_resource(view->base.texture);
-				unsigned start = fd_sampler_first_level(&view->base);
-				uint32_t offset;
-				if (rsc->base.b.target == PIPE_BUFFER) {
-					offset = view->base.u.buf.first_element *
-						util_format_get_blocksize(view->base.format);
-				} else {
-					offset = fd_resource_offset(
-							rsc, start, view->base.u.tex.first_layer);
-				}
-				OUT_RELOC(ring, rsc->bo, offset, view->texconst4, 0);
+				OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0);
 			} else {
 				OUT_RING(ring, 0x00000000);
 			}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index 5217c8a1dc5..0eba75577b0 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -243,6 +243,8 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 		so->texconst2 =
 			A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) |
 			A4XX_TEX_CONST_2_PITCH(elements * rsc->cpp);
+		so->offset = cso->u.buf.first_element *
+			util_format_get_blocksize(cso->format);
 	} else {
 		unsigned miplevels;
 
@@ -259,6 +261,7 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 			A4XX_TEX_CONST_2_PITCH(
 					util_format_get_nblocksx(
 							cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
+		so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
 	}
 
 	switch (cso->target) {
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
index d74d88701a8..6ca34ade60d 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
@@ -52,6 +52,7 @@ fd4_sampler_stateobj(struct pipe_sampler_state *samp)
 struct fd4_pipe_sampler_view {
 	struct pipe_sampler_view base;
 	uint32_t texconst0, texconst1, texconst2, texconst3, texconst4;
+	uint32_t offset;
 };
 
 static inline struct fd4_pipe_sampler_view *

From 81b16350fa2e7c1b47d976be12d2313283f22e24 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sun, 22 Nov 2015 14:03:29 -0500
Subject: [PATCH 314/335] freedreno/a4xx: use a factor of 32767 for snorm8
 blending

It appears that the hardware wants the integer blend color to be scaled
the same way as the hardware representation of the render target format.
snorm16 uses one of the float factors, so this is only relevant for snorm8.

This fixes a number of subcases of
  bin/fbo-blending-formats GL_EXT_texture_snorm

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: mesa-stable@lists.freedesktop.org
---
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 39 ++++++++++++++++---
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 0e4a8cad8c7..69f263e3ba8 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -662,19 +662,48 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 				A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
 	}
 
-	if (dirty & FD_DIRTY_BLEND_COLOR) {
+	if (dirty & (FD_DIRTY_BLEND_COLOR | FD_DIRTY_FRAMEBUFFER)) {
 		struct pipe_blend_color *bcolor = &ctx->blend_color;
+		struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+		float factor = 65535.0;
+		int i;
+
+		for (i = 0; i < pfb->nr_cbufs; i++) {
+			enum pipe_format format = pipe_surface_format(pfb->cbufs[i]);
+			const struct util_format_description *desc =
+				util_format_description(format);
+			int j;
+
+			if (desc->is_mixed)
+				continue;
+
+			j = util_format_get_first_non_void_channel(format);
+			if (j == -1)
+				continue;
+
+			if (desc->channel[j].size > 8 || !desc->channel[j].normalized ||
+				desc->channel[j].pure_integer)
+				continue;
+
+			/* Just use the first unorm8/snorm8 render buffer. Can't keep
+			 * everyone happy.
+			 */
+			if (desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED)
+				factor = 32767.0;
+			break;
+		}
+
 		OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 8);
-		OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 65535.0) |
+		OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * factor) |
 				A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0]));
 		OUT_RING(ring, A4XX_RB_BLEND_RED_F32(bcolor->color[0]));
-		OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 65535.0) |
+		OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * factor) |
 				A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]));
 		OUT_RING(ring, A4XX_RB_BLEND_GREEN_F32(bcolor->color[1]));
-		OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 65535.0) |
+		OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * factor) |
 				A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]));
 		OUT_RING(ring, A4XX_RB_BLEND_BLUE_F32(bcolor->color[2]));
-		OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 65535.0) |
+		OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * factor) |
 				A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]));
 		OUT_RING(ring, A4XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
 	}

From 9761d5146fa76dbb03da0ba94beba4c249f061d1 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sun, 22 Nov 2015 14:06:26 -0500
Subject: [PATCH 315/335] freedreno/a4xx: re-emit program on dirty framebuffer

The program emit depends on certain fb details. Make sure those get
updated when the fb changes.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 69f263e3ba8..f220fc7ac1f 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -613,7 +613,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
 	}
 
-	if (dirty & FD_DIRTY_PROG) {
+	if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
 		struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
 		fd4_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs);
 	}

From f0e670bdd7e7084c4c21197770aa95cf0ab8139a Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sun, 22 Nov 2015 16:41:16 -0500
Subject: [PATCH 316/335] ttn: add LODQ support

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/auxiliary/nir/tgsi_to_nir.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index ce1da102777..e2b1040e636 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -1280,6 +1280,10 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
       num_srcs = 3;
       samp = 3;
       break;
+   case TGSI_OPCODE_LODQ:
+      op = nir_texop_lod;
+      num_srcs = 1;
+      break;
 
    default:
       fprintf(stderr, "unknown TGSI tex op %d\n", tgsi_inst->Instruction.Opcode);
@@ -1332,7 +1336,9 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
     */
    sview = instr->sampler_index;
 
-   if (sview < c->num_samp_types) {
+   if (op == nir_texop_lod) {
+      instr->dest_type = nir_type_float;
+   } else if (sview < c->num_samp_types) {
       instr->dest_type = c->samp_types[sview];
    } else {
       instr->dest_type = nir_type_float;
@@ -1648,7 +1654,7 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = {
    [TGSI_OPCODE_UMUL_HI] = nir_op_umul_high,
 
    [TGSI_OPCODE_TG4] = 0,
-   [TGSI_OPCODE_LODQ] = 0, /* XXX */
+   [TGSI_OPCODE_LODQ] = 0,
 
    [TGSI_OPCODE_IBFE] = nir_op_ibitfield_extract,
    [TGSI_OPCODE_UBFE] = nir_op_ubitfield_extract,
@@ -1816,6 +1822,7 @@ ttn_emit_instruction(struct ttn_compile *c)
    case TGSI_OPCODE_TXQ_LZ:
    case TGSI_OPCODE_TXF:
    case TGSI_OPCODE_TG4:
+   case TGSI_OPCODE_LODQ:
       ttn_tex(c, dest, src);
       break;
 

From 190acb34ca165c840f87a25149eab1d1b7dc85c4 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sun, 22 Nov 2015 16:47:25 -0500
Subject: [PATCH 317/335] freedreno/a4xx: add ARB_texture_query_lod support

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 .../drivers/freedreno/freedreno_screen.c      |  2 +-
 .../drivers/freedreno/ir3/ir3_compiler_nir.c  | 24 +++++++++++++++----
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index da7dbc91eb0..5bbe4016a2a 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -198,6 +198,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_CUBE_MAP_ARRAY:
 	case PIPE_CAP_START_INSTANCE:
 	case PIPE_CAP_SAMPLER_VIEW_TARGET:
+	case PIPE_CAP_TEXTURE_QUERY_LOD:
 		return is_a4xx(screen);
 
 	case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
@@ -221,7 +222,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
 	case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
 	case PIPE_CAP_TEXTURE_GATHER_SM5:
-	case PIPE_CAP_TEXTURE_QUERY_LOD:
 	case PIPE_CAP_SAMPLE_SHADING:
 	case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
 	case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 25e84121d0c..fc163b49975 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1547,10 +1547,10 @@ tex_info(nir_tex_instr *tex, unsigned *flagsp, unsigned *coordsp)
 		unreachable("bad sampler_dim");
 	}
 
-	if (tex->is_shadow)
+	if (tex->is_shadow && tex->op != nir_texop_lod)
 		flags |= IR3_INSTR_S;
 
-	if (tex->is_array)
+	if (tex->is_array && tex->op != nir_texop_lod)
 		flags |= IR3_INSTR_A;
 
 	*flagsp = flags;
@@ -1618,9 +1618,9 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
 	case nir_texop_txl:      opc = OPC_SAML;     break;
 	case nir_texop_txd:      opc = OPC_SAMGQ;    break;
 	case nir_texop_txf:      opc = OPC_ISAML;    break;
+	case nir_texop_lod:      opc = OPC_GETLOD;   break;
 	case nir_texop_txf_ms:
 	case nir_texop_txs:
-	case nir_texop_lod:
 	case nir_texop_tg4:
 	case nir_texop_query_levels:
 	case nir_texop_texture_samples:
@@ -1666,10 +1666,10 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
 		src0[nsrc0++] = create_immed(b, fui(0.5));
 	}
 
-	if (tex->is_shadow)
+	if (tex->is_shadow && tex->op != nir_texop_lod)
 		src0[nsrc0++] = compare;
 
-	if (tex->is_array)
+	if (tex->is_array && tex->op != nir_texop_lod)
 		src0[nsrc0++] = coord[coords];
 
 	if (has_proj) {
@@ -1726,12 +1726,26 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
 		unreachable("bad dest_type");
 	}
 
+	if (opc == OPC_GETLOD)
+		type = TYPE_U32;
+
 	sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_XYZW,
 			flags, tex->sampler_index, tex->sampler_index,
 			create_collect(b, src0, nsrc0),
 			create_collect(b, src1, nsrc1));
 
 	split_dest(b, dst, sam, 4);
+
+	/* GETLOD returns results in 4.8 fixed point */
+	if (opc == OPC_GETLOD) {
+		struct ir3_instruction *factor = create_immed(b, fui(1.0 / 256));
+
+		compile_assert(ctx, tex->dest_type == nir_type_float);
+		for (i = 0; i < 2; i++) {
+			dst[i] = ir3_MUL_F(b, ir3_COV(b, dst[i], TYPE_U32, TYPE_F32), 0,
+							   factor, 0);
+		}
+	}
 }
 
 static void

From cca8dd4e938e6c2bd0a28564347aa69211529e1b Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sun, 22 Nov 2015 17:37:47 -0500
Subject: [PATCH 318/335] ttn: fix UMSB conversion

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/auxiliary/nir/tgsi_to_nir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index e2b1040e636..86c2ffadbc8 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -1663,7 +1663,7 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = {
    [TGSI_OPCODE_POPC] = nir_op_bit_count,
    [TGSI_OPCODE_LSB] = nir_op_find_lsb,
    [TGSI_OPCODE_IMSB] = nir_op_ifind_msb,
-   [TGSI_OPCODE_UMSB] = nir_op_ifind_msb, /* XXX: signed vs unsigned */
+   [TGSI_OPCODE_UMSB] = nir_op_ufind_msb,
 
    [TGSI_OPCODE_INTERP_CENTROID] = 0, /* XXX */
    [TGSI_OPCODE_INTERP_SAMPLE] = 0, /* XXX */

From 754b26e76dd7738a3dc5d6be8eb5a6d7b256ee1a Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sun, 22 Nov 2015 17:46:13 -0500
Subject: [PATCH 319/335] freedreno/ir3: add support for a few gs5 ops

Tested on a4xx. This is part of the builtins added by ARB_gpu_shader5
and GLSL ES 3.10.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 .../drivers/freedreno/ir3/ir3_compiler_nir.c  | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index fc163b49975..156bb0be247 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1177,6 +1177,33 @@ emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu)
 		dst[0] = ir3_SEL_B32(b, src[1], 0, ir3_b2n(b, src[0]), 0, src[2], 0);
 		break;
 
+	case nir_op_bit_count:
+		dst[0] = ir3_CBITS_B(b, src[0], 0);
+		break;
+	case nir_op_ifind_msb: {
+		struct ir3_instruction *cmp;
+		dst[0] = ir3_CLZ_S(b, src[0], 0);
+		cmp = ir3_CMPS_S(b, dst[0], 0, create_immed(b, 0), 0);
+		cmp->cat2.condition = IR3_COND_GE;
+		dst[0] = ir3_SEL_B32(b,
+				ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0,
+				cmp, 0, dst[0], 0);
+		break;
+	}
+	case nir_op_ufind_msb:
+		dst[0] = ir3_CLZ_B(b, src[0], 0);
+		dst[0] = ir3_SEL_B32(b,
+				ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0,
+				src[0], 0, dst[0], 0);
+		break;
+	case nir_op_find_lsb:
+		dst[0] = ir3_BFREV_B(b, src[0], 0);
+		dst[0] = ir3_CLZ_B(b, dst[0], 0);
+		break;
+	case nir_op_bitfield_reverse:
+		dst[0] = ir3_BFREV_B(b, src[0], 0);
+		break;
+
 	default:
 		compile_error(ctx, "Unhandled ALU op: %s\n",
 				nir_op_infos[alu->op].name);

From c9651f0264cc4189d62c3bda2a5effadeb2974e2 Mon Sep 17 00:00:00 2001
From: Jose Fonseca <jfonseca@vmware.com>
Date: Mon, 23 Nov 2015 16:45:28 +0000
Subject: [PATCH 320/335] svga: Add ASTC formats to format table.

Fixes build.  Otherwise untested.

Trivial.
---
 src/gallium/drivers/svga/svga_format.c | 28 ++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/src/gallium/drivers/svga/svga_format.c b/src/gallium/drivers/svga/svga_format.c
index 0104e8a273a..2b549dfa5bb 100644
--- a/src/gallium/drivers/svga/svga_format.c
+++ b/src/gallium/drivers/svga/svga_format.c
@@ -326,6 +326,34 @@ static const struct vgpu10_format_entry format_conversion_table[] =
    { PIPE_FORMAT_ETC2_R11_SNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
    { PIPE_FORMAT_ETC2_RG11_UNORM,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
    { PIPE_FORMAT_ETC2_RG11_SNORM,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_4x4,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_5x4,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_5x5,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_6x5,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_6x6,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_8x5,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_8x6,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_8x8,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_10x5,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_10x6,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_10x8,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_10x10,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_12x10,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_12x12,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_4x4_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_5x4_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_5x5_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_6x5_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_6x6_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_8x5_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_8x6_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_8x8_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_10x5_SRGB,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_10x6_SRGB,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_10x8_SRGB,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_10x10_SRGB,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_12x10_SRGB,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ASTC_12x12_SRGB,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
 };
 
 

From 33dc9aac07537378e88ec85d8900ff280a81ffca Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Mon, 23 Nov 2015 12:04:50 -0500
Subject: [PATCH 321/335] docs: update relnotes with new freedreno/a4xx support

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 docs/relnotes/11.2.0.html | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/docs/relnotes/11.2.0.html b/docs/relnotes/11.2.0.html
index 8e00d16d64b..20f4543a562 100644
--- a/docs/relnotes/11.2.0.html
+++ b/docs/relnotes/11.2.0.html
@@ -44,7 +44,14 @@ Note: some of the new features are only available with certain drivers.
 </p>
 
 <ul>
-TBD.
+<li>GL_ARB_base_instance on freedreno/a4xx</li>
+<li>GL_ARB_texture_buffer_object_rgb32 on freedreno/a4xx</li>
+<li>GL_ARB_texture_buffer_range on freedreno/a4xx</li>
+<li>GL_ARB_texture_query_lod on freedreno/a4xx</li>
+<li>GL_ARB_texture_rgb10_a2ui on freedreno/a4xx</li>
+<li>GL_ARB_texture_view on freedreno/a4xx</li>
+<li>GL_ARB_vertex_type_10f_11f_11f_rev on freedreno/a4xx</li>
+<li>GL_KHR_texture_compression_astc_ldr on freedreno/a4xx</li>
 </ul>
 
 <h2>Bug fixes</h2>

From e4c1221d367405debdb010b249c633355a3eafe9 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Mon, 23 Nov 2015 12:31:59 -0500
Subject: [PATCH 322/335] docs: add missed freedreno features to relnotes

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: "11.1" <mesa-stable@lists.freedesktop.org>
---
 docs/relnotes/11.1.0.html | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 3c1150a6d2b..77df804d901 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -51,14 +51,19 @@ Note: some of the new features are only available with certain drivers.
 <li>GL_ARB_arrays_of_arrays on i965</li>
 <li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>
 <li>GL_ARB_clear_texture on nv50, nvc0</li>
+<li>GL_ARB_clip_control on freedreno/a4xx</li>
 <li>GL_ARB_copy_image on nv50, nvc0, radeonsi</li>
+<li>GL_ARB_depth_clamp on freedreno/a4xx</li>
 <li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
 <li>GL_ARB_gpu_shader5 on r600 for Evergreen and later chips</li>
+<li>GL_ARB_seamless_cubemap_per_texture on freedreno/a4xx</li>
 <li>GL_ARB_shader_clock on i965 (gen7+)</li>
 <li>GL_ARB_shader_stencil_export on i965 (gen9+)</li>
 <li>GL_ARB_shader_storage_buffer_object on i965</li>
 <li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi</li>
 <li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li>
+<li>GL_ARB_texture_buffer_range on freedreno/a3xx</li>
+<li>GL_ARB_texture_compression_bptc on freedreno/a4xx</li>
 <li>GL_ARB_texture_query_lod on softpipe</li>
 <li>GL_ARB_texture_view on radeonsi and r600 (for evergeen and newer)</li>
 <li>GL_ARB_vertex_type_2_10_10_10_rev on freedreno (a3xx, a4xx)</li>

From 9d703de85a7bdd6ae767aedd7690cb82794cb90a Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Mon, 23 Nov 2015 10:53:01 -0800
Subject: [PATCH 323/335] i965: Use ull immediates in brw_inst_bits

This fixes a regression introduced in b1a83b5d1 that caused basically all
shaders to fail to compile on 32-bit platforms.

Reported-by: Mark Janes <mark.a.janes@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_inst.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_inst.h b/src/mesa/drivers/dri/i965/brw_inst.h
index b2afe17f950..cd9f6ef591d 100644
--- a/src/mesa/drivers/dri/i965/brw_inst.h
+++ b/src/mesa/drivers/dri/i965/brw_inst.h
@@ -694,7 +694,7 @@ brw_inst_bits(const brw_inst *inst, unsigned high, unsigned low)
    high %= 64;
    low %= 64;
 
-   const uint64_t mask = (~0ul >> (64 - (high - low + 1)));
+   const uint64_t mask = (~0ull >> (64 - (high - low + 1)));
 
    return (inst->data[word] >> low) & mask;
 }
@@ -713,7 +713,7 @@ brw_inst_set_bits(brw_inst *inst, unsigned high, unsigned low, uint64_t value)
    high %= 64;
    low %= 64;
 
-   const uint64_t mask = (~0ul >> (64 - (high - low + 1))) << low;
+   const uint64_t mask = (~0ull >> (64 - (high - low + 1))) << low;
 
    /* Make sure the supplied value actually fits in the given bitfield. */
    assert((value & (mask >> low)) == value);

From 9cf108193b61c342c94c4cd980c4b403638e1051 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 11 Nov 2015 09:40:51 -0800
Subject: [PATCH 324/335] i965/nir: Split shader optimization and lowering into
 three stages

At the moment, brw_create_nir just calls the three stages in sequence so
there's not much difference.  Soon, however, we will want to start doing
variants in NIR at which point the postprocessing step will have to move
from shader create time to codegen time.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 src/mesa/drivers/dri/i965/brw_nir.c | 133 ++++++++++++++++++++--------
 src/mesa/drivers/dri/i965/brw_nir.h |   9 ++
 2 files changed, 104 insertions(+), 38 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 973a20c0b4e..b9d523dd0b6 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -238,43 +238,33 @@ nir_optimize(nir_shader *nir, bool is_scalar)
    return nir;
 }
 
+/* Does some simple lowering and runs the standard suite of optimizations
+ *
+ * This is intended to be called more-or-less directly after you get the
+ * shader out of GLSL or some other source.  While it is geared towards i965,
+ * it is not at all generator-specific except for the is_scalar flag.  Even
+ * there, it is safe to call with is_scalar = false for a shader that is
+ * intended for the FS backend as long as nir_optimize is called again with
+ * is_scalar = true to scalarize everything prior to code gen.
+ */
 nir_shader *
-brw_create_nir(struct brw_context *brw,
-               const struct gl_shader_program *shader_prog,
-               const struct gl_program *prog,
-               gl_shader_stage stage,
-               bool is_scalar)
+brw_preprocess_nir(nir_shader *nir, bool is_scalar)
 {
-   struct gl_context *ctx = &brw->ctx;
-   const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
-   const nir_shader_compiler_options *options =
-      ctx->Const.ShaderCompilerOptions[stage].NirOptions;
+   bool progress; /* Written by OPT and OPT_V */
+   (void)progress;
+
+   if (nir->stage == MESA_SHADER_GEOMETRY)
+      OPT(nir_lower_gs_intrinsics);
+
    static const nir_lower_tex_options tex_options = {
       .lower_txp = ~0,
    };
-   bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
-   bool progress = false;
-   nir_shader *nir;
-
-   /* First, lower the GLSL IR or Mesa IR to NIR */
-   if (shader_prog) {
-      nir = glsl_to_nir(shader_prog, stage, options);
-   } else {
-      nir = prog_to_nir(prog, options);
-      OPT_V(nir_convert_to_ssa); /* turn registers into SSA */
-   }
-   nir_validate_shader(nir);
-
-   if (stage == MESA_SHADER_GEOMETRY) {
-      OPT(nir_lower_gs_intrinsics);
-   }
-
-   OPT(nir_lower_global_vars_to_local);
 
    OPT_V(nir_lower_tex, &tex_options);
-
    OPT(nir_normalize_cubemap_coords);
 
+   OPT(nir_lower_global_vars_to_local);
+
    OPT(nir_split_var_copies);
 
    nir = nir_optimize(nir, is_scalar);
@@ -285,6 +275,27 @@ brw_create_nir(struct brw_context *brw,
    /* Get rid of split copies */
    nir = nir_optimize(nir, is_scalar);
 
+   OPT(nir_remove_dead_variables);
+
+   return nir;
+}
+
+/* Lowers inputs, outputs, uniforms, and samplers for i965
+ *
+ * This function does all of the standard lowering prior to post-processing.
+ * The lowering done is highly gen, stage, and backend-specific.  The
+ * shader_prog parameter is optional and is used only for lowering sampler
+ * derefs and atomics for GLSL shaders.
+ */
+nir_shader *
+brw_lower_nir(nir_shader *nir,
+              const struct brw_device_info *devinfo,
+              const struct gl_shader_program *shader_prog,
+              bool is_scalar)
+{
+   bool progress; /* Written by OPT and OPT_V */
+   (void)progress;
+
    OPT_V(brw_nir_lower_inputs, devinfo, is_scalar);
    OPT_V(brw_nir_lower_outputs, is_scalar);
    nir_assign_var_locations(&nir->uniforms,
@@ -292,8 +303,6 @@ brw_create_nir(struct brw_context *brw,
                             is_scalar ? type_size_scalar : type_size_vec4);
    OPT_V(nir_lower_io, nir_var_all, is_scalar ? type_size_scalar : type_size_vec4);
 
-   OPT(nir_remove_dead_variables);
-
    if (shader_prog) {
       OPT_V(nir_lower_samplers, shader_prog);
    }
@@ -304,9 +313,28 @@ brw_create_nir(struct brw_context *brw,
       OPT_V(nir_lower_atomics, shader_prog);
    }
 
-   nir = nir_optimize(nir, is_scalar);
+   return nir_optimize(nir, is_scalar);
+}
 
-   if (brw->gen >= 6) {
+/* Prepare the given shader for codegen
+ *
+ * This function is intended to be called right before going into the actual
+ * backend and is highly backend-specific.  Also, once this function has been
+ * called on a shader, it will no longer be in SSA form so most optimizations
+ * will not work.
+ */
+nir_shader *
+brw_postprocess_nir(nir_shader *nir,
+                    const struct brw_device_info *devinfo,
+                    bool is_scalar)
+{
+   bool debug_enabled =
+      (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->stage));
+
+   bool progress; /* Written by OPT and OPT_V */
+   (void)progress;
+
+   if (devinfo->gen >= 6) {
       /* Try and fuse multiply-adds */
       OPT(brw_nir_opt_peephole_ffma);
    }
@@ -327,7 +355,7 @@ brw_create_nir(struct brw_context *brw,
       }
 
       fprintf(stderr, "NIR (SSA form) for %s shader:\n",
-              _mesa_shader_stage_to_string(stage));
+              _mesa_shader_stage_to_string(nir->stage));
       nir_print_shader(nir, stderr);
    }
 
@@ -338,28 +366,57 @@ brw_create_nir(struct brw_context *brw,
       OPT(nir_lower_vec_to_movs);
    }
 
-   /* Needed only so that OPT and OPT_V can set it */
-   (void)progress;
-
    /* This is the last pass we run before we start emitting stuff.  It
     * determines when we need to insert boolean resolves on Gen <= 5.  We
     * run it last because it stashes data in instr->pass_flags and we don't
     * want that to be squashed by other NIR passes.
     */
-   if (brw->gen <= 5)
+   if (devinfo->gen <= 5)
       brw_nir_analyze_boolean_resolves(nir);
 
    nir_sweep(nir);
 
    if (unlikely(debug_enabled)) {
       fprintf(stderr, "NIR (final form) for %s shader:\n",
-              _mesa_shader_stage_to_string(stage));
+              _mesa_shader_stage_to_string(nir->stage));
       nir_print_shader(nir, stderr);
    }
 
    return nir;
 }
 
+nir_shader *
+brw_create_nir(struct brw_context *brw,
+               const struct gl_shader_program *shader_prog,
+               const struct gl_program *prog,
+               gl_shader_stage stage,
+               bool is_scalar)
+{
+   struct gl_context *ctx = &brw->ctx;
+   const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
+   const nir_shader_compiler_options *options =
+      ctx->Const.ShaderCompilerOptions[stage].NirOptions;
+   bool progress;
+   nir_shader *nir;
+
+   /* First, lower the GLSL IR or Mesa IR to NIR */
+   if (shader_prog) {
+      nir = glsl_to_nir(shader_prog, stage, options);
+   } else {
+      nir = prog_to_nir(prog, options);
+      OPT_V(nir_convert_to_ssa); /* turn registers into SSA */
+   }
+   nir_validate_shader(nir);
+
+   (void)progress;
+
+   nir = brw_preprocess_nir(nir, is_scalar);
+   nir = brw_lower_nir(nir, devinfo, shader_prog, is_scalar);
+   nir = brw_postprocess_nir(nir, devinfo, is_scalar);
+
+   return nir;
+}
+
 enum brw_reg_type
 brw_type_for_nir_type(nir_alu_type type)
 {
diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h
index e7c93684fb3..baf2f137672 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.h
+++ b/src/mesa/drivers/dri/i965/brw_nir.h
@@ -81,6 +81,15 @@ nir_shader *brw_create_nir(struct brw_context *brw,
                            gl_shader_stage stage,
                            bool is_scalar);
 
+nir_shader *brw_preprocess_nir(nir_shader *nir, bool is_scalar);
+nir_shader *brw_lower_nir(nir_shader *nir,
+                          const struct brw_device_info *devinfo,
+                          const struct gl_shader_program *shader_prog,
+                          bool is_scalar);
+nir_shader *brw_postprocess_nir(nir_shader *nir,
+                                const struct brw_device_info *devinfo,
+                                bool is_scalar);
+
 enum brw_reg_type brw_type_for_nir_type(nir_alu_type type);
 
 enum glsl_base_type brw_glsl_base_type_for_nir_type(nir_alu_type type);

From ce767bbdfff7c2a7829b652c111a11eb9ddba026 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 11 Nov 2015 10:04:43 -0800
Subject: [PATCH 325/335] i965: Move postprocess_nir to codegen time

This allows us to insert NIR passes between initial NIR compilation and
optimization (link time) and actual backend code-gen.  In particular, it
will allow us to do shader variants in NIR and share some of that shader
variant code between backends.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp              | 11 +++++++++--
 src/mesa/drivers/dri/i965/brw_nir.c               |  1 -
 src/mesa/drivers/dri/i965/brw_vec4.cpp            |  6 +++++-
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp |  7 ++++++-
 4 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 7376f951fa8..e9e3d4dfe81 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -43,6 +43,7 @@
 #include "brw_wm.h"
 #include "brw_fs.h"
 #include "brw_cs.h"
+#include "brw_nir.h"
 #include "brw_vec4_gs_visitor.h"
 #include "brw_cfg.h"
 #include "brw_dead_control_flow.h"
@@ -5430,13 +5431,16 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
                void *mem_ctx,
                const struct brw_wm_prog_key *key,
                struct brw_wm_prog_data *prog_data,
-               const nir_shader *shader,
+               const nir_shader *src_shader,
                struct gl_program *prog,
                int shader_time_index8, int shader_time_index16,
                bool use_rep_send,
                unsigned *final_assembly_size,
                char **error_str)
 {
+   nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
+   shader = brw_postprocess_nir(shader, compiler->devinfo, true);
+
    /* key->alpha_test_func means simulating alpha testing via discards,
     * so the shader definitely kills pixels.
     */
@@ -5589,11 +5593,14 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
                void *mem_ctx,
                const struct brw_cs_prog_key *key,
                struct brw_cs_prog_data *prog_data,
-               const nir_shader *shader,
+               const nir_shader *src_shader,
                int shader_time_index,
                unsigned *final_assembly_size,
                char **error_str)
 {
+   nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
+   shader = brw_postprocess_nir(shader, compiler->devinfo, true);
+
    prog_data->local_size[0] = shader->info.cs.local_size[0];
    prog_data->local_size[1] = shader->info.cs.local_size[1];
    prog_data->local_size[2] = shader->info.cs.local_size[2];
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index b9d523dd0b6..16969530577 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -412,7 +412,6 @@ brw_create_nir(struct brw_context *brw,
 
    nir = brw_preprocess_nir(nir, is_scalar);
    nir = brw_lower_nir(nir, devinfo, shader_prog, is_scalar);
-   nir = brw_postprocess_nir(nir, devinfo, is_scalar);
 
    return nir;
 }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 44893e3e593..bf40a583ea8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1931,13 +1931,17 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
                void *mem_ctx,
                const struct brw_vs_prog_key *key,
                struct brw_vs_prog_data *prog_data,
-               const nir_shader *shader,
+               const nir_shader *src_shader,
                gl_clip_plane *clip_planes,
                bool use_legacy_snorm_formula,
                int shader_time_index,
                unsigned *final_assembly_size,
                char **error_str)
 {
+   nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
+   shader = brw_postprocess_nir(shader, compiler->devinfo,
+                                compiler->scalar_stage[MESA_SHADER_VERTEX]);
+
    const unsigned *assembly = NULL;
 
    unsigned nr_attributes = _mesa_bitcount_64(prog_data->inputs_read);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 89e49964fa2..7174ee94067 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -30,6 +30,7 @@
 #include "brw_vec4_gs_visitor.h"
 #include "gen6_gs_visitor.h"
 #include "brw_fs.h"
+#include "brw_nir.h"
 
 namespace brw {
 
@@ -606,7 +607,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
                void *mem_ctx,
                const struct brw_gs_prog_key *key,
                struct brw_gs_prog_data *prog_data,
-               const nir_shader *shader,
+               const nir_shader *src_shader,
                struct gl_shader_program *shader_prog,
                int shader_time_index,
                unsigned *final_assembly_size,
@@ -616,6 +617,10 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
    memset(&c, 0, sizeof(c));
    c.key = *key;
 
+   nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
+   shader = brw_postprocess_nir(shader, compiler->devinfo,
+                                compiler->scalar_stage[MESA_SHADER_GEOMETRY]);
+
    prog_data->include_primitive_id =
       (shader->info.inputs_read & VARYING_BIT_PRIMITIVE_ID) != 0;
 

From 1417f6a216b46dbbaa1bfe0cef97e2b4a48224c0 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 11 Nov 2015 10:46:09 -0800
Subject: [PATCH 326/335] nir/lower_tex: Report progress

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 src/glsl/nir/nir.h                  |  2 +-
 src/glsl/nir/nir_lower_tex.c        | 19 +++++++++++++++----
 src/mesa/drivers/dri/i965/brw_nir.c |  2 +-
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index b4be145e5ec..28c85459021 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1994,7 +1994,7 @@ typedef struct nir_lower_tex_options {
    unsigned saturate_r;
 } nir_lower_tex_options;
 
-void nir_lower_tex(nir_shader *shader,
+bool nir_lower_tex(nir_shader *shader,
                    const nir_lower_tex_options *options);
 
 void nir_lower_idiv(nir_shader *shader);
diff --git a/src/glsl/nir/nir_lower_tex.c b/src/glsl/nir/nir_lower_tex.c
index 8aaa48ab568..21ed1032a0b 100644
--- a/src/glsl/nir/nir_lower_tex.c
+++ b/src/glsl/nir/nir_lower_tex.c
@@ -41,6 +41,7 @@
 typedef struct {
    nir_builder b;
    const nir_lower_tex_options *options;
+   bool progress;
 } lower_tex_state;
 
 static void
@@ -239,15 +240,21 @@ nir_lower_tex_block(nir_block *block, void *void_state)
       /* If we are clamping any coords, we must lower projector first
        * as clamping happens *after* projection:
        */
-      if (lower_txp || sat_mask)
+      if (lower_txp || sat_mask) {
          project_src(b, tex);
+         state->progress = true;
+      }
 
       if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) &&
-          state->options->lower_rect)
+          state->options->lower_rect) {
          lower_rect(b, tex);
+         state->progress = true;
+      }
 
-      if (sat_mask)
+      if (sat_mask) {
          saturate_src(b, tex, sat_mask);
+         state->progress = true;
+      }
    }
 
    return true;
@@ -264,13 +271,17 @@ nir_lower_tex_impl(nir_function_impl *impl, lower_tex_state *state)
                                nir_metadata_dominance);
 }
 
-void
+bool
 nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
 {
    lower_tex_state state;
    state.options = options;
+   state.progress = false;
+
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
          nir_lower_tex_impl(overload->impl, &state);
    }
+
+   return state.progress;
 }
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 16969530577..62f3171329c 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -260,7 +260,7 @@ brw_preprocess_nir(nir_shader *nir, bool is_scalar)
       .lower_txp = ~0,
    };
 
-   OPT_V(nir_lower_tex, &tex_options);
+   OPT(nir_lower_tex, &tex_options);
    OPT(nir_normalize_cubemap_coords);
 
    OPT(nir_lower_global_vars_to_local);

From 042fa75e48118e4d7643e1f42b5a983df1274acb Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 11 Nov 2015 12:01:20 -0800
Subject: [PATCH 327/335] nir/lower_tex: Set the dest_type for txs instructions

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 src/glsl/nir/nir_lower_tex.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/glsl/nir/nir_lower_tex.c b/src/glsl/nir/nir_lower_tex.c
index 21ed1032a0b..6dea8377c28 100644
--- a/src/glsl/nir/nir_lower_tex.c
+++ b/src/glsl/nir/nir_lower_tex.c
@@ -134,6 +134,7 @@ get_texture_size(nir_builder *b, nir_tex_instr *tex)
    txs->op = nir_texop_txs;
    txs->sampler_dim = GLSL_SAMPLER_DIM_RECT;
    txs->sampler_index = tex->sampler_index;
+   txs->dest_type = nir_type_int;
 
    /* only single src, the lod: */
    txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0));

From d065a93a3f103a8e4fc9c56971c2a4ae195d611f Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 11 Nov 2015 15:46:55 -0800
Subject: [PATCH 328/335] i965/fs: Stomp the texture return type to UINT32 for
 resinfo messages

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 77969c4dc12..971190fac6a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -699,6 +699,17 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
       break;
    }
 
+   /* Stomp the resinfo output type to UINT32.  On gens 4-5, the output type
+    * is set as part of the message descriptor.  On gen4, the PRM seems to
+    * allow UINT32 and FLOAT32 (i965 PRM, Vol. 4 Section 4.8.1.1), but on
+    * later gens UINT32 is required.  Once you hit Sandy Bridge, the bit is
+    * gone from the message descriptor entirely and you just get UINT32 all
+    * the time regardless.  Since we can really only do non-UINT32 on gen4,
+    * just stomp it to UINT32 all the time.
+    */
+   if (inst->opcode == SHADER_OPCODE_TXS)
+      return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
+
    switch (inst->exec_size) {
    case 8:
       simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;

From 6c8ba59cff14a1a86273f4008ff2a8e68335ab25 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 11 Nov 2015 11:01:59 -0800
Subject: [PATCH 329/335] i965: Use nir_lower_tex for texture coordinate
 lowering

Previously, we had a rescale_texcoords helper in the FS backend for
handling rescaling of texture coordinates.  Now that we can do variants in
NIR, we can use nir_lower_tex to do the rescaling for us.  This allows us
to delete the i965-specific code and gives us proper TEXTURE_RECTANGLE and
GL_CLAMP handling in vertex and geometry shaders.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp          |   4 +
 src/mesa/drivers/dri/i965/brw_fs.h            |   3 -
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp      |   4 +-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp  | 125 ------------------
 src/mesa/drivers/dri/i965/brw_nir.c           |  27 ++++
 src/mesa/drivers/dri/i965/brw_nir.h           |   6 +
 src/mesa/drivers/dri/i965/brw_vec4.cpp        |   2 +
 .../drivers/dri/i965/brw_vec4_gs_visitor.cpp  |   2 +
 8 files changed, 42 insertions(+), 131 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index e9e3d4dfe81..777cee5c809 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -5439,6 +5439,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
                char **error_str)
 {
    nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
+   shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
+                                      true);
    shader = brw_postprocess_nir(shader, compiler->devinfo, true);
 
    /* key->alpha_test_func means simulating alpha testing via discards,
@@ -5599,6 +5601,8 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
                char **error_str)
 {
    nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
+   shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
+                                      true);
    shader = brw_postprocess_nir(shader, compiler->devinfo, true);
 
    prog_data->local_size[0] = shader->info.cs.local_size[0];
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index f52093ba3ce..3e29b3e929f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -218,8 +218,6 @@ public:
    void emit_interpolation_setup_gen4();
    void emit_interpolation_setup_gen6();
    void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
-   fs_reg rescale_texcoord(fs_reg coordinate, int coord_components,
-                           bool is_rect, uint32_t sampler);
    void emit_texture(ir_texture_opcode op,
                      const glsl_type *dest_type,
                      fs_reg coordinate, int components,
@@ -230,7 +228,6 @@ public:
                      fs_reg mcs,
                      int gather_component,
                      bool is_cube_array,
-                     bool is_rect,
                      uint32_t sampler,
                      fs_reg sampler_reg);
    fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 72190f3312c..c439da2ec50 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -2654,8 +2654,6 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
 
    int gather_component = instr->component;
 
-   bool is_rect = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;
-
    bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
                         instr->is_array;
 
@@ -2795,7 +2793,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
    emit_texture(op, dest_type, coordinate, instr->coord_components,
                 shadow_comparitor, lod, lod2, lod_components, sample_index,
                 tex_offset, mcs, gather_component,
-                is_cube_array, is_rect, sampler, sampler_reg);
+                is_cube_array, sampler, sampler_reg);
 
    fs_reg dest = get_nir_dest(instr->dest);
    dest.type = this->result.type;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 2e04134318e..03049062c20 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -79,122 +79,6 @@ fs_visitor::emit_vs_system_value(int location)
    return reg;
 }
 
-fs_reg
-fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
-                             bool is_rect, uint32_t sampler)
-{
-   bool needs_gl_clamp = true;
-   fs_reg scale_x, scale_y;
-
-   /* The 965 requires the EU to do the normalization of GL rectangle
-    * texture coordinates.  We use the program parameter state
-    * tracking to get the scaling factor.
-    */
-   if (is_rect &&
-       (devinfo->gen < 6 ||
-        (devinfo->gen >= 6 && (key_tex->gl_clamp_mask[0] & (1 << sampler) ||
-                               key_tex->gl_clamp_mask[1] & (1 << sampler))))) {
-      struct gl_program_parameter_list *params = prog->Parameters;
-
-
-      /* FINISHME: We're failing to recompile our programs when the sampler is
-       * updated.  This only matters for the texture rectangle scale
-       * parameters (pre-gen6, or gen6+ with GL_CLAMP).
-       */
-      int tokens[STATE_LENGTH] = {
-	 STATE_INTERNAL,
-	 STATE_TEXRECT_SCALE,
-	 prog->SamplerUnits[sampler],
-	 0,
-	 0
-      };
-
-      no16("rectangle scale uniform setup not supported on SIMD16\n");
-      if (dispatch_width == 16) {
-	 return coordinate;
-      }
-
-      GLuint index = _mesa_add_state_reference(params,
-					       (gl_state_index *)tokens);
-      /* Try to find existing copies of the texrect scale uniforms. */
-      for (unsigned i = 0; i < uniforms; i++) {
-         if (stage_prog_data->param[i] ==
-             &prog->Parameters->ParameterValues[index][0]) {
-            scale_x = fs_reg(UNIFORM, i);
-            scale_y = fs_reg(UNIFORM, i + 1);
-            break;
-         }
-      }
-
-      /* If we didn't already set them up, do so now. */
-      if (scale_x.file == BAD_FILE) {
-         scale_x = fs_reg(UNIFORM, uniforms);
-         scale_y = fs_reg(UNIFORM, uniforms + 1);
-
-         stage_prog_data->param[uniforms++] =
-            &prog->Parameters->ParameterValues[index][0];
-         stage_prog_data->param[uniforms++] =
-            &prog->Parameters->ParameterValues[index][1];
-      }
-   }
-
-   /* The 965 requires the EU to do the normalization of GL rectangle
-    * texture coordinates.  We use the program parameter state
-    * tracking to get the scaling factor.
-    */
-   if (devinfo->gen < 6 && is_rect) {
-      fs_reg dst = fs_reg(VGRF, alloc.allocate(coord_components));
-      fs_reg src = coordinate;
-      coordinate = dst;
-
-      bld.MUL(dst, src, scale_x);
-      dst = offset(dst, bld, 1);
-      src = offset(src, bld, 1);
-      bld.MUL(dst, src, scale_y);
-   } else if (is_rect) {
-      /* On gen6+, the sampler handles the rectangle coordinates
-       * natively, without needing rescaling.  But that means we have
-       * to do GL_CLAMP clamping at the [0, width], [0, height] scale,
-       * not [0, 1] like the default case below.
-       */
-      needs_gl_clamp = false;
-
-      for (int i = 0; i < 2; i++) {
-	 if (key_tex->gl_clamp_mask[i] & (1 << sampler)) {
-	    fs_reg chan = coordinate;
-	    chan = offset(chan, bld, i);
-
-            set_condmod(BRW_CONDITIONAL_GE,
-                        bld.emit(BRW_OPCODE_SEL, chan, chan, brw_imm_f(0.0f)));
-
-	    /* Our parameter comes in as 1.0/width or 1.0/height,
-	     * because that's what people normally want for doing
-	     * texture rectangle handling.  We need width or height
-	     * for clamping, but we don't care enough to make a new
-	     * parameter type, so just invert back.
-	     */
-	    fs_reg limit = vgrf(glsl_type::float_type);
-            bld.MOV(limit, i == 0 ? scale_x : scale_y);
-            bld.emit(SHADER_OPCODE_RCP, limit, limit);
-
-            set_condmod(BRW_CONDITIONAL_L,
-                        bld.emit(BRW_OPCODE_SEL, chan, chan, limit));
-	 }
-      }
-   }
-
-   if (coord_components > 0 && needs_gl_clamp) {
-      for (int i = 0; i < MIN2(coord_components, 3); i++) {
-	 if (key_tex->gl_clamp_mask[i] & (1 << sampler)) {
-	    fs_reg chan = coordinate;
-	    chan = offset(chan, bld, i);
-            set_saturate(true, bld.MOV(chan, chan));
-	 }
-      }
-   }
-   return coordinate;
-}
-
 /* Sample from the MCS surface attached to this multisample texture. */
 fs_reg
 fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
@@ -227,7 +111,6 @@ fs_visitor::emit_texture(ir_texture_opcode op,
                          fs_reg mcs,
                          int gather_component,
                          bool is_cube_array,
-                         bool is_rect,
                          uint32_t sampler,
                          fs_reg sampler_reg)
 {
@@ -279,14 +162,6 @@ fs_visitor::emit_texture(ir_texture_opcode op,
       return;
    }
 
-   if (coordinate.file != BAD_FILE) {
-      /* FINISHME: Texture coordinate rescaling doesn't work with non-constant
-       * samplers.  This should only be a problem with GL_CLAMP on Gen7.
-       */
-      coordinate = rescale_texcoord(coordinate, coord_components, is_rect,
-                                    sampler);
-   }
-
    /* Writemasking doesn't eliminate channels on SIMD8 texture
     * samples, so don't worry about them.
     */
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 62f3171329c..b8eeaa0d9b2 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -416,6 +416,33 @@ brw_create_nir(struct brw_context *brw,
    return nir;
 }
 
+nir_shader *
+brw_nir_apply_sampler_key(nir_shader *nir,
+                          const struct brw_device_info *devinfo,
+                          const struct brw_sampler_prog_key_data *key_tex,
+                          bool is_scalar)
+{
+   nir_lower_tex_options tex_options = { 0 };
+
+   /* Iron Lake and prior require lowering of all rectangle textures */
+   if (devinfo->gen < 6)
+      tex_options.lower_rect = true;
+
+   /* Prior to Broadwell, our hardware can't actually do GL_CLAMP */
+   if (devinfo->gen < 8) {
+      tex_options.saturate_s = key_tex->gl_clamp_mask[0];
+      tex_options.saturate_t = key_tex->gl_clamp_mask[1];
+      tex_options.saturate_r = key_tex->gl_clamp_mask[2];
+   }
+
+   if (nir_lower_tex(nir, &tex_options)) {
+      nir_validate_shader(nir);
+      nir = nir_optimize(nir, is_scalar);
+   }
+
+   return nir;
+}
+
 enum brw_reg_type
 brw_type_for_nir_type(nir_alu_type type)
 {
diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h
index baf2f137672..0a8a5a280b1 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.h
+++ b/src/mesa/drivers/dri/i965/brw_nir.h
@@ -90,6 +90,12 @@ nir_shader *brw_postprocess_nir(nir_shader *nir,
                                 const struct brw_device_info *devinfo,
                                 bool is_scalar);
 
+
+nir_shader *brw_nir_apply_sampler_key(nir_shader *nir,
+                                      const struct brw_device_info *devinfo,
+                                      const struct brw_sampler_prog_key_data *key,
+                                      bool is_scalar);
+
 enum brw_reg_type brw_type_for_nir_type(nir_alu_type type);
 
 enum glsl_base_type brw_glsl_base_type_for_nir_type(nir_alu_type type);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index bf40a583ea8..ae3cf728443 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1939,6 +1939,8 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
                char **error_str)
 {
    nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
+   shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
+                                      compiler->scalar_stage[MESA_SHADER_VERTEX]);
    shader = brw_postprocess_nir(shader, compiler->devinfo,
                                 compiler->scalar_stage[MESA_SHADER_VERTEX]);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 7174ee94067..b13d36e2c7d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -618,6 +618,8 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
    c.key = *key;
 
    nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
+   shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
+                                      compiler->scalar_stage[MESA_SHADER_GEOMETRY]);
    shader = brw_postprocess_nir(shader, compiler->devinfo,
                                 compiler->scalar_stage[MESA_SHADER_GEOMETRY]);
 

From 03c9ad900e4f1feb2a4df8dd0c563937d999ae5d Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Thu, 12 Nov 2015 10:38:12 -0800
Subject: [PATCH 330/335] nir/validate: Validate dests after sources

Previously, if someone accidentally made an instruction that refers to its
own SSA destination, the validator wouldn't catch it.  The reason for this
is that it validated the destination too early and, by the time it got to
the source, the destination SSA value was already added to the set of seen
SSA values so it would assume that it came from some previous instruction.
By moving destination validation to be after source validation, the SSA
value is not in the list of seen values and the validator will catch
self-referential instructions.

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
---
 src/glsl/nir/nir_validate.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c
index ed374b921fa..06879d64ee2 100644
--- a/src/glsl/nir/nir_validate.c
+++ b/src/glsl/nir/nir_validate.c
@@ -290,11 +290,11 @@ validate_alu_instr(nir_alu_instr *instr, validate_state *state)
 {
    assert(instr->op < nir_num_opcodes);
 
-   validate_alu_dest(&instr->dest, state);
-
    for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
       validate_alu_src(instr, i, state);
    }
+
+   validate_alu_dest(&instr->dest, state);
 }
 
 static void
@@ -375,6 +375,11 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
       validate_src(&instr->src[i], state);
    }
 
+   unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
+   for (unsigned i = 0; i < num_vars; i++) {
+      validate_deref_var(instr, instr->variables[i], state);
+   }
+
    if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
       unsigned components_written =
          nir_intrinsic_infos[instr->intrinsic].dest_components;
@@ -392,11 +397,6 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
       validate_dest(&instr->dest, state);
    }
 
-   unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
-   for (unsigned i = 0; i < num_vars; i++) {
-      validate_deref_var(instr, instr->variables[i], state);
-   }
-
    switch (instr->intrinsic) {
    case nir_intrinsic_load_var: {
       const struct glsl_type *type =
@@ -434,8 +434,6 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
 static void
 validate_tex_instr(nir_tex_instr *instr, validate_state *state)
 {
-   validate_dest(&instr->dest, state);
-
    bool src_type_seen[nir_num_tex_src_types];
    for (unsigned i = 0; i < nir_num_tex_src_types; i++)
       src_type_seen[i] = false;
@@ -448,6 +446,8 @@ validate_tex_instr(nir_tex_instr *instr, validate_state *state)
 
    if (instr->sampler != NULL)
       validate_deref_var(instr, instr->sampler, state);
+
+   validate_dest(&instr->dest, state);
 }
 
 static void

From 384396a69bdfec9971337863ae69266c7fa4a2e8 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 11 Nov 2015 21:13:26 -0800
Subject: [PATCH 331/335] nir: Use instr/if_rewrite in nir_ssa_def_rewrite_uses

nir_ssa_def_rewrite_uses is one of the older helpers in NIR and predates
both nir_instr_rewrite_src and nir_if_rewrite_condition.  Now that those
exist, it can be substantially simplified by calling them.

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
---
 src/glsl/nir/nir.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 8956b7e56ca..470469429dd 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -1312,19 +1312,11 @@ nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src)
 {
    assert(!new_src.is_ssa || def != new_src.ssa);
 
-   nir_foreach_use_safe(def, use_src) {
-      nir_instr *src_parent_instr = use_src->parent_instr;
-      list_del(&use_src->use_link);
-      nir_src_copy(use_src, &new_src, src_parent_instr);
-      src_add_all_uses(use_src, src_parent_instr, NULL);
-   }
+   nir_foreach_use_safe(def, use_src)
+      nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
 
-   nir_foreach_if_use_safe(def, use_src) {
-      nir_if *src_parent_if = use_src->parent_if;
-      list_del(&use_src->use_link);
-      nir_src_copy(use_src, &new_src, src_parent_if);
-      src_add_all_uses(use_src, NULL, src_parent_if);
-   }
+   nir_foreach_if_use_safe(def, use_src)
+      nir_if_rewrite_condition(use_src->parent_if, new_src);
 }
 
 

From 7e83fd85aa9e448aa588b3e981fdc8e026dd51b9 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Thu, 12 Nov 2015 08:40:17 -0800
Subject: [PATCH 332/335] nir: Add a ssa_def_rewrite_uses_after helper

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
---
 src/glsl/nir/nir.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++
 src/glsl/nir/nir.h |  2 ++
 2 files changed, 51 insertions(+)

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 470469429dd..bfec11e53ff 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -1319,6 +1319,55 @@ nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src)
       nir_if_rewrite_condition(use_src->parent_if, new_src);
 }
 
+static bool
+is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
+{
+   assert(start->block == end->block);
+
+   if (between->block != start->block)
+      return false;
+
+   /* Search backwards looking for "between" */
+   while (start != end) {
+      if (between == end)
+         return true;
+
+      end = nir_instr_prev(end);
+      assert(end);
+   }
+
+   return false;
+}
+
+/* Replaces all uses of the given SSA def with the given source but only if
+ * the use comes after the after_me instruction.  This can be useful if you
+ * are emitting code to fix up the result of some instruction: you can freely
+ * use the result in that code and then call rewrite_uses_after and pass the
+ * last fixup instruction as after_me and it will replace all of the uses you
+ * want without touching the fixup code.
+ *
+ * This function assumes that after_me is in the same block as
+ * def->parent_instr and that after_me comes after def->parent_instr.
+ */
+void
+nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
+                               nir_instr *after_me)
+{
+   assert(!new_src.is_ssa || def != new_src.ssa);
+
+   nir_foreach_use_safe(def, use_src) {
+      assert(use_src->parent_instr != def->parent_instr);
+      /* Since def already dominates all of its uses, the only way a use can
+       * not be dominated by after_me is if it is between def and after_me in
+       * the instruction list.
+       */
+      if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr))
+         nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
+   }
+
+   nir_foreach_if_use_safe(def, use_src)
+      nir_if_rewrite_condition(use_src->parent_if, new_src);
+}
 
 static bool foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
                             bool reverse, void *state);
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 28c85459021..2bdfcb80faf 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1868,6 +1868,8 @@ void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
 void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
                       unsigned num_components, const char *name);
 void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src);
+void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
+                                    nir_instr *after_me);
 
 /* visits basic blocks in source-code order */
 typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state);

From 6921b17107d1a55582b174e6937fae22cdc65ee4 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 11 Nov 2015 18:30:09 -0800
Subject: [PATCH 333/335] nir: Add a tex_instr_is_query helper

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
---
 src/glsl/nir/nir.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 2bdfcb80faf..1baef19812b 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1043,6 +1043,31 @@ nir_tex_instr_dest_size(nir_tex_instr *instr)
    }
 }
 
+/* Returns true if this texture operation queries something about the texture
+ * rather than actually sampling it.
+ */
+static inline bool
+nir_tex_instr_is_query(nir_tex_instr *instr)
+{
+   switch (instr->op) {
+   case nir_texop_txs:
+   case nir_texop_lod:
+   case nir_texop_texture_samples:
+   case nir_texop_query_levels:
+      return true;
+   case nir_texop_tex:
+   case nir_texop_txb:
+   case nir_texop_txl:
+   case nir_texop_txd:
+   case nir_texop_txf:
+   case nir_texop_txf_ms:
+   case nir_texop_tg4:
+      return false;
+   default:
+      unreachable("Invalid texture opcode");
+   }
+}
+
 static inline unsigned
 nir_tex_instr_src_size(nir_tex_instr *instr, unsigned src)
 {

From 8537b4ab762ec4249acf23164e0e5a943d2d05af Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 11 Nov 2015 18:30:31 -0800
Subject: [PATCH 334/335] nir/lower_tex: Add support for lowering texture
 swizzle

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
---
 src/glsl/nir/nir.h           | 13 +++++++
 src/glsl/nir/nir_lower_tex.c | 67 ++++++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 1baef19812b..524717a0fda 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -2019,6 +2019,19 @@ typedef struct nir_lower_tex_options {
    unsigned saturate_s;
    unsigned saturate_t;
    unsigned saturate_r;
+
+   /* Bitmask of samplers that need swizzling.
+    *
+    * If (swizzle_result & (1 << sampler_index)), then the swizzle in
+    * swizzles[sampler_index] is applied to the result of the texturing
+    * operation.
+    */
+   unsigned swizzle_result;
+
+   /* A swizzle for each sampler.  Values 0-3 represent x, y, z, or w swizzles
+    * while 4 and 5 represent 0 and 1 respectively.
+    */
+   uint8_t swizzles[32][4];
 } nir_lower_tex_options;
 
 bool nir_lower_tex(nir_shader *shader,
diff --git a/src/glsl/nir/nir_lower_tex.c b/src/glsl/nir/nir_lower_tex.c
index 6dea8377c28..93ebf8e78a9 100644
--- a/src/glsl/nir/nir_lower_tex.c
+++ b/src/glsl/nir/nir_lower_tex.c
@@ -215,6 +215,66 @@ saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
    }
 }
 
+static nir_ssa_def *
+get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
+{
+   nir_const_value v;
+
+   memset(&v, 0, sizeof(v));
+
+   if (swizzle_val == 4) {
+      v.u[0] = v.u[1] = v.u[2] = v.u[3] = 0;
+   } else {
+      assert(swizzle_val == 5);
+      if (type == nir_type_float)
+         v.f[0] = v.f[1] = v.f[2] = v.f[3] = 1.0;
+      else
+         v.u[0] = v.u[1] = v.u[2] = v.u[3] = 1;
+   }
+
+   return nir_build_imm(b, 4, v);
+}
+
+static void
+swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
+{
+   assert(tex->dest.is_ssa);
+
+   b->cursor = nir_after_instr(&tex->instr);
+
+   nir_ssa_def *swizzled;
+   if (tex->op == nir_texop_tg4) {
+      if (swizzle[tex->component] < 4) {
+         /* This one's easy */
+         tex->component = swizzle[tex->component];
+         return;
+      } else {
+         swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
+      }
+   } else {
+      assert(nir_tex_instr_dest_size(tex) == 4);
+      if (swizzle[0] < 4 && swizzle[1] < 4 &&
+          swizzle[2] < 4 && swizzle[3] < 4) {
+         unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
+         /* We have no 0's or 1's, just emit a swizzling MOV */
+         swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4, false);
+      } else {
+         nir_ssa_def *srcs[4];
+         for (unsigned i = 0; i < 4; i++) {
+            if (swizzle[i] < 4) {
+               srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]);
+            } else {
+               srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]);
+            }
+         }
+         swizzled = nir_vec(b, srcs, 4);
+      }
+   }
+
+   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
+                                  swizzled->parent_instr);
+}
+
 static bool
 nir_lower_tex_block(nir_block *block, void *void_state)
 {
@@ -256,6 +316,13 @@ nir_lower_tex_block(nir_block *block, void *void_state)
          saturate_src(b, tex, sat_mask);
          state->progress = true;
       }
+
+      if (((1 << tex->sampler_index) & state->options->swizzle_result) &&
+          !nir_tex_instr_is_query(tex) &&
+          !(tex->is_shadow && tex->is_new_style_shadow)) {
+         swizzle_result(b, tex, state->options->swizzles[tex->sampler_index]);
+         state->progress = true;
+      }
    }
 
    return true;

From d9b8fde963a53d4e06570d8bece97f806714507a Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 11 Nov 2015 18:41:37 -0800
Subject: [PATCH 335/335] i965: Use NIR for lowering texture swizzle

Now that nir_lower_tex can do texture swizzle lowering, we can use that
instead of repeating more-or-less the same code in both backends.  This
both allows us to share code and means that things like the tg4
work-arounds are somewhat simpler because they don't have to take the
swizzle into account.

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_fs.h            |   4 -
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp  | 105 +++---------------
 src/mesa/drivers/dri/i965/brw_nir.c           |  10 ++
 src/mesa/drivers/dri/i965/brw_vec4.h          |   4 -
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp    |  24 ++--
 .../drivers/dri/i965/brw_vec4_visitor.cpp     |  93 ++--------------
 6 files changed, 44 insertions(+), 196 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 3e29b3e929f..2d408b2f363 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -116,10 +116,6 @@ public:
    void setup_uniform_clipplane_values(gl_clip_plane *clip_planes);
    void compute_clip_distance(gl_clip_plane *clip_planes);
 
-   uint32_t gather_channel(int orig_chan, uint32_t sampler);
-   void swizzle_result(ir_texture_opcode op, int dest_components,
-                       fs_reg orig_val, uint32_t sampler);
-
    fs_inst *get_instruction_generating_reg(fs_inst *start,
 					   fs_inst *end,
 					   const fs_reg &reg);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 03049062c20..1e202165cb6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -116,24 +116,6 @@ fs_visitor::emit_texture(ir_texture_opcode op,
 {
    fs_inst *inst = NULL;
 
-   if (op == ir_tg4) {
-      /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother
-       * emitting anything other than setting up the constant result.
-       */
-      int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component);
-      if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
-
-         fs_reg res = vgrf(glsl_type::vec4_type);
-         this->result = res;
-
-         for (int i=0; i<4; i++) {
-            bld.MOV(res, brw_imm_f(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f));
-            res = offset(res, bld, 1);
-         }
-         return;
-      }
-   }
-
    if (op == ir_query_levels) {
       /* textureQueryLevels() is implemented in terms of TXS so we need to
        * pass a valid LOD argument.
@@ -220,8 +202,15 @@ fs_visitor::emit_texture(ir_texture_opcode op,
       inst->offset = offset_value.ud;
 
    if (op == ir_tg4) {
-      inst->offset |=
-         gather_channel(gather_component, sampler) << 16; /* M0.2:16-17 */
+      if (gather_component == 1 &&
+          key_tex->gather_channel_quirk_mask & (1 << sampler)) {
+         /* gather4 sampler is broken for green channel on RG32F --
+          * we must ask for blue instead.
+          */
+         inst->offset |= 2 << 16;
+      } else {
+         inst->offset |= gather_component << 16;
+      }
 
       if (devinfo->gen == 6)
          emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], dst);
@@ -245,7 +234,12 @@ fs_visitor::emit_texture(ir_texture_opcode op,
       bld.LOAD_PAYLOAD(dst, fixed_payload, components, 0);
    }
 
-   swizzle_result(op, dest_type->vector_elements, dst, sampler);
+   if (op == ir_query_levels) {
+      /* # levels is in .w */
+      dst = offset(dst, bld, 3);
+   }
+
+   this->result = dst;
 }
 
 /**
@@ -278,75 +272,6 @@ fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst)
    }
 }
 
-/**
- * Set up the gather channel based on the swizzle, for gather4.
- */
-uint32_t
-fs_visitor::gather_channel(int orig_chan, uint32_t sampler)
-{
-   int swiz = GET_SWZ(key_tex->swizzles[sampler], orig_chan);
-   switch (swiz) {
-      case SWIZZLE_X: return 0;
-      case SWIZZLE_Y:
-         /* gather4 sampler is broken for green channel on RG32F --
-          * we must ask for blue instead.
-          */
-         if (key_tex->gather_channel_quirk_mask & (1 << sampler))
-            return 2;
-         return 1;
-      case SWIZZLE_Z: return 2;
-      case SWIZZLE_W: return 3;
-      default:
-         unreachable("Not reached"); /* zero, one swizzles handled already */
-   }
-}
-
-/**
- * Swizzle the result of a texture result.  This is necessary for
- * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons.
- */
-void
-fs_visitor::swizzle_result(ir_texture_opcode op, int dest_components,
-                           fs_reg orig_val, uint32_t sampler)
-{
-   if (op == ir_query_levels) {
-      /* # levels is in .w */
-      this->result = offset(orig_val, bld, 3);
-      return;
-   }
-
-   this->result = orig_val;
-
-   /* txs,lod don't actually sample the texture, so swizzling the result
-    * makes no sense.
-    */
-   if (op == ir_txs || op == ir_lod || op == ir_tg4)
-      return;
-
-   if (dest_components == 1) {
-      /* Ignore DEPTH_TEXTURE_MODE swizzling. */
-   } else if (key_tex->swizzles[sampler] != SWIZZLE_NOOP) {
-      fs_reg swizzled_result = vgrf(glsl_type::vec4_type);
-      swizzled_result.type = orig_val.type;
-
-      for (int i = 0; i < 4; i++) {
-	 int swiz = GET_SWZ(key_tex->swizzles[sampler], i);
-	 fs_reg l = swizzled_result;
-	 l = offset(l, bld, i);
-
-	 if (swiz == SWIZZLE_ZERO) {
-            bld.MOV(l, brw_imm_f(0.0f));
-	 } else if (swiz == SWIZZLE_ONE) {
-            bld.MOV(l, brw_imm_f(1.0f));
-	 } else {
-            bld.MOV(l, offset(orig_val, bld,
-                                  GET_SWZ(key_tex->swizzles[sampler], i)));
-	 }
-      }
-      this->result = swizzled_result;
-   }
-}
-
 /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
 void
 fs_visitor::emit_dummy_fs()
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index b8eeaa0d9b2..91358d8f389 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -435,6 +435,16 @@ brw_nir_apply_sampler_key(nir_shader *nir,
       tex_options.saturate_r = key_tex->gl_clamp_mask[2];
    }
 
+   /* Prior to Haswell, we have to fake texture swizzle */
+   for (unsigned s = 0; s < MAX_SAMPLERS; s++) {
+      if (key_tex->swizzles[s] == SWIZZLE_NOOP)
+         continue;
+
+      tex_options.swizzle_result |= (1 << s);
+      for (unsigned c = 0; c < 4; c++)
+         tex_options.swizzles[s][c] = GET_SWZ(key_tex->swizzles[s], c);
+   }
+
    if (nir_lower_tex(nir, &tex_options)) {
       nir_validate_shader(nir);
       nir = nir_optimize(nir, is_scalar);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index ec8abf49cd8..3f674326284 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -275,13 +275,9 @@ public:
                      bool is_cube_array,
                      uint32_t sampler, src_reg sampler_reg);
 
-   uint32_t gather_channel(unsigned gather_component, uint32_t sampler);
    src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
                           src_reg sampler);
    void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
-   void swizzle_result(ir_texture_opcode op, dst_reg dest,
-                       src_reg orig_val, uint32_t sampler,
-                       const glsl_type *dest_type);
 
    void emit_ndc_computation();
    void emit_psiz_and_flags(dst_reg reg);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 8d2ebfb7c89..c777acf70a7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1590,17 +1590,6 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
                                  nir_tex_instr_dest_size(instr));
    dst_reg dest = get_nir_dest(instr->dest, instr->dest_type);
 
-   /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother
-    * emitting anything other than setting up the constant result.
-    */
-   if (instr->op == nir_texop_tg4) {
-      int swiz = GET_SWZ(key_tex->swizzles[sampler], instr->component);
-      if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
-         emit(MOV(dest, brw_imm_f(swiz == SWIZZLE_ONE ? 1.0f : 0.0f)));
-         return;
-      }
-   }
-
    /* Load the texture operation sources */
    for (unsigned i = 0; i < instr->num_srcs; i++) {
       switch (instr->src[i].src_type) {
@@ -1716,8 +1705,17 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
    }
 
    /* Stuff the channel select bits in the top of the texture offset */
-   if (instr->op == nir_texop_tg4)
-      constant_offset |= gather_channel(instr->component, sampler) << 16;
+   if (instr->op == nir_texop_tg4) {
+      if (instr->component == 1 &&
+          (key_tex->gather_channel_quirk_mask & (1 << sampler))) {
+         /* gather4 sampler is broken for green channel on RG32F --
+          * we must ask for blue instead.
+          */
+         constant_offset |= 2 << 16;
+      } else {
+         constant_offset |= instr->component << 16;
+      }
+   }
 
    ir_texture_opcode op = ir_texture_opcode_for_nir_texop(instr->op);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 2e4695a2845..04ea1775ceb 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -920,8 +920,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
       unreachable("Unrecognized tex op");
    }
 
-   vec4_instruction *inst = new(mem_ctx) vec4_instruction(
-      opcode, dst_reg(this, dest_type));
+   vec4_instruction *inst = new(mem_ctx) vec4_instruction(opcode, dest);
 
    inst->offset = constant_offset;
 
@@ -1072,8 +1071,13 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
       emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], inst->dst);
    }
 
-   swizzle_result(op, dest,
-                  src_reg(inst->dst), sampler, dest_type);
+   if (op == ir_query_levels) {
+      /* # levels is in .w */
+      src_reg swizzled(dest);
+      swizzled.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W,
+                                      SWIZZLE_W, SWIZZLE_W);
+      emit(MOV(dest, swizzled));
+   }
 }
 
 /**
@@ -1103,87 +1107,6 @@ vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst)
    }
 }
 
-/**
- * Set up the gather channel based on the swizzle, for gather4.
- */
-uint32_t
-vec4_visitor::gather_channel(unsigned gather_component, uint32_t sampler)
-{
-   int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component);
-   switch (swiz) {
-      case SWIZZLE_X: return 0;
-      case SWIZZLE_Y:
-         /* gather4 sampler is broken for green channel on RG32F --
-          * we must ask for blue instead.
-          */
-         if (key_tex->gather_channel_quirk_mask & (1 << sampler))
-            return 2;
-         return 1;
-      case SWIZZLE_Z: return 2;
-      case SWIZZLE_W: return 3;
-      default:
-         unreachable("Not reached"); /* zero, one swizzles handled already */
-   }
-}
-
-void
-vec4_visitor::swizzle_result(ir_texture_opcode op, dst_reg dest,
-                             src_reg orig_val, uint32_t sampler,
-                             const glsl_type *dest_type)
-{
-   int s = key_tex->swizzles[sampler];
-
-   dst_reg swizzled_result = dest;
-
-   if (op == ir_query_levels) {
-      /* # levels is in .w */
-      orig_val.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
-      emit(MOV(swizzled_result, orig_val));
-      return;
-   }
-
-   if (op == ir_txs || dest_type == glsl_type::float_type
-			|| s == SWIZZLE_NOOP || op == ir_tg4) {
-      emit(MOV(swizzled_result, orig_val));
-      return;
-   }
-
-
-   int zero_mask = 0, one_mask = 0, copy_mask = 0;
-   int swizzle[4] = {0};
-
-   for (int i = 0; i < 4; i++) {
-      switch (GET_SWZ(s, i)) {
-      case SWIZZLE_ZERO:
-	 zero_mask |= (1 << i);
-	 break;
-      case SWIZZLE_ONE:
-	 one_mask |= (1 << i);
-	 break;
-      default:
-	 copy_mask |= (1 << i);
-	 swizzle[i] = GET_SWZ(s, i);
-	 break;
-      }
-   }
-
-   if (copy_mask) {
-      orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
-      swizzled_result.writemask = copy_mask;
-      emit(MOV(swizzled_result, orig_val));
-   }
-
-   if (zero_mask) {
-      swizzled_result.writemask = zero_mask;
-      emit(MOV(swizzled_result, brw_imm_f(0.0f)));
-   }
-
-   if (one_mask) {
-      swizzled_result.writemask = one_mask;
-      emit(MOV(swizzled_result, brw_imm_f(1.0f)));
-   }
-}
-
 void
 vec4_visitor::gs_emit_vertex(int stream_id)
 {