From a641aa294ef155ca1133e131a920dcab4cb1c990 Mon Sep 17 00:00:00 2001
From: Caio Oliveira <caio.oliveira@intel.com>
Date: Wed, 14 Feb 2024 22:57:40 -0800
Subject: [PATCH] intel/brw: Remove vec4 backend

The vec4 backend still exists as part of ELK for older gfx versions.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27691>
---
 src/intel/compiler/brw_compiler.c             |    2 -
 src/intel/compiler/brw_compiler.h             |   10 -
 src/intel/compiler/brw_fs.cpp                 |    1 -
 src/intel/compiler/brw_ir_performance.cpp     |  126 -
 src/intel/compiler/brw_ir_performance.h       |    3 -
 src/intel/compiler/brw_ir_vec4.h              |  475 ---
 .../compiler/brw_schedule_instructions.cpp    |  260 --
 src/intel/compiler/brw_shader.h               |    3 -
 src/intel/compiler/brw_vec4.cpp               | 2542 -----------------
 src/intel/compiler/brw_vec4.h                 |  350 ---
 src/intel/compiler/brw_vec4_builder.h         |  646 -----
 .../compiler/brw_vec4_cmod_propagation.cpp    |  365 ---
 .../compiler/brw_vec4_copy_propagation.cpp    |  556 ----
 src/intel/compiler/brw_vec4_cse.cpp           |  322 ---
 .../compiler/brw_vec4_dead_code_eliminate.cpp |  188 --
 src/intel/compiler/brw_vec4_generator.cpp     | 2319 ---------------
 src/intel/compiler/brw_vec4_gs_nir.cpp        |   98 -
 src/intel/compiler/brw_vec4_gs_visitor.cpp    |  560 ----
 src/intel/compiler/brw_vec4_gs_visitor.h      |   75 -
 .../compiler/brw_vec4_live_variables.cpp      |  331 ---
 src/intel/compiler/brw_vec4_live_variables.h  |  143 -
 src/intel/compiler/brw_vec4_nir.cpp           | 2307 ---------------
 src/intel/compiler/brw_vec4_reg_allocate.cpp  |  512 ----
 .../compiler/brw_vec4_surface_builder.cpp     |  213 --
 src/intel/compiler/brw_vec4_surface_builder.h |   53 -
 src/intel/compiler/brw_vec4_tcs.cpp           |  320 ---
 src/intel/compiler/brw_vec4_tcs.h             |   83 -
 src/intel/compiler/brw_vec4_tes.cpp           |  223 --
 src/intel/compiler/brw_vec4_tes.h             |   65 -
 src/intel/compiler/brw_vec4_visitor.cpp       | 1319 ---------
 src/intel/compiler/brw_vec4_vs.h              |   58 -
 src/intel/compiler/brw_vec4_vs_visitor.cpp    |  108 -
 src/intel/compiler/gfx6_gs_visitor.cpp        |  702 -----
 src/intel/compiler/gfx6_gs_visitor.h          |   84 -
 src/intel/compiler/meson.build                |   31 -
 .../compiler/test_vec4_cmod_propagation.cpp   | 1056 -------
 .../compiler/test_vec4_copy_propagation.cpp   |  195 --
 .../test_vec4_dead_code_eliminate.cpp         |  178 --
 .../compiler/test_vec4_register_coalesce.cpp  |  256 --
 39 files changed, 17138 deletions(-)
 delete mode 100644 src/intel/compiler/brw_ir_vec4.h
 delete mode 100644 src/intel/compiler/brw_vec4.cpp
 delete mode 100644 src/intel/compiler/brw_vec4.h
 delete mode 100644 src/intel/compiler/brw_vec4_builder.h
 delete mode 100644 src/intel/compiler/brw_vec4_cmod_propagation.cpp
 delete mode 100644 src/intel/compiler/brw_vec4_copy_propagation.cpp
 delete mode 100644 src/intel/compiler/brw_vec4_cse.cpp
 delete mode 100644 src/intel/compiler/brw_vec4_dead_code_eliminate.cpp
 delete mode 100644 src/intel/compiler/brw_vec4_generator.cpp
 delete mode 100644 src/intel/compiler/brw_vec4_gs_nir.cpp
 delete mode 100644 src/intel/compiler/brw_vec4_gs_visitor.cpp
 delete mode 100644 src/intel/compiler/brw_vec4_gs_visitor.h
 delete mode 100644 src/intel/compiler/brw_vec4_live_variables.cpp
 delete mode 100644 src/intel/compiler/brw_vec4_live_variables.h
 delete mode 100644 src/intel/compiler/brw_vec4_nir.cpp
 delete mode 100644 src/intel/compiler/brw_vec4_reg_allocate.cpp
 delete mode 100644 src/intel/compiler/brw_vec4_surface_builder.cpp
 delete mode 100644 src/intel/compiler/brw_vec4_surface_builder.h
 delete mode 100644 src/intel/compiler/brw_vec4_tcs.cpp
 delete mode 100644 src/intel/compiler/brw_vec4_tcs.h
 delete mode 100644 src/intel/compiler/brw_vec4_tes.cpp
 delete mode 100644 src/intel/compiler/brw_vec4_tes.h
 delete mode 100644 src/intel/compiler/brw_vec4_visitor.cpp
 delete mode 100644 src/intel/compiler/brw_vec4_vs.h
 delete mode 100644 src/intel/compiler/brw_vec4_vs_visitor.cpp
 delete mode 100644 src/intel/compiler/gfx6_gs_visitor.cpp
 delete mode 100644 src/intel/compiler/gfx6_gs_visitor.h
 delete mode 100644 src/intel/compiler/test_vec4_cmod_propagation.cpp
 delete mode 100644 src/intel/compiler/test_vec4_copy_propagation.cpp
 delete mode 100644 src/intel/compiler/test_vec4_dead_code_eliminate.cpp
 delete mode 100644 src/intel/compiler/test_vec4_register_coalesce.cpp

diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
index d7eac3ca69c..337e0177d54 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -87,8 +87,6 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
    brw_init_isa_info(&compiler->isa, devinfo);
 
    brw_fs_alloc_reg_sets(compiler);
-   if (devinfo->ver < 8)
-      brw_vec4_alloc_reg_set(compiler);
 
    compiler->precise_trig = debug_get_bool_option("INTEL_PRECISE_TRIG", false);
 
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index 0b2155a5626..3628a2eab75 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -57,16 +57,6 @@ struct brw_compiler {
 
    struct brw_isa_info isa;
 
-   struct {
-      struct ra_regs *regs;
-
-      /**
-       * Array of the ra classes for the unaligned contiguous register
-       * block sizes used.
-       */
-      struct ra_class **classes;
-   } vec4_reg_set;
-
    struct {
       struct ra_regs *regs;
 
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 2a000cb74a8..ab1aea820b6 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -33,7 +33,6 @@
 #include "brw_fs_builder.h"
 #include "brw_fs_live_variables.h"
 #include "brw_nir.h"
-#include "brw_vec4_gs_visitor.h"
 #include "brw_cfg.h"
 #include "brw_dead_control_flow.h"
 #include "brw_private.h"
diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp
index e94006cca65..eeb0921e011 100644
--- a/src/intel/compiler/brw_ir_performance.cpp
+++ b/src/intel/compiler/brw_ir_performance.cpp
@@ -23,7 +23,6 @@
 
 #include "brw_eu.h"
 #include "brw_fs.h"
-#include "brw_vec4.h"
 #include "brw_cfg.h"
 
 using namespace brw;
@@ -152,29 +151,6 @@ namespace {
          rcount = inst->opcode == BRW_OPCODE_DPAS ? inst->rcount : 0;
       }
 
-      instruction_info(const struct brw_isa_info *isa,
-                       const vec4_instruction *inst) :
-         isa(isa), devinfo(isa->devinfo), op(inst->opcode),
-         td(inst->dst.type), sd(DIV_ROUND_UP(inst->size_written, REG_SIZE)),
-         tx(get_exec_type(inst)), sx(0), ss(0), sc(0),
-         desc(inst->desc), sfid(inst->sfid), rcount(0)
-      {
-         /* Compute the maximum source size. */
-         for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++)
-            ss = MAX2(ss, DIV_ROUND_UP(inst->size_read(i), REG_SIZE));
-
-         /* Convert the execution size to GRF units. */
-         sx = DIV_ROUND_UP(inst->exec_size * type_sz(tx), REG_SIZE);
-
-         /* 32x32 integer multiplication has half the usual ALU throughput.
-          * Treat it as double-precision.
-          */
-         if ((inst->opcode == BRW_OPCODE_MUL || inst->opcode == BRW_OPCODE_MAD) &&
-             !brw_reg_type_is_floating_point(tx) && type_sz(tx) == 4 &&
-             type_sz(inst->src[0].type) == type_sz(inst->src[1].type))
-            tx = brw_int_type(8, tx == BRW_REGISTER_TYPE_D);
-      }
-
       /** ISA encoding information */
       const struct brw_isa_info *isa;
       /** Device information. */
@@ -1505,102 +1481,6 @@ namespace {
       }
    }
 
-   /**
-    * Model the performance behavior of a VEC4 back-end instruction.
-    */
-   void
-   issue_vec4_instruction(state &st, const struct brw_isa_info *isa,
-                          const backend_instruction *be_inst)
-   {
-      const struct intel_device_info *devinfo = isa->devinfo;
-      const vec4_instruction *inst =
-         static_cast<const vec4_instruction *>(be_inst);
-      const instruction_info info(isa, inst);
-      const perf_desc perf = instruction_desc(info);
-
-      /* Stall on any source dependencies. */
-      for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++) {
-         for (unsigned j = 0; j < regs_read(inst, i); j++)
-            stall_on_dependency(
-               st, reg_dependency_id(devinfo, inst->src[i], j));
-      }
-
-      if (inst->reads_accumulator_implicitly()) {
-         for (unsigned j = accum_reg_of_channel(devinfo, inst, info.tx, 0);
-              j <= accum_reg_of_channel(devinfo, inst, info.tx,
-                                        inst->exec_size - 1); j++)
-            stall_on_dependency(
-               st, reg_dependency_id(devinfo, brw_acc_reg(8), j));
-      }
-
-      if (inst->base_mrf != -1) {
-         for (unsigned j = 0; j < inst->mlen; j++)
-            stall_on_dependency(
-               st, reg_dependency_id(
-                  devinfo, brw_uvec_mrf(8, inst->base_mrf, 0), j));
-      }
-
-      if (inst->reads_flag())
-         stall_on_dependency(st, EU_DEPENDENCY_ID_FLAG0);
-
-      /* Stall on any write dependencies. */
-      if (!inst->no_dd_check) {
-         if (inst->dst.file != BAD_FILE && !inst->dst.is_null()) {
-            for (unsigned j = 0; j < regs_written(inst); j++)
-               stall_on_dependency(
-                  st, reg_dependency_id(devinfo, inst->dst, j));
-         }
-
-         if (inst->writes_accumulator_implicitly(devinfo)) {
-            for (unsigned j = accum_reg_of_channel(devinfo, inst, info.tx, 0);
-                 j <= accum_reg_of_channel(devinfo, inst, info.tx,
-                                           inst->exec_size - 1); j++)
-               stall_on_dependency(
-                  st, reg_dependency_id(devinfo, brw_acc_reg(8), j));
-         }
-
-         if (inst->writes_flag(devinfo))
-            stall_on_dependency(st, EU_DEPENDENCY_ID_FLAG0);
-      }
-
-      /* Execute the instruction. */
-      execute_instruction(st, perf);
-
-      /* Mark any source dependencies. */
-      if (inst->is_send_from_grf()) {
-         for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++) {
-            for (unsigned j = 0; j < regs_read(inst, i); j++)
-               mark_read_dependency(
-                  st, perf, reg_dependency_id(devinfo, inst->src[i], j));
-         }
-      }
-
-      if (inst->base_mrf != -1) {
-         for (unsigned j = 0; j < inst->mlen; j++)
-            mark_read_dependency(st, perf,
-               reg_dependency_id(devinfo, brw_uvec_mrf(8, inst->base_mrf, 0), j));
-      }
-
-      /* Mark any destination dependencies. */
-      if (inst->dst.file != BAD_FILE && !inst->dst.is_null()) {
-         for (unsigned j = 0; j < regs_written(inst); j++) {
-            mark_write_dependency(st, perf,
-                                  reg_dependency_id(devinfo, inst->dst, j));
-         }
-      }
-
-      if (inst->writes_accumulator_implicitly(devinfo)) {
-         for (unsigned j = accum_reg_of_channel(devinfo, inst, info.tx, 0);
-              j <= accum_reg_of_channel(devinfo, inst, info.tx,
-                                        inst->exec_size - 1); j++)
-            mark_write_dependency(st, perf,
-                                  reg_dependency_id(devinfo, brw_acc_reg(8), j));
-      }
-
-      if (inst->writes_flag(devinfo))
-         mark_write_dependency(st, perf, EU_DEPENDENCY_ID_FLAG0);
-   }
-
    /**
     * Calculate the maximum possible throughput of the program compatible with
     * the cycle-count utilization estimated for each asynchronous unit, in
@@ -1692,12 +1572,6 @@ brw::performance::performance(const fs_visitor *v) :
    calculate_performance(*this, v, issue_fs_inst, v->dispatch_width);
 }
 
-brw::performance::performance(const vec4_visitor *v) :
-   block_latency(new unsigned[v->cfg->num_blocks])
-{
-   calculate_performance(*this, v, issue_vec4_instruction, 8);
-}
-
 brw::performance::~performance()
 {
    delete[] block_latency;
diff --git a/src/intel/compiler/brw_ir_performance.h b/src/intel/compiler/brw_ir_performance.h
index c3cefe838aa..80dc95b0d2e 100644
--- a/src/intel/compiler/brw_ir_performance.h
+++ b/src/intel/compiler/brw_ir_performance.h
@@ -28,15 +28,12 @@
 class fs_visitor;
 
 namespace brw {
-   class vec4_visitor;
-
    /**
     * Various estimates of the performance of a shader based on static
     * analysis.
     */
    struct performance {
       performance(const fs_visitor *v);
-      performance(const vec4_visitor *v);
       ~performance();
 
       analysis_dependency_class
diff --git a/src/intel/compiler/brw_ir_vec4.h b/src/intel/compiler/brw_ir_vec4.h
deleted file mode 100644
index 78d34729c0b..00000000000
--- a/src/intel/compiler/brw_ir_vec4.h
+++ /dev/null
@@ -1,475 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright © 2011-2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef BRW_IR_VEC4_H
-#define BRW_IR_VEC4_H
-
-#include "brw_shader.h"
-
-namespace brw {
-
-class dst_reg;
-
-class src_reg : public backend_reg
-{
-public:
-   DECLARE_RALLOC_CXX_OPERATORS(src_reg)
-
-   void init();
-
-   src_reg(enum brw_reg_file file, int nr, const glsl_type *type);
-   src_reg();
-   src_reg(struct ::brw_reg reg);
-
-   bool equals(const src_reg &r) const;
-   bool negative_equals(const src_reg &r) const;
-
-   src_reg(class vec4_visitor *v, const struct glsl_type *type);
-   src_reg(class vec4_visitor *v, const struct glsl_type *type, int size);
-
-   explicit src_reg(const dst_reg &reg);
-
-   src_reg *reladdr;
-};
-
-static inline src_reg
-retype(src_reg reg, enum brw_reg_type type)
-{
-   reg.type = type;
-   return reg;
-}
-
-namespace detail {
-
-static inline void
-add_byte_offset(backend_reg *reg, unsigned bytes)
-{
-   switch (reg->file) {
-      case BAD_FILE:
-         break;
-      case VGRF:
-      case ATTR:
-      case UNIFORM:
-         reg->offset += bytes;
-         assert(reg->offset % 16 == 0);
-         break;
-      case MRF: {
-         const unsigned suboffset = reg->offset + bytes;
-         reg->nr += suboffset / REG_SIZE;
-         reg->offset = suboffset % REG_SIZE;
-         assert(reg->offset % 16 == 0);
-         break;
-      }
-      case ARF:
-      case FIXED_GRF: {
-         const unsigned suboffset = reg->subnr + bytes;
-         reg->nr += suboffset / REG_SIZE;
-         reg->subnr = suboffset % REG_SIZE;
-         assert(reg->subnr % 16 == 0);
-         break;
-      }
-      default:
-         assert(bytes == 0);
-   }
-}
-
-} /* namespace detail */
-
-static inline src_reg
-byte_offset(src_reg reg, unsigned bytes)
-{
-   detail::add_byte_offset(&reg, bytes);
-   return reg;
-}
-
-static inline src_reg
-offset(src_reg reg, unsigned width, unsigned delta)
-{
-   const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
-   const unsigned num_components = MAX2(width / 4 * stride, 4);
-   return byte_offset(reg, num_components * type_sz(reg.type) * delta);
-}
-
-static inline src_reg
-horiz_offset(src_reg reg, unsigned delta)
-{
-   return byte_offset(reg, delta * type_sz(reg.type));
-}
-
-/**
- * Reswizzle a given source register.
- * \sa brw_swizzle().
- */
-static inline src_reg
-swizzle(src_reg reg, unsigned swizzle)
-{
-   if (reg.file == IMM)
-      reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swizzle);
-   else
-      reg.swizzle = brw_compose_swizzle(swizzle, reg.swizzle);
-
-   return reg;
-}
-
-static inline src_reg
-negate(src_reg reg)
-{
-   assert(reg.file != IMM);
-   reg.negate = !reg.negate;
-   return reg;
-}
-
-static inline bool
-is_uniform(const src_reg &reg)
-{
-   return (reg.file == IMM || reg.file == UNIFORM || reg.is_null()) &&
-          (!reg.reladdr || is_uniform(*reg.reladdr));
-}
-
-class dst_reg : public backend_reg
-{
-public:
-   DECLARE_RALLOC_CXX_OPERATORS(dst_reg)
-
-   void init();
-
-   dst_reg();
-   dst_reg(enum brw_reg_file file, int nr);
-   dst_reg(enum brw_reg_file file, int nr, const glsl_type *type,
-           unsigned writemask);
-   dst_reg(enum brw_reg_file file, int nr, brw_reg_type type,
-           unsigned writemask);
-   dst_reg(struct ::brw_reg reg);
-   dst_reg(class vec4_visitor *v, const struct glsl_type *type);
-
-   explicit dst_reg(const src_reg &reg);
-
-   bool equals(const dst_reg &r) const;
-
-   src_reg *reladdr;
-};
-
-static inline dst_reg
-retype(dst_reg reg, enum brw_reg_type type)
-{
-   reg.type = type;
-   return reg;
-}
-
-static inline dst_reg
-byte_offset(dst_reg reg, unsigned bytes)
-{
-   detail::add_byte_offset(&reg, bytes);
-   return reg;
-}
-
-static inline dst_reg
-offset(dst_reg reg, unsigned width, unsigned delta)
-{
-   const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
-   const unsigned num_components = MAX2(width / 4 * stride, 4);
-   return byte_offset(reg, num_components * type_sz(reg.type) * delta);
-}
-
-static inline dst_reg
-horiz_offset(const dst_reg &reg, unsigned delta)
-{
-   if (is_uniform(src_reg(reg)))
-      return reg;
-   else
-      return byte_offset(reg, delta * type_sz(reg.type));
-}
-
-static inline dst_reg
-writemask(dst_reg reg, unsigned mask)
-{
-   assert(reg.file != IMM);
-   assert((reg.writemask & mask) != 0);
-   reg.writemask &= mask;
-   return reg;
-}
-
-/**
- * Return an integer identifying the discrete address space a register is
- * contained in.  A register is by definition fully contained in the single
- * reg_space it belongs to, so two registers with different reg_space ids are
- * guaranteed not to overlap.  Most register files are a single reg_space of
- * its own, only the VGRF file is composed of multiple discrete address
- * spaces, one for each VGRF allocation.
- */
-static inline uint32_t
-reg_space(const backend_reg &r)
-{
-   return r.file << 16 | (r.file == VGRF ? r.nr : 0);
-}
-
-/**
- * Return the base offset in bytes of a register relative to the start of its
- * reg_space().
- */
-static inline unsigned
-reg_offset(const backend_reg &r)
-{
-   return (r.file == VGRF || r.file == IMM ? 0 : r.nr) *
-          (r.file == UNIFORM ? 16 : REG_SIZE) + r.offset +
-          (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
-}
-
-/**
- * Return whether the register region starting at \p r and spanning \p dr
- * bytes could potentially overlap the register region starting at \p s and
- * spanning \p ds bytes.
- */
-static inline bool
-regions_overlap(const backend_reg &r, unsigned dr,
-                const backend_reg &s, unsigned ds)
-{
-   if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) {
-      /* COMPR4 regions are translated by the hardware during decompression
-       * into two separate half-regions 4 MRFs apart from each other.
-       */
-      backend_reg t0 = r;
-      t0.nr &= ~BRW_MRF_COMPR4;
-      backend_reg t1 = t0;
-      t1.offset += 4 * REG_SIZE;
-      return regions_overlap(t0, dr / 2, s, ds) ||
-             regions_overlap(t1, dr / 2, s, ds);
-
-   } else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) {
-      return regions_overlap(s, ds, r, dr);
-
-   } else {
-      return reg_space(r) == reg_space(s) &&
-             !(reg_offset(r) + dr <= reg_offset(s) ||
-               reg_offset(s) + ds <= reg_offset(r));
-   }
-}
-
-class vec4_instruction : public backend_instruction {
-public:
-   DECLARE_RALLOC_CXX_OPERATORS(vec4_instruction)
-
-   vec4_instruction(enum opcode opcode,
-                    const dst_reg &dst = dst_reg(),
-                    const src_reg &src0 = src_reg(),
-                    const src_reg &src1 = src_reg(),
-                    const src_reg &src2 = src_reg());
-
-   dst_reg dst;
-   src_reg src[3];
-
-   enum brw_urb_write_flags urb_write_flags;
-
-   unsigned sol_binding; /**< gfx6: SOL binding table index */
-   bool sol_final_write; /**< gfx6: send commit message */
-   unsigned sol_vertex; /**< gfx6: used for setting dst index in SVB header */
-
-   bool is_send_from_grf() const;
-   unsigned size_read(unsigned arg) const;
-   bool can_reswizzle(const struct intel_device_info *devinfo,
-                      int dst_writemask,
-                      int swizzle, int swizzle_mask);
-   void reswizzle(int dst_writemask, int swizzle);
-   bool can_do_source_mods(const struct intel_device_info *devinfo);
-   bool can_do_cmod();
-   bool can_do_writemask(const struct intel_device_info *devinfo);
-   bool can_change_types() const;
-   bool has_source_and_destination_hazard() const;
-   unsigned implied_mrf_writes() const;
-
-   bool is_align1_partial_write()
-   {
-      return opcode == VEC4_OPCODE_SET_LOW_32BIT ||
-             opcode == VEC4_OPCODE_SET_HIGH_32BIT;
-   }
-
-   bool reads_flag() const
-   {
-      return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2;
-   }
-
-   bool reads_flag(unsigned c)
-   {
-      if (opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2)
-         return true;
-
-      switch (predicate) {
-      case BRW_PREDICATE_NONE:
-         return false;
-      case BRW_PREDICATE_ALIGN16_REPLICATE_X:
-         return c == 0;
-      case BRW_PREDICATE_ALIGN16_REPLICATE_Y:
-         return c == 1;
-      case BRW_PREDICATE_ALIGN16_REPLICATE_Z:
-         return c == 2;
-      case BRW_PREDICATE_ALIGN16_REPLICATE_W:
-         return c == 3;
-      default:
-         return true;
-      }
-   }
-
-   bool writes_flag(const intel_device_info *devinfo) const
-   {
-      return (conditional_mod && ((opcode != BRW_OPCODE_SEL || devinfo->ver <= 5) &&
-                                  opcode != BRW_OPCODE_CSEL &&
-                                  opcode != BRW_OPCODE_IF &&
-                                  opcode != BRW_OPCODE_WHILE));
-   }
-
-   bool reads_g0_implicitly() const
-   {
-      switch (opcode) {
-      case SHADER_OPCODE_TEX:
-      case SHADER_OPCODE_TXL:
-      case SHADER_OPCODE_TXD:
-      case SHADER_OPCODE_TXF:
-      case SHADER_OPCODE_TXF_CMS_W:
-      case SHADER_OPCODE_TXF_CMS:
-      case SHADER_OPCODE_TXF_MCS:
-      case SHADER_OPCODE_TXS:
-      case SHADER_OPCODE_TG4:
-      case SHADER_OPCODE_TG4_OFFSET:
-      case SHADER_OPCODE_SAMPLEINFO:
-      case VS_OPCODE_PULL_CONSTANT_LOAD:
-      case GS_OPCODE_SET_PRIMITIVE_ID:
-      case GS_OPCODE_GET_INSTANCE_ID:
-      case SHADER_OPCODE_GFX4_SCRATCH_READ:
-      case SHADER_OPCODE_GFX4_SCRATCH_WRITE:
-         return true;
-      default:
-         return false;
-      }
-   }
-};
-
-/**
- * Make the execution of \p inst dependent on the evaluation of a possibly
- * inverted predicate.
- */
-inline vec4_instruction *
-set_predicate_inv(enum brw_predicate pred, bool inverse,
-                  vec4_instruction *inst)
-{
-   inst->predicate = pred;
-   inst->predicate_inverse = inverse;
-   return inst;
-}
-
-/**
- * Make the execution of \p inst dependent on the evaluation of a predicate.
- */
-inline vec4_instruction *
-set_predicate(enum brw_predicate pred, vec4_instruction *inst)
-{
-   return set_predicate_inv(pred, false, inst);
-}
-
-/**
- * Write the result of evaluating the condition given by \p mod to a flag
- * register.
- */
-inline vec4_instruction *
-set_condmod(enum brw_conditional_mod mod, vec4_instruction *inst)
-{
-   inst->conditional_mod = mod;
-   return inst;
-}
-
-/**
- * Clamp the result of \p inst to the saturation range of its destination
- * datatype.
- */
-inline vec4_instruction *
-set_saturate(bool saturate, vec4_instruction *inst)
-{
-   inst->saturate = saturate;
-   return inst;
-}
-
-/**
- * Return the number of dataflow registers written by the instruction (either
- * fully or partially) counted from 'floor(reg_offset(inst->dst) /
- * register_size)'.  The somewhat arbitrary register size unit is 16B for the
- * UNIFORM and IMM files and 32B for all other files.
- */
-inline unsigned
-regs_written(const vec4_instruction *inst)
-{
-   assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
-   return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE + inst->size_written,
-                       REG_SIZE);
-}
-
-/**
- * Return the number of dataflow registers read by the instruction (either
- * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
- * register_size)'.  The somewhat arbitrary register size unit is 16B for the
- * UNIFORM and IMM files and 32B for all other files.
- */
-inline unsigned
-regs_read(const vec4_instruction *inst, unsigned i)
-{
-   const unsigned reg_size =
-      inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 16 : REG_SIZE;
-   return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size + inst->size_read(i),
-                       reg_size);
-}
-
-static inline enum brw_reg_type
-get_exec_type(const vec4_instruction *inst)
-{
-   enum brw_reg_type exec_type = BRW_REGISTER_TYPE_B;
-
-   for (int i = 0; i < 3; i++) {
-      if (inst->src[i].file != BAD_FILE) {
-         const brw_reg_type t = get_exec_type(brw_reg_type(inst->src[i].type));
-         if (type_sz(t) > type_sz(exec_type))
-            exec_type = t;
-         else if (type_sz(t) == type_sz(exec_type) &&
-                  brw_reg_type_is_floating_point(t))
-            exec_type = t;
-      }
-   }
-
-   if (exec_type == BRW_REGISTER_TYPE_B)
-      exec_type = inst->dst.type;
-
-   /* TODO: We need to handle half-float conversions. */
-   assert(exec_type != BRW_REGISTER_TYPE_HF ||
-          inst->dst.type == BRW_REGISTER_TYPE_HF);
-   assert(exec_type != BRW_REGISTER_TYPE_B);
-
-   return exec_type;
-}
-
-static inline unsigned
-get_exec_type_size(const vec4_instruction *inst)
-{
-   return type_sz(get_exec_type(inst));
-}
-
-} /* namespace brw */
-
-#endif
diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp
index 01d1243bc77..4bb50369ec2 100644
--- a/src/intel/compiler/brw_schedule_instructions.cpp
+++ b/src/intel/compiler/brw_schedule_instructions.cpp
@@ -28,7 +28,6 @@
 #include "brw_eu.h"
 #include "brw_fs.h"
 #include "brw_fs_live_variables.h"
-#include "brw_vec4.h"
 #include "brw_cfg.h"
 #include "brw_shader.h"
 #include <new>
@@ -1027,25 +1026,6 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
    return benefit;
 }
 
-class vec4_instruction_scheduler : public instruction_scheduler
-{
-public:
-   vec4_instruction_scheduler(void *mem_ctx, const vec4_visitor *v, int grf_count);
-   void calculate_deps();
-   schedule_node *choose_instruction_to_schedule();
-   const vec4_visitor *v;
-
-   void run();
-};
-
-vec4_instruction_scheduler::vec4_instruction_scheduler(void *mem_ctx, const vec4_visitor *v,
-                                                       int grf_count)
-   : instruction_scheduler(mem_ctx, v, grf_count, /* grf_write_scale */ 1,
-                           /* post_reg_alloc */ true),
-     v(v)
-{
-}
-
 void
 instruction_scheduler::set_current_block(bblock_t *block)
 {
@@ -1534,179 +1514,6 @@ fs_instruction_scheduler::calculate_deps()
    clear_last_grf_write();
 }
 
-void
-vec4_instruction_scheduler::calculate_deps()
-{
-   schedule_node *last_mrf_write[BRW_MAX_MRF(v->devinfo->ver)];
-   schedule_node *last_conditional_mod = NULL;
-   schedule_node *last_accumulator_write = NULL;
-   /* Fixed HW registers are assumed to be separate from the virtual
-    * GRFs, so they can be tracked separately.  We don't really write
-    * to fixed GRFs much, so don't bother tracking them on a more
-    * granular level.
-    */
-   schedule_node *last_fixed_grf_write = NULL;
-
-   memset(last_grf_write, 0, grf_count * sizeof(*last_grf_write));
-   memset(last_mrf_write, 0, sizeof(last_mrf_write));
-
-   /* top-to-bottom dependencies: RAW and WAW. */
-   for (schedule_node *n = current.start; n < current.end; n++) {
-      vec4_instruction *inst = (vec4_instruction *)n->inst;
-
-      if (is_scheduling_barrier(inst))
-         add_barrier_deps(n);
-
-      /* read-after-write deps. */
-      for (int i = 0; i < 3; i++) {
-         if (inst->src[i].file == VGRF) {
-            for (unsigned j = 0; j < regs_read(inst, i); ++j)
-               add_dep(last_grf_write[inst->src[i].nr + j], n);
-         } else if (inst->src[i].file == FIXED_GRF) {
-            add_dep(last_fixed_grf_write, n);
-         } else if (inst->src[i].is_accumulator()) {
-            assert(last_accumulator_write);
-            add_dep(last_accumulator_write, n);
-         } else if (inst->src[i].file == ARF && !inst->src[i].is_null()) {
-            add_barrier_deps(n);
-         }
-      }
-
-      if (inst->reads_g0_implicitly())
-         add_dep(last_fixed_grf_write, n);
-
-      if (!inst->is_send_from_grf()) {
-         for (int i = 0; i < inst->mlen; i++) {
-            /* It looks like the MRF regs are released in the send
-             * instruction once it's sent, not when the result comes
-             * back.
-             */
-            add_dep(last_mrf_write[inst->base_mrf + i], n);
-         }
-      }
-
-      if (inst->reads_flag()) {
-         assert(last_conditional_mod);
-         add_dep(last_conditional_mod, n);
-      }
-
-      if (inst->reads_accumulator_implicitly()) {
-         assert(last_accumulator_write);
-         add_dep(last_accumulator_write, n);
-      }
-
-      /* write-after-write deps. */
-      if (inst->dst.file == VGRF) {
-         for (unsigned j = 0; j < regs_written(inst); ++j) {
-            add_dep(last_grf_write[inst->dst.nr + j], n);
-            last_grf_write[inst->dst.nr + j] = n;
-         }
-      } else if (inst->dst.file == MRF) {
-         add_dep(last_mrf_write[inst->dst.nr], n);
-         last_mrf_write[inst->dst.nr] = n;
-     } else if (inst->dst.file == FIXED_GRF) {
-         add_dep(last_fixed_grf_write, n);
-         last_fixed_grf_write = n;
-      } else if (inst->dst.is_accumulator()) {
-         add_dep(last_accumulator_write, n);
-         last_accumulator_write = n;
-      } else if (inst->dst.file == ARF && !inst->dst.is_null()) {
-         add_barrier_deps(n);
-      }
-
-      if (inst->mlen > 0 && !inst->is_send_from_grf()) {
-         for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
-            add_dep(last_mrf_write[inst->base_mrf + i], n);
-            last_mrf_write[inst->base_mrf + i] = n;
-         }
-      }
-
-      if (inst->writes_flag(v->devinfo)) {
-         add_dep(last_conditional_mod, n, 0);
-         last_conditional_mod = n;
-      }
-
-      if (inst->writes_accumulator_implicitly(v->devinfo) &&
-          !inst->dst.is_accumulator()) {
-         add_dep(last_accumulator_write, n);
-         last_accumulator_write = n;
-      }
-   }
-
-   /* bottom-to-top dependencies: WAR */
-   memset(last_grf_write, 0, grf_count * sizeof(*last_grf_write));
-   memset(last_mrf_write, 0, sizeof(last_mrf_write));
-   last_conditional_mod = NULL;
-   last_accumulator_write = NULL;
-   last_fixed_grf_write = NULL;
-
-   for (schedule_node *n = current.end - 1; n >= current.start; n--) {
-      vec4_instruction *inst = (vec4_instruction *)n->inst;
-
-      /* write-after-read deps. */
-      for (int i = 0; i < 3; i++) {
-         if (inst->src[i].file == VGRF) {
-            for (unsigned j = 0; j < regs_read(inst, i); ++j)
-               add_dep(n, last_grf_write[inst->src[i].nr + j]);
-         } else if (inst->src[i].file == FIXED_GRF) {
-            add_dep(n, last_fixed_grf_write);
-         } else if (inst->src[i].is_accumulator()) {
-            add_dep(n, last_accumulator_write);
-         } else if (inst->src[i].file == ARF && !inst->src[i].is_null()) {
-            add_barrier_deps(n);
-         }
-      }
-
-      if (!inst->is_send_from_grf()) {
-         for (int i = 0; i < inst->mlen; i++) {
-            /* It looks like the MRF regs are released in the send
-             * instruction once it's sent, not when the result comes
-             * back.
-             */
-            add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
-         }
-      }
-
-      if (inst->reads_flag()) {
-         add_dep(n, last_conditional_mod);
-      }
-
-      if (inst->reads_accumulator_implicitly()) {
-         add_dep(n, last_accumulator_write);
-      }
-
-      /* Update the things this instruction wrote, so earlier reads
-       * can mark this as WAR dependency.
-       */
-      if (inst->dst.file == VGRF) {
-         for (unsigned j = 0; j < regs_written(inst); ++j)
-            last_grf_write[inst->dst.nr + j] = n;
-      } else if (inst->dst.file == MRF) {
-         last_mrf_write[inst->dst.nr] = n;
-      } else if (inst->dst.file == FIXED_GRF) {
-         last_fixed_grf_write = n;
-      } else if (inst->dst.is_accumulator()) {
-         last_accumulator_write = n;
-      } else if (inst->dst.file == ARF && !inst->dst.is_null()) {
-         add_barrier_deps(n);
-      }
-
-      if (inst->mlen > 0 && !inst->is_send_from_grf()) {
-         for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
-            last_mrf_write[inst->base_mrf + i] = n;
-         }
-      }
-
-      if (inst->writes_flag(v->devinfo)) {
-         last_conditional_mod = n;
-      }
-
-      if (inst->writes_accumulator_implicitly(v->devinfo)) {
-         last_accumulator_write = n;
-      }
-   }
-}
-
 schedule_node *
 fs_instruction_scheduler::choose_instruction_to_schedule()
 {
@@ -1837,25 +1644,6 @@ fs_instruction_scheduler::choose_instruction_to_schedule()
    return chosen;
 }
 
-schedule_node *
-vec4_instruction_scheduler::choose_instruction_to_schedule()
-{
-   schedule_node *chosen = NULL;
-   int chosen_time = 0;
-
-   /* Of the instructions ready to execute or the closest to being ready,
-    * choose the oldest one.
-    */
-   foreach_in_list(schedule_node, n, &current.available) {
-      if (!chosen || n->tmp.unblocked_time < chosen_time) {
-         chosen = n;
-         chosen_time = n->tmp.unblocked_time;
-      }
-   }
-
-   return chosen;
-}
-
 int
 fs_instruction_scheduler::calculate_issue_time(backend_instruction *inst0)
 {
@@ -2009,41 +1797,6 @@ fs_instruction_scheduler::run(instruction_scheduler_mode mode)
    }
 }
 
-void
-vec4_instruction_scheduler::run()
-{
-   foreach_block(block, v->cfg) {
-      set_current_block(block);
-
-      for (schedule_node *n = current.start; n < current.end; n++) {
-         /* We always execute as two vec4s in parallel. */
-         n->issue_time = 2;
-      }
-
-      calculate_deps();
-
-      compute_delays();
-      compute_exits();
-
-      assert(current.available.is_empty());
-      for (schedule_node *n = current.start; n < current.end; n++) {
-         reset_node_tmp(n);
-
-         /* Add DAG heads to the list of available instructions. */
-         if (n->tmp.parent_count == 0)
-            current.available.push_tail(n);
-      }
-
-      current.block->instructions.make_empty();
-
-      while (!current.available.is_empty()) {
-         schedule_node *chosen = choose_instruction_to_schedule();
-         schedule(chosen);
-         update_children(chosen);
-      }
-   }
-}
-
 fs_instruction_scheduler *
 fs_visitor::prepare_scheduler(void *mem_ctx)
 {
@@ -2082,16 +1835,3 @@ fs_visitor::schedule_instructions_post_ra()
 
    invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
 }
-
-void
-vec4_visitor::opt_schedule_instructions()
-{
-   void *mem_ctx = ralloc_context(NULL);
-
-   vec4_instruction_scheduler sched(mem_ctx, this, prog_data->total_grf);
-   sched.run();
-
-   ralloc_free(mem_ctx);
-
-   invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
-}
diff --git a/src/intel/compiler/brw_shader.h b/src/intel/compiler/brw_shader.h
index fbd50c07e7e..18c867841cf 100644
--- a/src/intel/compiler/brw_shader.h
+++ b/src/intel/compiler/brw_shader.h
@@ -114,9 +114,6 @@ extern "C" {
 /* brw_fs_reg_allocate.cpp */
 void brw_fs_alloc_reg_sets(struct brw_compiler *compiler);
 
-/* brw_vec4_reg_allocate.cpp */
-void brw_vec4_alloc_reg_set(struct brw_compiler *compiler);
-
 /* brw_disasm.c */
 extern const char *const conditional_modifier[16];
 extern const char *const pred_ctrl_align16[16];
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
deleted file mode 100644
index 3473ef59bd7..00000000000
--- a/src/intel/compiler/brw_vec4.cpp
+++ /dev/null
@@ -1,2542 +0,0 @@
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_vec4.h"
-#include "brw_cfg.h"
-#include "brw_vec4_builder.h"
-#include "brw_vec4_vs.h"
-#include "brw_dead_control_flow.h"
-#include "dev/intel_debug.h"
-
-#define MAX_INSTRUCTION (1 << 30)
-
-using namespace brw;
-
-namespace brw {
-
-void
-src_reg::init()
-{
-   memset((void*)this, 0, sizeof(*this));
-   this->file = BAD_FILE;
-   this->type = BRW_REGISTER_TYPE_UD;
-}
-
-src_reg::src_reg(enum brw_reg_file file, int nr, const glsl_type *type)
-{
-   init();
-
-   this->file = file;
-   this->nr = nr;
-   if (type && (glsl_type_is_scalar(type) || glsl_type_is_vector(type) || glsl_type_is_matrix(type)))
-      this->swizzle = brw_swizzle_for_size(type->vector_elements);
-   else
-      this->swizzle = BRW_SWIZZLE_XYZW;
-   if (type)
-      this->type = brw_type_for_base_type(type);
-}
-
-/** Generic unset register constructor. */
-src_reg::src_reg()
-{
-   init();
-}
-
-src_reg::src_reg(struct ::brw_reg reg) :
-   backend_reg(reg)
-{
-   this->offset = 0;
-   this->reladdr = NULL;
-}
-
-src_reg::src_reg(const dst_reg &reg) :
-   backend_reg(reg)
-{
-   this->reladdr = reg.reladdr;
-   this->swizzle = brw_swizzle_for_mask(reg.writemask);
-}
-
-void
-dst_reg::init()
-{
-   memset((void*)this, 0, sizeof(*this));
-   this->file = BAD_FILE;
-   this->type = BRW_REGISTER_TYPE_UD;
-   this->writemask = WRITEMASK_XYZW;
-}
-
-dst_reg::dst_reg()
-{
-   init();
-}
-
-dst_reg::dst_reg(enum brw_reg_file file, int nr)
-{
-   init();
-
-   this->file = file;
-   this->nr = nr;
-}
-
-dst_reg::dst_reg(enum brw_reg_file file, int nr, const glsl_type *type,
-                 unsigned writemask)
-{
-   init();
-
-   this->file = file;
-   this->nr = nr;
-   this->type = brw_type_for_base_type(type);
-   this->writemask = writemask;
-}
-
-dst_reg::dst_reg(enum brw_reg_file file, int nr, brw_reg_type type,
-                 unsigned writemask)
-{
-   init();
-
-   this->file = file;
-   this->nr = nr;
-   this->type = type;
-   this->writemask = writemask;
-}
-
-dst_reg::dst_reg(struct ::brw_reg reg) :
-   backend_reg(reg)
-{
-   this->offset = 0;
-   this->reladdr = NULL;
-}
-
-dst_reg::dst_reg(const src_reg &reg) :
-   backend_reg(reg)
-{
-   this->writemask = brw_mask_for_swizzle(reg.swizzle);
-   this->reladdr = reg.reladdr;
-}
-
-bool
-dst_reg::equals(const dst_reg &r) const
-{
-   return (this->backend_reg::equals(r) &&
-           (reladdr == r.reladdr ||
-            (reladdr && r.reladdr && reladdr->equals(*r.reladdr))));
-}
-
-bool
-vec4_instruction::is_send_from_grf() const
-{
-   switch (opcode) {
-   case VS_OPCODE_PULL_CONSTANT_LOAD_GFX7:
-   case VEC4_OPCODE_UNTYPED_ATOMIC:
-   case VEC4_OPCODE_UNTYPED_SURFACE_READ:
-   case VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
-   case VEC4_OPCODE_URB_READ:
-   case VEC4_TCS_OPCODE_URB_WRITE:
-   case TCS_OPCODE_RELEASE_INPUT:
-   case SHADER_OPCODE_BARRIER:
-      return true;
-   default:
-      return false;
-   }
-}
-
-/**
- * Returns true if this instruction's sources and destinations cannot
- * safely be the same register.
- *
- * In most cases, a register can be written over safely by the same
- * instruction that is its last use.  For a single instruction, the
- * sources are dereferenced before writing of the destination starts
- * (naturally).
- *
- * However, there are a few cases where this can be problematic:
- *
- * - Virtual opcodes that translate to multiple instructions in the
- *   code generator: if src == dst and one instruction writes the
- *   destination before a later instruction reads the source, then
- *   src will have been clobbered.
- *
- * The register allocator uses this information to set up conflicts between
- * GRF sources and the destination.
- */
-bool
-vec4_instruction::has_source_and_destination_hazard() const
-{
-   switch (opcode) {
-   case VEC4_TCS_OPCODE_SET_INPUT_URB_OFFSETS:
-   case VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
-   case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
-      return true;
-   default:
-      /* 8-wide compressed DF operations are executed as two 4-wide operations,
-       * so we have a src/dst hazard if the first half of the instruction
-       * overwrites the source of the second half. Prevent this by marking
-       * compressed instructions as having src/dst hazards, so the register
-       * allocator assigns safe register regions for dst and srcs.
-       */
-      return size_written > REG_SIZE;
-   }
-}
-
-unsigned
-vec4_instruction::size_read(unsigned arg) const
-{
-   switch (opcode) {
-   case VEC4_OPCODE_UNTYPED_ATOMIC:
-   case VEC4_OPCODE_UNTYPED_SURFACE_READ:
-   case VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
-   case VEC4_TCS_OPCODE_URB_WRITE:
-      if (arg == 0)
-         return mlen * REG_SIZE;
-      break;
-   case VS_OPCODE_PULL_CONSTANT_LOAD_GFX7:
-      if (arg == 1)
-         return mlen * REG_SIZE;
-      break;
-   default:
-      break;
-   }
-
-   switch (src[arg].file) {
-   case BAD_FILE:
-      return 0;
-   case IMM:
-   case UNIFORM:
-      return 4 * type_sz(src[arg].type);
-   default:
-      /* XXX - Represent actual vertical stride. */
-      return exec_size * type_sz(src[arg].type);
-   }
-}
-
-bool
-vec4_instruction::can_do_source_mods(const struct intel_device_info *devinfo)
-{
-   if (devinfo->ver == 6 && is_math())
-      return false;
-
-   if (is_send_from_grf())
-      return false;
-
-   if (!backend_instruction::can_do_source_mods())
-      return false;
-
-   return true;
-}
-
-bool
-vec4_instruction::can_do_cmod()
-{
-   if (!backend_instruction::can_do_cmod())
-      return false;
-
-   /* The accumulator result appears to get used for the conditional modifier
-    * generation.  When negating a UD value, there is a 33rd bit generated for
-    * the sign in the accumulator value, so now you can't check, for example,
-    * equality with a 32-bit value.  See piglit fs-op-neg-uvec4.
-    */
-   for (unsigned i = 0; i < 3; i++) {
-      if (src[i].file != BAD_FILE &&
-          brw_reg_type_is_unsigned_integer(src[i].type) && src[i].negate)
-         return false;
-   }
-
-   return true;
-}
-
-bool
-vec4_instruction::can_do_writemask(const struct intel_device_info *devinfo)
-{
-   switch (opcode) {
-   case SHADER_OPCODE_GFX4_SCRATCH_READ:
-   case VEC4_OPCODE_DOUBLE_TO_F32:
-   case VEC4_OPCODE_DOUBLE_TO_D32:
-   case VEC4_OPCODE_DOUBLE_TO_U32:
-   case VEC4_OPCODE_TO_DOUBLE:
-   case VEC4_OPCODE_PICK_LOW_32BIT:
-   case VEC4_OPCODE_PICK_HIGH_32BIT:
-   case VEC4_OPCODE_SET_LOW_32BIT:
-   case VEC4_OPCODE_SET_HIGH_32BIT:
-   case VS_OPCODE_PULL_CONSTANT_LOAD:
-   case VS_OPCODE_PULL_CONSTANT_LOAD_GFX7:
-   case VEC4_TCS_OPCODE_SET_INPUT_URB_OFFSETS:
-   case VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
-   case TES_OPCODE_CREATE_INPUT_READ_HEADER:
-   case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
-   case VEC4_OPCODE_URB_READ:
-   case SHADER_OPCODE_MOV_INDIRECT:
-   case SHADER_OPCODE_TEX:
-   case FS_OPCODE_TXB:
-   case SHADER_OPCODE_TXD:
-   case SHADER_OPCODE_TXF:
-   case SHADER_OPCODE_TXF_LZ:
-   case SHADER_OPCODE_TXF_CMS:
-   case SHADER_OPCODE_TXF_CMS_W:
-   case SHADER_OPCODE_TXF_UMS:
-   case SHADER_OPCODE_TXF_MCS:
-   case SHADER_OPCODE_TXL:
-   case SHADER_OPCODE_TXL_LZ:
-   case SHADER_OPCODE_TXS:
-   case SHADER_OPCODE_LOD:
-   case SHADER_OPCODE_TG4:
-   case SHADER_OPCODE_TG4_OFFSET:
-   case SHADER_OPCODE_SAMPLEINFO:
-      return false;
-   default:
-      /* The MATH instruction on Gfx6 only executes in align1 mode, which does
-       * not support writemasking.
-       */
-      if (devinfo->ver == 6 && is_math())
-         return false;
-
-      return true;
-   }
-}
-
-bool
-vec4_instruction::can_change_types() const
-{
-   return dst.type == src[0].type &&
-          !src[0].abs && !src[0].negate && !saturate &&
-          (opcode == BRW_OPCODE_MOV ||
-           (opcode == BRW_OPCODE_SEL &&
-            dst.type == src[1].type &&
-            predicate != BRW_PREDICATE_NONE &&
-            !src[1].abs && !src[1].negate));
-}
-
-/**
- * Returns how many MRFs an opcode will write over.
- *
- * Note that this is not the 0 or 1 implied writes in an actual gen
- * instruction -- the generate_* functions generate additional MOVs
- * for setup.
- */
-unsigned
-vec4_instruction::implied_mrf_writes() const
-{
-   if (mlen == 0 || is_send_from_grf())
-      return 0;
-
-   switch (opcode) {
-   case SHADER_OPCODE_RCP:
-   case SHADER_OPCODE_RSQ:
-   case SHADER_OPCODE_SQRT:
-   case SHADER_OPCODE_EXP2:
-   case SHADER_OPCODE_LOG2:
-   case SHADER_OPCODE_SIN:
-   case SHADER_OPCODE_COS:
-      return 1;
-   case SHADER_OPCODE_INT_QUOTIENT:
-   case SHADER_OPCODE_INT_REMAINDER:
-   case SHADER_OPCODE_POW:
-   case TCS_OPCODE_THREAD_END:
-      return 2;
-   case VEC4_VS_OPCODE_URB_WRITE:
-      return 1;
-   case VS_OPCODE_PULL_CONSTANT_LOAD:
-      return 2;
-   case SHADER_OPCODE_GFX4_SCRATCH_READ:
-      return 2;
-   case SHADER_OPCODE_GFX4_SCRATCH_WRITE:
-      return 3;
-   case VEC4_GS_OPCODE_URB_WRITE:
-   case VEC4_GS_OPCODE_URB_WRITE_ALLOCATE:
-   case GS_OPCODE_THREAD_END:
-      return 0;
-   case GS_OPCODE_FF_SYNC:
-      return 1;
-   case VEC4_TCS_OPCODE_URB_WRITE:
-      return 0;
-   case SHADER_OPCODE_TEX:
-   case SHADER_OPCODE_TXL:
-   case SHADER_OPCODE_TXD:
-   case SHADER_OPCODE_TXF:
-   case SHADER_OPCODE_TXF_CMS:
-   case SHADER_OPCODE_TXF_CMS_W:
-   case SHADER_OPCODE_TXF_MCS:
-   case SHADER_OPCODE_TXS:
-   case SHADER_OPCODE_TG4:
-   case SHADER_OPCODE_TG4_OFFSET:
-   case SHADER_OPCODE_SAMPLEINFO:
-   case SHADER_OPCODE_GET_BUFFER_SIZE:
-      return header_size;
-   default:
-      unreachable("not reached");
-   }
-}
-
-bool
-src_reg::equals(const src_reg &r) const
-{
-   return (this->backend_reg::equals(r) &&
-	   !reladdr && !r.reladdr);
-}
-
-bool
-src_reg::negative_equals(const src_reg &r) const
-{
-   return this->backend_reg::negative_equals(r) &&
-          !reladdr && !r.reladdr;
-}
-
-bool
-vec4_visitor::opt_vector_float()
-{
-   bool progress = false;
-
-   foreach_block(block, cfg) {
-      unsigned last_reg = ~0u, last_offset = ~0u;
-      enum brw_reg_file last_reg_file = BAD_FILE;
-
-      uint8_t imm[4] = { 0 };
-      int inst_count = 0;
-      vec4_instruction *imm_inst[4];
-      unsigned writemask = 0;
-      enum brw_reg_type dest_type = BRW_REGISTER_TYPE_F;
-
-      foreach_inst_in_block_safe(vec4_instruction, inst, block) {
-         int vf = -1;
-         enum brw_reg_type need_type = BRW_REGISTER_TYPE_LAST;
-
-         /* Look for unconditional MOVs from an immediate with a partial
-          * writemask.  Skip type-conversion MOVs other than integer 0,
-          * where the type doesn't matter.  See if the immediate can be
-          * represented as a VF.
-          */
-         if (inst->opcode == BRW_OPCODE_MOV &&
-             inst->src[0].file == IMM &&
-             inst->predicate == BRW_PREDICATE_NONE &&
-             inst->dst.writemask != WRITEMASK_XYZW &&
-             type_sz(inst->src[0].type) < 8 &&
-             (inst->src[0].type == inst->dst.type || inst->src[0].d == 0)) {
-
-            vf = brw_float_to_vf(inst->src[0].d);
-            need_type = BRW_REGISTER_TYPE_D;
-
-            if (vf == -1) {
-               vf = brw_float_to_vf(inst->src[0].f);
-               need_type = BRW_REGISTER_TYPE_F;
-            }
-         } else {
-            last_reg = ~0u;
-         }
-
-         /* If this wasn't a MOV, or the destination register doesn't match,
-          * or we have to switch destination types, then this breaks our
-          * sequence.  Combine anything we've accumulated so far.
-          */
-         if (last_reg != inst->dst.nr ||
-             last_offset != inst->dst.offset ||
-             last_reg_file != inst->dst.file ||
-             (vf > 0 && dest_type != need_type)) {
-
-            if (inst_count > 1) {
-               unsigned vf;
-               memcpy(&vf, imm, sizeof(vf));
-               vec4_instruction *mov = MOV(imm_inst[0]->dst, brw_imm_vf(vf));
-               mov->dst.type = dest_type;
-               mov->dst.writemask = writemask;
-               inst->insert_before(block, mov);
-
-               for (int i = 0; i < inst_count; i++) {
-                  imm_inst[i]->remove(block);
-               }
-
-               progress = true;
-            }
-
-            inst_count = 0;
-            last_reg = ~0u;;
-            writemask = 0;
-            dest_type = BRW_REGISTER_TYPE_F;
-
-            for (int i = 0; i < 4; i++) {
-               imm[i] = 0;
-            }
-         }
-
-         /* Record this instruction's value (if it was representable). */
-         if (vf != -1) {
-            if ((inst->dst.writemask & WRITEMASK_X) != 0)
-               imm[0] = vf;
-            if ((inst->dst.writemask & WRITEMASK_Y) != 0)
-               imm[1] = vf;
-            if ((inst->dst.writemask & WRITEMASK_Z) != 0)
-               imm[2] = vf;
-            if ((inst->dst.writemask & WRITEMASK_W) != 0)
-               imm[3] = vf;
-
-            writemask |= inst->dst.writemask;
-            imm_inst[inst_count++] = inst;
-
-            last_reg = inst->dst.nr;
-            last_offset = inst->dst.offset;
-            last_reg_file = inst->dst.file;
-            if (vf > 0)
-               dest_type = need_type;
-         }
-      }
-   }
-
-   if (progress)
-      invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
-
-   return progress;
-}
-
-/* Replaces unused channels of a swizzle with channels that are used.
- *
- * For instance, this pass transforms
- *
- *    mov vgrf4.yz, vgrf5.wxzy
- *
- * into
- *
- *    mov vgrf4.yz, vgrf5.xxzx
- *
- * This eliminates false uses of some channels, letting dead code elimination
- * remove the instructions that wrote them.
- */
-bool
-vec4_visitor::opt_reduce_swizzle()
-{
-   bool progress = false;
-
-   foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
-      if (inst->dst.file == BAD_FILE ||
-          inst->dst.file == ARF ||
-          inst->dst.file == FIXED_GRF ||
-          inst->is_send_from_grf())
-         continue;
-
-      unsigned swizzle;
-
-      /* Determine which channels of the sources are read. */
-      switch (inst->opcode) {
-      case VEC4_OPCODE_PACK_BYTES:
-      case BRW_OPCODE_DP4:
-      case BRW_OPCODE_DPH: /* FINISHME: DPH reads only three channels of src0,
-                            *           but all four of src1.
-                            */
-         swizzle = brw_swizzle_for_size(4);
-         break;
-      case BRW_OPCODE_DP3:
-         swizzle = brw_swizzle_for_size(3);
-         break;
-      case BRW_OPCODE_DP2:
-         swizzle = brw_swizzle_for_size(2);
-         break;
-
-      case VEC4_OPCODE_TO_DOUBLE:
-      case VEC4_OPCODE_DOUBLE_TO_F32:
-      case VEC4_OPCODE_DOUBLE_TO_D32:
-      case VEC4_OPCODE_DOUBLE_TO_U32:
-      case VEC4_OPCODE_PICK_LOW_32BIT:
-      case VEC4_OPCODE_PICK_HIGH_32BIT:
-      case VEC4_OPCODE_SET_LOW_32BIT:
-      case VEC4_OPCODE_SET_HIGH_32BIT:
-         swizzle = brw_swizzle_for_size(4);
-         break;
-
-      default:
-         swizzle = brw_swizzle_for_mask(inst->dst.writemask);
-         break;
-      }
-
-      /* Update sources' swizzles. */
-      for (int i = 0; i < 3; i++) {
-         if (inst->src[i].file != VGRF &&
-             inst->src[i].file != ATTR &&
-             inst->src[i].file != UNIFORM)
-            continue;
-
-         const unsigned new_swizzle =
-            brw_compose_swizzle(swizzle, inst->src[i].swizzle);
-         if (inst->src[i].swizzle != new_swizzle) {
-            inst->src[i].swizzle = new_swizzle;
-            progress = true;
-         }
-      }
-   }
-
-   if (progress)
-      invalidate_analysis(DEPENDENCY_INSTRUCTION_DETAIL);
-
-   return progress;
-}
-
-void
-vec4_visitor::split_uniform_registers()
-{
-   /* Prior to this, uniforms have been in an array sized according to
-    * the number of vector uniforms present, sparsely filled (so an
-    * aggregate results in reg indices being skipped over).  Now we're
-    * going to cut those aggregates up so each .nr index is one
-    * vector.  The goal is to make elimination of unused uniform
-    * components easier later.
-    */
-   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      for (int i = 0 ; i < 3; i++) {
-         if (inst->src[i].file != UNIFORM || inst->src[i].nr >= UBO_START)
-	    continue;
-
-	 assert(!inst->src[i].reladdr);
-
-         inst->src[i].nr += inst->src[i].offset / 16;
-	 inst->src[i].offset %= 16;
-      }
-   }
-}
-
-/**
- * Does algebraic optimizations (0 * a = 0, 1 * a = a, a + 0 = a).
- *
- * While GLSL IR also performs this optimization, we end up with it in
- * our instruction stream for a couple of reasons.  One is that we
- * sometimes generate silly instructions, for example in array access
- * where we'll generate "ADD offset, index, base" even if base is 0.
- * The other is that GLSL IR's constant propagation doesn't track the
- * components of aggregates, so some VS patterns (initialize matrix to
- * 0, accumulate in vertex blending factors) end up breaking down to
- * instructions involving 0.
- */
-bool
-vec4_visitor::opt_algebraic()
-{
-   bool progress = false;
-
-   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      switch (inst->opcode) {
-      case BRW_OPCODE_MOV:
-         if (inst->src[0].file != IMM)
-            break;
-
-         if (inst->saturate) {
-            /* Full mixed-type saturates don't happen.  However, we can end up
-             * with things like:
-             *
-             *    mov.sat(8) g21<1>DF       -1F
-             *
-             * Other mixed-size-but-same-base-type cases may also be possible.
-             */
-            if (inst->dst.type != inst->src[0].type &&
-                inst->dst.type != BRW_REGISTER_TYPE_DF &&
-                inst->src[0].type != BRW_REGISTER_TYPE_F)
-               assert(!"unimplemented: saturate mixed types");
-
-            if (brw_saturate_immediate(inst->src[0].type,
-                                       &inst->src[0].as_brw_reg())) {
-               inst->saturate = false;
-               progress = true;
-            }
-         }
-         break;
-
-      case BRW_OPCODE_OR:
-         if (inst->src[1].is_zero()) {
-            inst->opcode = BRW_OPCODE_MOV;
-            inst->src[1] = src_reg();
-            progress = true;
-         }
-         break;
-
-      case VEC4_OPCODE_UNPACK_UNIFORM:
-         if (inst->src[0].file != UNIFORM) {
-            inst->opcode = BRW_OPCODE_MOV;
-            progress = true;
-         }
-         break;
-
-      case BRW_OPCODE_ADD:
-	 if (inst->src[1].is_zero()) {
-	    inst->opcode = BRW_OPCODE_MOV;
-	    inst->src[1] = src_reg();
-	    progress = true;
-	 }
-	 break;
-
-      case BRW_OPCODE_MUL:
-	 if (inst->src[1].file != IMM)
-	    continue;
-
-	 if (brw_reg_type_is_floating_point(inst->src[1].type))
-	    break;
-
-	 if (inst->src[1].is_zero()) {
-	    inst->opcode = BRW_OPCODE_MOV;
-	    switch (inst->src[0].type) {
-	    case BRW_REGISTER_TYPE_F:
-	       inst->src[0] = brw_imm_f(0.0f);
-	       break;
-	    case BRW_REGISTER_TYPE_D:
-	       inst->src[0] = brw_imm_d(0);
-	       break;
-	    case BRW_REGISTER_TYPE_UD:
-	       inst->src[0] = brw_imm_ud(0u);
-	       break;
-	    default:
-	       unreachable("not reached");
-	    }
-	    inst->src[1] = src_reg();
-	    progress = true;
-	 } else if (inst->src[1].is_one()) {
-	    inst->opcode = BRW_OPCODE_MOV;
-	    inst->src[1] = src_reg();
-	    progress = true;
-         } else if (inst->src[1].is_negative_one()) {
-            inst->opcode = BRW_OPCODE_MOV;
-            inst->src[0].negate = !inst->src[0].negate;
-            inst->src[1] = src_reg();
-            progress = true;
-	 }
-	 break;
-      case SHADER_OPCODE_BROADCAST:
-         if (is_uniform(inst->src[0]) ||
-             inst->src[1].is_zero()) {
-            inst->opcode = BRW_OPCODE_MOV;
-            inst->src[1] = src_reg();
-            inst->force_writemask_all = true;
-            progress = true;
-         }
-         break;
-
-      default:
-	 break;
-      }
-   }
-
-   if (progress)
-      invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW |
-                          DEPENDENCY_INSTRUCTION_DETAIL);
-
-   return progress;
-}
-
-/* Conditions for which we want to avoid setting the dependency control bits */
-bool
-vec4_visitor::is_dep_ctrl_unsafe(const vec4_instruction *inst)
-{
-#define IS_DWORD(reg) \
-   (reg.type == BRW_REGISTER_TYPE_UD || \
-    reg.type == BRW_REGISTER_TYPE_D)
-
-#define IS_64BIT(reg) (reg.file != BAD_FILE && type_sz(reg.type) == 8)
-
-   if (devinfo->ver >= 7) {
-      if (IS_64BIT(inst->dst) || IS_64BIT(inst->src[0]) ||
-          IS_64BIT(inst->src[1]) || IS_64BIT(inst->src[2]))
-      return true;
-   }
-
-#undef IS_64BIT
-#undef IS_DWORD
-
-   /*
-    * mlen:
-    * In the presence of send messages, totally interrupt dependency
-    * control. They're long enough that the chance of dependency
-    * control around them just doesn't matter.
-    *
-    * predicate:
-    * From the Ivy Bridge PRM, volume 4 part 3.7, page 80:
-    * When a sequence of NoDDChk and NoDDClr are used, the last instruction that
-    * completes the scoreboard clear must have a non-zero execution mask. This
-    * means, if any kind of predication can change the execution mask or channel
-    * enable of the last instruction, the optimization must be avoided. This is
-    * to avoid instructions being shot down the pipeline when no writes are
-    * required.
-    *
-    * math:
-    * Dependency control does not work well over math instructions.
-    * NB: Discovered empirically
-    */
-   return (inst->mlen || inst->predicate || inst->is_math());
-}
-
-/**
- * Sets the dependency control fields on instructions after register
- * allocation and before the generator is run.
- *
- * When you have a sequence of instructions like:
- *
- * DP4 temp.x vertex uniform[0]
- * DP4 temp.y vertex uniform[0]
- * DP4 temp.z vertex uniform[0]
- * DP4 temp.w vertex uniform[0]
- *
- * The hardware doesn't know that it can actually run the later instructions
- * while the previous ones are in flight, producing stalls.  However, we have
- * manual fields we can set in the instructions that let it do so.
- */
-void
-vec4_visitor::opt_set_dependency_control()
-{
-   vec4_instruction *last_grf_write[BRW_MAX_GRF];
-   uint8_t grf_channels_written[BRW_MAX_GRF];
-   vec4_instruction *last_mrf_write[BRW_MAX_GRF];
-   uint8_t mrf_channels_written[BRW_MAX_GRF];
-
-   assert(prog_data->total_grf ||
-          !"Must be called after register allocation");
-
-   foreach_block (block, cfg) {
-      memset(last_grf_write, 0, sizeof(last_grf_write));
-      memset(last_mrf_write, 0, sizeof(last_mrf_write));
-
-      foreach_inst_in_block (vec4_instruction, inst, block) {
-         /* If we read from a register that we were doing dependency control
-          * on, don't do dependency control across the read.
-          */
-         for (int i = 0; i < 3; i++) {
-            int reg = inst->src[i].nr + inst->src[i].offset / REG_SIZE;
-            if (inst->src[i].file == VGRF) {
-               last_grf_write[reg] = NULL;
-            } else if (inst->src[i].file == FIXED_GRF) {
-               memset(last_grf_write, 0, sizeof(last_grf_write));
-               break;
-            }
-            assert(inst->src[i].file != MRF);
-         }
-
-         if (is_dep_ctrl_unsafe(inst)) {
-            memset(last_grf_write, 0, sizeof(last_grf_write));
-            memset(last_mrf_write, 0, sizeof(last_mrf_write));
-            continue;
-         }
-
-         /* Now, see if we can do dependency control for this instruction
-          * against a previous one writing to its destination.
-          */
-         int reg = inst->dst.nr + inst->dst.offset / REG_SIZE;
-         if (inst->dst.file == VGRF || inst->dst.file == FIXED_GRF) {
-            if (last_grf_write[reg] &&
-                last_grf_write[reg]->dst.offset == inst->dst.offset &&
-                !(inst->dst.writemask & grf_channels_written[reg])) {
-               last_grf_write[reg]->no_dd_clear = true;
-               inst->no_dd_check = true;
-            } else {
-               grf_channels_written[reg] = 0;
-            }
-
-            last_grf_write[reg] = inst;
-            grf_channels_written[reg] |= inst->dst.writemask;
-         } else if (inst->dst.file == MRF) {
-            if (last_mrf_write[reg] &&
-                last_mrf_write[reg]->dst.offset == inst->dst.offset &&
-                !(inst->dst.writemask & mrf_channels_written[reg])) {
-               last_mrf_write[reg]->no_dd_clear = true;
-               inst->no_dd_check = true;
-            } else {
-               mrf_channels_written[reg] = 0;
-            }
-
-            last_mrf_write[reg] = inst;
-            mrf_channels_written[reg] |= inst->dst.writemask;
-         }
-      }
-   }
-}
-
-bool
-vec4_instruction::can_reswizzle(const struct intel_device_info *devinfo,
-                                int dst_writemask,
-                                int swizzle,
-                                int swizzle_mask)
-{
-   /* Gfx6 MATH instructions can not execute in align16 mode, so swizzles
-    * are not allowed.
-    */
-   if (devinfo->ver == 6 && is_math() && swizzle != BRW_SWIZZLE_XYZW)
-      return false;
-
-   /* If we write to the flag register changing the swizzle would change
-    * what channels are written to the flag register.
-    */
-   if (writes_flag(devinfo))
-      return false;
-
-   /* We can't swizzle implicit accumulator access.  We'd have to
-    * reswizzle the producer of the accumulator value in addition
-    * to the consumer (i.e. both MUL and MACH).  Just skip this.
-    */
-   if (reads_accumulator_implicitly())
-      return false;
-
-   if (!can_do_writemask(devinfo) && dst_writemask != WRITEMASK_XYZW)
-      return false;
-
-   /* If this instruction sets anything not referenced by swizzle, then we'd
-    * totally break it when we reswizzle.
-    */
-   if (dst.writemask & ~swizzle_mask)
-      return false;
-
-   if (mlen > 0)
-      return false;
-
-   for (int i = 0; i < 3; i++) {
-      if (src[i].is_accumulator())
-         return false;
-   }
-
-   return true;
-}
-
-/**
- * For any channels in the swizzle's source that were populated by this
- * instruction, rewrite the instruction to put the appropriate result directly
- * in those channels.
- *
- * e.g. for swizzle=yywx, MUL a.xy b c -> MUL a.yy_x b.yy z.yy_x
- */
-void
-vec4_instruction::reswizzle(int dst_writemask, int swizzle)
-{
-   /* Destination write mask doesn't correspond to source swizzle for the dot
-    * product and pack_bytes instructions.
-    */
-   if (opcode != BRW_OPCODE_DP4 && opcode != BRW_OPCODE_DPH &&
-       opcode != BRW_OPCODE_DP3 && opcode != BRW_OPCODE_DP2 &&
-       opcode != VEC4_OPCODE_PACK_BYTES) {
-      for (int i = 0; i < 3; i++) {
-         if (src[i].file == BAD_FILE)
-            continue;
-
-         if (src[i].file == IMM) {
-            assert(src[i].type != BRW_REGISTER_TYPE_V &&
-                   src[i].type != BRW_REGISTER_TYPE_UV);
-
-            /* Vector immediate types need to be reswizzled. */
-            if (src[i].type == BRW_REGISTER_TYPE_VF) {
-               const unsigned imm[] = {
-                  (src[i].ud >>  0) & 0x0ff,
-                  (src[i].ud >>  8) & 0x0ff,
-                  (src[i].ud >> 16) & 0x0ff,
-                  (src[i].ud >> 24) & 0x0ff,
-               };
-
-               src[i] = brw_imm_vf4(imm[BRW_GET_SWZ(swizzle, 0)],
-                                    imm[BRW_GET_SWZ(swizzle, 1)],
-                                    imm[BRW_GET_SWZ(swizzle, 2)],
-                                    imm[BRW_GET_SWZ(swizzle, 3)]);
-            }
-
-            continue;
-         }
-
-         src[i].swizzle = brw_compose_swizzle(swizzle, src[i].swizzle);
-      }
-   }
-
-   /* Apply the specified swizzle and writemask to the original mask of
-    * written components.
-    */
-   dst.writemask = dst_writemask &
-                   brw_apply_swizzle_to_mask(swizzle, dst.writemask);
-}
-
-/*
- * Tries to reduce extra MOV instructions by taking temporary GRFs that get
- * just written and then MOVed into another reg and making the original write
- * of the GRF write directly to the final destination instead.
- */
-bool
-vec4_visitor::opt_register_coalesce()
-{
-   bool progress = false;
-   int next_ip = 0;
-   const vec4_live_variables &live = live_analysis.require();
-
-   foreach_block_and_inst_safe (block, vec4_instruction, inst, cfg) {
-      int ip = next_ip;
-      next_ip++;
-
-      if (inst->opcode != BRW_OPCODE_MOV ||
-          (inst->dst.file != VGRF && inst->dst.file != MRF) ||
-	  inst->predicate ||
-	  inst->src[0].file != VGRF ||
-	  inst->dst.type != inst->src[0].type ||
-	  inst->src[0].abs || inst->src[0].negate || inst->src[0].reladdr)
-	 continue;
-
-      /* Remove no-op MOVs */
-      if (inst->dst.file == inst->src[0].file &&
-          inst->dst.nr == inst->src[0].nr &&
-          inst->dst.offset == inst->src[0].offset) {
-         bool is_nop_mov = true;
-
-         for (unsigned c = 0; c < 4; c++) {
-            if ((inst->dst.writemask & (1 << c)) == 0)
-               continue;
-
-            if (BRW_GET_SWZ(inst->src[0].swizzle, c) != c) {
-               is_nop_mov = false;
-               break;
-            }
-         }
-
-         if (is_nop_mov) {
-            inst->remove(block);
-            progress = true;
-            continue;
-         }
-      }
-
-      bool to_mrf = (inst->dst.file == MRF);
-
-      /* Can't coalesce this GRF if someone else was going to
-       * read it later.
-       */
-      if (live.var_range_end(var_from_reg(alloc, dst_reg(inst->src[0])), 8) > ip)
-	 continue;
-
-      /* We need to check interference with the final destination between this
-       * instruction and the earliest instruction involved in writing the GRF
-       * we're eliminating.  To do that, keep track of which of our source
-       * channels we've seen initialized.
-       */
-      const unsigned chans_needed =
-         brw_apply_inv_swizzle_to_mask(inst->src[0].swizzle,
-                                       inst->dst.writemask);
-      unsigned chans_remaining = chans_needed;
-
-      /* Now walk up the instruction stream trying to see if we can rewrite
-       * everything writing to the temporary to write into the destination
-       * instead.
-       */
-      vec4_instruction *_scan_inst = (vec4_instruction *)inst->prev;
-      foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst,
-                                                  inst) {
-         _scan_inst = scan_inst;
-
-         if (regions_overlap(inst->src[0], inst->size_read(0),
-                             scan_inst->dst, scan_inst->size_written)) {
-            /* Found something writing to the reg we want to coalesce away. */
-            if (to_mrf) {
-               /* SEND instructions can't have MRF as a destination. */
-               if (scan_inst->mlen)
-                  break;
-
-               if (devinfo->ver == 6) {
-                  /* gfx6 math instructions must have the destination be
-                   * VGRF, so no compute-to-MRF for them.
-                   */
-                  if (scan_inst->is_math()) {
-                     break;
-                  }
-               }
-            }
-
-            /* VS_OPCODE_UNPACK_FLAGS_SIMD4X2 generates a bunch of mov(1)
-             * instructions, and this optimization pass is not capable of
-             * handling that.  Bail on these instructions and hope that some
-             * later optimization pass can do the right thing after they are
-             * expanded.
-             */
-            if (scan_inst->opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2)
-               break;
-
-            /* This doesn't handle saturation on the instruction we
-             * want to coalesce away if the register types do not match.
-             * But if scan_inst is a non type-converting 'mov', we can fix
-             * the types later.
-             */
-            if (inst->saturate &&
-                inst->dst.type != scan_inst->dst.type &&
-                !(scan_inst->opcode == BRW_OPCODE_MOV &&
-                  scan_inst->dst.type == scan_inst->src[0].type))
-               break;
-
-            /* Only allow coalescing between registers of the same type size.
-             * Otherwise we would need to make the pass aware of the fact that
-             * channel sizes are different for single and double precision.
-             */
-            if (type_sz(inst->src[0].type) != type_sz(scan_inst->src[0].type))
-               break;
-
-            /* Check that scan_inst writes the same amount of data as the
-             * instruction, otherwise coalescing would lead to writing a
-             * different (larger or smaller) region of the destination
-             */
-            if (scan_inst->size_written != inst->size_written)
-               break;
-
-            /* If we can't handle the swizzle, bail. */
-            if (!scan_inst->can_reswizzle(devinfo, inst->dst.writemask,
-                                          inst->src[0].swizzle,
-                                          chans_needed)) {
-               break;
-            }
-
-            /* This only handles coalescing writes of 8 channels (1 register
-             * for single-precision and 2 registers for double-precision)
-             * starting at the source offset of the copy instruction.
-             */
-            if (DIV_ROUND_UP(scan_inst->size_written,
-                             type_sz(scan_inst->dst.type)) > 8 ||
-                scan_inst->dst.offset != inst->src[0].offset)
-               break;
-
-	    /* Mark which channels we found unconditional writes for. */
-	    if (!scan_inst->predicate)
-               chans_remaining &= ~scan_inst->dst.writemask;
-
-	    if (chans_remaining == 0)
-	       break;
-	 }
-
-         /* You can't read from an MRF, so if someone else reads our MRF's
-          * source GRF that we wanted to rewrite, that stops us.  If it's a
-          * GRF we're trying to coalesce to, we don't actually handle
-          * rewriting sources so bail in that case as well.
-          */
-	 bool interfered = false;
-	 for (int i = 0; i < 3; i++) {
-            if (regions_overlap(inst->src[0], inst->size_read(0),
-                                scan_inst->src[i], scan_inst->size_read(i)))
-	       interfered = true;
-	 }
-	 if (interfered)
-	    break;
-
-         /* If somebody else writes the same channels of our destination here,
-          * we can't coalesce before that.
-          */
-         if (regions_overlap(inst->dst, inst->size_written,
-                             scan_inst->dst, scan_inst->size_written) &&
-             (inst->dst.writemask & scan_inst->dst.writemask) != 0) {
-            break;
-         }
-
-         /* Check for reads of the register we're trying to coalesce into.  We
-          * can't go rewriting instructions above that to put some other value
-          * in the register instead.
-          */
-         if (to_mrf && scan_inst->mlen > 0) {
-            unsigned start = scan_inst->base_mrf;
-            unsigned end = scan_inst->base_mrf + scan_inst->mlen;
-
-            if (inst->dst.nr >= start && inst->dst.nr < end) {
-               break;
-            }
-         } else {
-            for (int i = 0; i < 3; i++) {
-               if (regions_overlap(inst->dst, inst->size_written,
-                                   scan_inst->src[i], scan_inst->size_read(i)))
-                  interfered = true;
-            }
-            if (interfered)
-               break;
-         }
-      }
-
-      if (chans_remaining == 0) {
-	 /* If we've made it here, we have an MOV we want to coalesce out, and
-	  * a scan_inst pointing to the earliest instruction involved in
-	  * computing the value.  Now go rewrite the instruction stream
-	  * between the two.
-	  */
-         vec4_instruction *scan_inst = _scan_inst;
-	 while (scan_inst != inst) {
-	    if (scan_inst->dst.file == VGRF &&
-                scan_inst->dst.nr == inst->src[0].nr &&
-		scan_inst->dst.offset == inst->src[0].offset) {
-               scan_inst->reswizzle(inst->dst.writemask,
-                                    inst->src[0].swizzle);
-	       scan_inst->dst.file = inst->dst.file;
-               scan_inst->dst.nr = inst->dst.nr;
-	       scan_inst->dst.offset = inst->dst.offset;
-               if (inst->saturate &&
-                   inst->dst.type != scan_inst->dst.type) {
-                  /* If we have reached this point, scan_inst is a non
-                   * type-converting 'mov' and we can modify its register types
-                   * to match the ones in inst. Otherwise, we could have an
-                   * incorrect saturation result.
-                   */
-                  scan_inst->dst.type = inst->dst.type;
-                  scan_inst->src[0].type = inst->src[0].type;
-               }
-	       scan_inst->saturate |= inst->saturate;
-	    }
-	    scan_inst = (vec4_instruction *)scan_inst->next;
-	 }
-	 inst->remove(block);
-	 progress = true;
-      }
-   }
-
-   if (progress)
-      invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
-
-   return progress;
-}
-
-/**
- * Eliminate FIND_LIVE_CHANNEL instructions occurring outside any control
- * flow.  We could probably do better here with some form of divergence
- * analysis.
- */
-bool
-vec4_visitor::eliminate_find_live_channel()
-{
-   bool progress = false;
-   unsigned depth = 0;
-
-   if (!brw_stage_has_packed_dispatch(devinfo, stage, 0, stage_prog_data)) {
-      /* The optimization below assumes that channel zero is live on thread
-       * dispatch, which may not be the case if the fixed function dispatches
-       * threads sparsely.
-       */
-      return false;
-   }
-
-   foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
-      switch (inst->opcode) {
-      case BRW_OPCODE_IF:
-      case BRW_OPCODE_DO:
-         depth++;
-         break;
-
-      case BRW_OPCODE_ENDIF:
-      case BRW_OPCODE_WHILE:
-         depth--;
-         break;
-
-      case SHADER_OPCODE_FIND_LIVE_CHANNEL:
-         if (depth == 0) {
-            inst->opcode = BRW_OPCODE_MOV;
-            inst->src[0] = brw_imm_d(0);
-            inst->force_writemask_all = true;
-            progress = true;
-         }
-         break;
-
-      default:
-         break;
-      }
-   }
-
-   if (progress)
-      invalidate_analysis(DEPENDENCY_INSTRUCTION_DETAIL);
-
-   return progress;
-}
-
-/**
- * Splits virtual GRFs requesting more than one contiguous physical register.
- *
- * We initially create large virtual GRFs for temporary structures, arrays,
- * and matrices, so that the visitor functions can add offsets to work their
- * way down to the actual member being accessed.  But when it comes to
- * optimization, we'd like to treat each register as individual storage if
- * possible.
- *
- * So far, the only thing that might prevent splitting is a send message from
- * a GRF on IVB.
- */
-void
-vec4_visitor::split_virtual_grfs()
-{
-   int num_vars = this->alloc.count;
-   int new_virtual_grf[num_vars];
-   bool split_grf[num_vars];
-
-   memset(new_virtual_grf, 0, sizeof(new_virtual_grf));
-
-   /* Try to split anything > 0 sized. */
-   for (int i = 0; i < num_vars; i++) {
-      split_grf[i] = this->alloc.sizes[i] != 1;
-   }
-
-   /* Check that the instructions are compatible with the registers we're trying
-    * to split.
-    */
-   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      if (inst->dst.file == VGRF && regs_written(inst) > 1)
-         split_grf[inst->dst.nr] = false;
-
-      for (int i = 0; i < 3; i++) {
-         if (inst->src[i].file == VGRF && regs_read(inst, i) > 1)
-            split_grf[inst->src[i].nr] = false;
-      }
-   }
-
-   /* Allocate new space for split regs.  Note that the virtual
-    * numbers will be contiguous.
-    */
-   for (int i = 0; i < num_vars; i++) {
-      if (!split_grf[i])
-         continue;
-
-      new_virtual_grf[i] = alloc.allocate(1);
-      for (unsigned j = 2; j < this->alloc.sizes[i]; j++) {
-         unsigned reg = alloc.allocate(1);
-         assert(reg == new_virtual_grf[i] + j - 1);
-         (void) reg;
-      }
-      this->alloc.sizes[i] = 1;
-   }
-
-   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      if (inst->dst.file == VGRF && split_grf[inst->dst.nr] &&
-          inst->dst.offset / REG_SIZE != 0) {
-         inst->dst.nr = (new_virtual_grf[inst->dst.nr] +
-                         inst->dst.offset / REG_SIZE - 1);
-         inst->dst.offset %= REG_SIZE;
-      }
-      for (int i = 0; i < 3; i++) {
-         if (inst->src[i].file == VGRF && split_grf[inst->src[i].nr] &&
-             inst->src[i].offset / REG_SIZE != 0) {
-            inst->src[i].nr = (new_virtual_grf[inst->src[i].nr] +
-                                inst->src[i].offset / REG_SIZE - 1);
-            inst->src[i].offset %= REG_SIZE;
-         }
-      }
-   }
-   invalidate_analysis(DEPENDENCY_INSTRUCTION_DETAIL | DEPENDENCY_VARIABLES);
-}
-
-void
-vec4_visitor::dump_instruction_to_file(const backend_instruction *be_inst, FILE *file) const
-{
-   const vec4_instruction *inst = (const vec4_instruction *)be_inst;
-
-   if (inst->predicate) {
-      fprintf(file, "(%cf%d.%d%s) ",
-              inst->predicate_inverse ? '-' : '+',
-              inst->flag_subreg / 2,
-              inst->flag_subreg % 2,
-              pred_ctrl_align16[inst->predicate]);
-   }
-
-   fprintf(file, "%s(%d)", brw_instruction_name(&compiler->isa, inst->opcode),
-           inst->exec_size);
-   if (inst->saturate)
-      fprintf(file, ".sat");
-   if (inst->conditional_mod) {
-      fprintf(file, "%s", conditional_modifier[inst->conditional_mod]);
-      if (!inst->predicate &&
-          (devinfo->ver < 5 || (inst->opcode != BRW_OPCODE_SEL &&
-                                inst->opcode != BRW_OPCODE_CSEL &&
-                                inst->opcode != BRW_OPCODE_IF &&
-                                inst->opcode != BRW_OPCODE_WHILE))) {
-         fprintf(file, ".f%d.%d", inst->flag_subreg / 2, inst->flag_subreg % 2);
-      }
-   }
-   fprintf(file, " ");
-
-   switch (inst->dst.file) {
-   case VGRF:
-      fprintf(file, "vgrf%d", inst->dst.nr);
-      break;
-   case FIXED_GRF:
-      fprintf(file, "g%d", inst->dst.nr);
-      break;
-   case MRF:
-      fprintf(file, "m%d", inst->dst.nr);
-      break;
-   case ARF:
-      switch (inst->dst.nr) {
-      case BRW_ARF_NULL:
-         fprintf(file, "null");
-         break;
-      case BRW_ARF_ADDRESS:
-         fprintf(file, "a0.%d", inst->dst.subnr);
-         break;
-      case BRW_ARF_ACCUMULATOR:
-         fprintf(file, "acc%d", inst->dst.subnr);
-         break;
-      case BRW_ARF_FLAG:
-         fprintf(file, "f%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
-         break;
-      default:
-         fprintf(file, "arf%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
-         break;
-      }
-      break;
-   case BAD_FILE:
-      fprintf(file, "(null)");
-      break;
-   case IMM:
-   case ATTR:
-   case UNIFORM:
-      unreachable("not reached");
-   }
-   if (inst->dst.offset ||
-       (inst->dst.file == VGRF &&
-        alloc.sizes[inst->dst.nr] * REG_SIZE != inst->size_written)) {
-      const unsigned reg_size = (inst->dst.file == UNIFORM ? 16 : REG_SIZE);
-      fprintf(file, "+%d.%d", inst->dst.offset / reg_size,
-              inst->dst.offset % reg_size);
-   }
-   if (inst->dst.writemask != WRITEMASK_XYZW) {
-      fprintf(file, ".");
-      if (inst->dst.writemask & 1)
-         fprintf(file, "x");
-      if (inst->dst.writemask & 2)
-         fprintf(file, "y");
-      if (inst->dst.writemask & 4)
-         fprintf(file, "z");
-      if (inst->dst.writemask & 8)
-         fprintf(file, "w");
-   }
-   fprintf(file, ":%s", brw_reg_type_to_letters(inst->dst.type));
-
-   if (inst->src[0].file != BAD_FILE)
-      fprintf(file, ", ");
-
-   for (int i = 0; i < 3 && inst->src[i].file != BAD_FILE; i++) {
-      if (inst->src[i].negate)
-         fprintf(file, "-");
-      if (inst->src[i].abs)
-         fprintf(file, "|");
-      switch (inst->src[i].file) {
-      case VGRF:
-         fprintf(file, "vgrf%d", inst->src[i].nr);
-         break;
-      case FIXED_GRF:
-         fprintf(file, "g%d.%d", inst->src[i].nr, inst->src[i].subnr);
-         break;
-      case ATTR:
-         fprintf(file, "attr%d", inst->src[i].nr);
-         break;
-      case UNIFORM:
-         fprintf(file, "u%d", inst->src[i].nr);
-         break;
-      case IMM:
-         switch (inst->src[i].type) {
-         case BRW_REGISTER_TYPE_F:
-            fprintf(file, "%fF", inst->src[i].f);
-            break;
-         case BRW_REGISTER_TYPE_DF:
-            fprintf(file, "%fDF", inst->src[i].df);
-            break;
-         case BRW_REGISTER_TYPE_D:
-            fprintf(file, "%dD", inst->src[i].d);
-            break;
-         case BRW_REGISTER_TYPE_UD:
-            fprintf(file, "%uU", inst->src[i].ud);
-            break;
-         case BRW_REGISTER_TYPE_VF:
-            fprintf(file, "[%-gF, %-gF, %-gF, %-gF]",
-                    brw_vf_to_float((inst->src[i].ud >>  0) & 0xff),
-                    brw_vf_to_float((inst->src[i].ud >>  8) & 0xff),
-                    brw_vf_to_float((inst->src[i].ud >> 16) & 0xff),
-                    brw_vf_to_float((inst->src[i].ud >> 24) & 0xff));
-            break;
-         default:
-            fprintf(file, "???");
-            break;
-         }
-         break;
-      case ARF:
-         switch (inst->src[i].nr) {
-         case BRW_ARF_NULL:
-            fprintf(file, "null");
-            break;
-         case BRW_ARF_ADDRESS:
-            fprintf(file, "a0.%d", inst->src[i].subnr);
-            break;
-         case BRW_ARF_ACCUMULATOR:
-            fprintf(file, "acc%d", inst->src[i].subnr);
-            break;
-         case BRW_ARF_FLAG:
-            fprintf(file, "f%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
-            break;
-         default:
-            fprintf(file, "arf%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
-            break;
-         }
-         break;
-      case BAD_FILE:
-         fprintf(file, "(null)");
-         break;
-      case MRF:
-         unreachable("not reached");
-      }
-
-      if (inst->src[i].offset ||
-          (inst->src[i].file == VGRF &&
-           alloc.sizes[inst->src[i].nr] * REG_SIZE != inst->size_read(i))) {
-         const unsigned reg_size = (inst->src[i].file == UNIFORM ? 16 : REG_SIZE);
-         fprintf(file, "+%d.%d", inst->src[i].offset / reg_size,
-                 inst->src[i].offset % reg_size);
-      }
-
-      if (inst->src[i].file != IMM) {
-         static const char *chans[4] = {"x", "y", "z", "w"};
-         fprintf(file, ".");
-         for (int c = 0; c < 4; c++) {
-            fprintf(file, "%s", chans[BRW_GET_SWZ(inst->src[i].swizzle, c)]);
-         }
-      }
-
-      if (inst->src[i].abs)
-         fprintf(file, "|");
-
-      if (inst->src[i].file != IMM) {
-         fprintf(file, ":%s", brw_reg_type_to_letters(inst->src[i].type));
-      }
-
-      if (i < 2 && inst->src[i + 1].file != BAD_FILE)
-         fprintf(file, ", ");
-   }
-
-   if (inst->force_writemask_all)
-      fprintf(file, " NoMask");
-
-   if (inst->exec_size != 8)
-      fprintf(file, " group%d", inst->group);
-
-   fprintf(file, "\n");
-}
-
-
-int
-vec4_vs_visitor::setup_attributes(int payload_reg)
-{
-   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      for (int i = 0; i < 3; i++) {
-         if (inst->src[i].file == ATTR) {
-            assert(inst->src[i].offset % REG_SIZE == 0);
-            int grf = payload_reg + inst->src[i].nr +
-                      inst->src[i].offset / REG_SIZE;
-
-            struct brw_reg reg = brw_vec8_grf(grf, 0);
-            reg.swizzle = inst->src[i].swizzle;
-            reg.type = inst->src[i].type;
-            reg.abs = inst->src[i].abs;
-            reg.negate = inst->src[i].negate;
-            inst->src[i] = reg;
-         }
-      }
-   }
-
-   return payload_reg + vs_prog_data->nr_attribute_slots;
-}
-
-void
-vec4_visitor::setup_push_ranges()
-{
-   /* Only allow 32 registers (256 uniform components) as push constants,
-    * which is the limit on gfx6.
-    *
-    * If changing this value, note the limitation about total_regs in
-    * brw_curbe.c.
-    */
-   const unsigned max_push_length = 32;
-
-   push_length = DIV_ROUND_UP(prog_data->base.nr_params, 8);
-   push_length = MIN2(push_length, max_push_length);
-
-   /* Shrink UBO push ranges so it all fits in max_push_length */
-   for (unsigned i = 0; i < 4; i++) {
-      struct brw_ubo_range *range = &prog_data->base.ubo_ranges[i];
-
-      if (push_length + range->length > max_push_length)
-         range->length = max_push_length - push_length;
-
-      push_length += range->length;
-   }
-   assert(push_length <= max_push_length);
-}
-
-int
-vec4_visitor::setup_uniforms(int reg)
-{
-   /* It's possible that uniform compaction will shrink further than expected
-    * so we re-compute the layout and set up our UBO push starts.
-    */
-   ASSERTED const unsigned old_push_length = push_length;
-   push_length = DIV_ROUND_UP(prog_data->base.nr_params, 8);
-   for (unsigned i = 0; i < 4; i++) {
-      ubo_push_start[i] = push_length;
-      push_length += stage_prog_data->ubo_ranges[i].length;
-   }
-   assert(push_length == old_push_length);
-
-   /* The pre-gfx6 VS requires that some push constants get loaded no
-    * matter what, or the GPU would hang.
-    */
-   if (devinfo->ver < 6 && push_length == 0) {
-      brw_stage_prog_data_add_params(stage_prog_data, 4);
-      for (unsigned int i = 0; i < 4; i++) {
-	 unsigned int slot = this->uniforms * 4 + i;
-	 stage_prog_data->param[slot] = BRW_PARAM_BUILTIN_ZERO;
-      }
-      push_length = 1;
-   }
-
-   prog_data->base.dispatch_grf_start_reg = reg;
-   prog_data->base.curb_read_length = push_length;
-
-   return reg + push_length;
-}
-
-void
-vec4_vs_visitor::setup_payload(void)
-{
-   int reg = 0;
-
-   /* The payload always contains important data in g0, which contains
-    * the URB handles that are passed on to the URB write at the end
-    * of the thread.  So, we always start push constants at g1.
-    */
-   reg++;
-
-   reg = setup_uniforms(reg);
-
-   reg = setup_attributes(reg);
-
-   this->first_non_payload_grf = reg;
-}
-
-bool
-vec4_visitor::lower_minmax()
-{
-   assert(devinfo->ver < 6);
-
-   bool progress = false;
-
-   foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
-      const vec4_builder ibld(this, block, inst);
-
-      if (inst->opcode == BRW_OPCODE_SEL &&
-          inst->predicate == BRW_PREDICATE_NONE) {
-         /* If src1 is an immediate value that is not NaN, then it can't be
-          * NaN.  In that case, emit CMP because it is much better for cmod
-          * propagation.  Likewise if src1 is not float.  Gfx4 and Gfx5 don't
-          * support HF or DF, so it is not necessary to check for those.
-          */
-         if (inst->src[1].type != BRW_REGISTER_TYPE_F ||
-             (inst->src[1].file == IMM && !isnan(inst->src[1].f))) {
-            ibld.CMP(ibld.null_reg_d(), inst->src[0], inst->src[1],
-                     inst->conditional_mod);
-         } else {
-            ibld.CMPN(ibld.null_reg_d(), inst->src[0], inst->src[1],
-                      inst->conditional_mod);
-         }
-         inst->predicate = BRW_PREDICATE_NORMAL;
-         inst->conditional_mod = BRW_CONDITIONAL_NONE;
-
-         progress = true;
-      }
-   }
-
-   if (progress)
-      invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
-
-   return progress;
-}
-
-src_reg
-vec4_visitor::get_timestamp()
-{
-   assert(devinfo->ver == 7);
-
-   src_reg ts = src_reg(brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
-                                BRW_ARF_TIMESTAMP,
-                                0,
-                                0,
-                                0,
-                                BRW_REGISTER_TYPE_UD,
-                                BRW_VERTICAL_STRIDE_0,
-                                BRW_WIDTH_4,
-                                BRW_HORIZONTAL_STRIDE_4,
-                                BRW_SWIZZLE_XYZW,
-                                WRITEMASK_XYZW));
-
-   dst_reg dst = dst_reg(this, glsl_uvec4_type());
-
-   vec4_instruction *mov = emit(MOV(dst, ts));
-   /* We want to read the 3 fields we care about (mostly field 0, but also 2)
-    * even if it's not enabled in the dispatch.
-    */
-   mov->force_writemask_all = true;
-
-   return src_reg(dst);
-}
-
-static bool
-is_align1_df(vec4_instruction *inst)
-{
-   switch (inst->opcode) {
-   case VEC4_OPCODE_DOUBLE_TO_F32:
-   case VEC4_OPCODE_DOUBLE_TO_D32:
-   case VEC4_OPCODE_DOUBLE_TO_U32:
-   case VEC4_OPCODE_TO_DOUBLE:
-   case VEC4_OPCODE_PICK_LOW_32BIT:
-   case VEC4_OPCODE_PICK_HIGH_32BIT:
-   case VEC4_OPCODE_SET_LOW_32BIT:
-   case VEC4_OPCODE_SET_HIGH_32BIT:
-      return true;
-   default:
-      return false;
-   }
-}
-
-/**
- * Three source instruction must have a GRF/MRF destination register.
- * ARF NULL is not allowed.  Fix that up by allocating a temporary GRF.
- */
-void
-vec4_visitor::fixup_3src_null_dest()
-{
-   bool progress = false;
-
-   foreach_block_and_inst_safe (block, vec4_instruction, inst, cfg) {
-      if (inst->is_3src(compiler) && inst->dst.is_null()) {
-         const unsigned size_written = type_sz(inst->dst.type);
-         const unsigned num_regs = DIV_ROUND_UP(size_written, REG_SIZE);
-
-         inst->dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)),
-                            inst->dst.type);
-         progress = true;
-      }
-   }
-
-   if (progress)
-      invalidate_analysis(DEPENDENCY_INSTRUCTION_DETAIL |
-                          DEPENDENCY_VARIABLES);
-}
-
-void
-vec4_visitor::convert_to_hw_regs()
-{
-   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      for (int i = 0; i < 3; i++) {
-         class src_reg &src = inst->src[i];
-         struct brw_reg reg;
-         switch (src.file) {
-         case VGRF: {
-            reg = byte_offset(brw_vecn_grf(4, src.nr, 0), src.offset);
-            reg.type = src.type;
-            reg.abs = src.abs;
-            reg.negate = src.negate;
-            break;
-         }
-
-         case UNIFORM: {
-            if (src.nr >= UBO_START) {
-               reg = byte_offset(brw_vec4_grf(
-                                    prog_data->base.dispatch_grf_start_reg +
-                                    ubo_push_start[src.nr - UBO_START] +
-                                    src.offset / 32, 0),
-                                 src.offset % 32);
-            } else {
-               reg = byte_offset(brw_vec4_grf(
-                                    prog_data->base.dispatch_grf_start_reg +
-                                    src.nr / 2, src.nr % 2 * 4),
-                                 src.offset);
-            }
-            reg = stride(reg, 0, 4, 1);
-            reg.type = src.type;
-            reg.abs = src.abs;
-            reg.negate = src.negate;
-
-            /* This should have been moved to pull constants. */
-            assert(!src.reladdr);
-            break;
-         }
-
-         case FIXED_GRF:
-            if (type_sz(src.type) == 8) {
-               reg = src.as_brw_reg();
-               break;
-            }
-            FALLTHROUGH;
-         case ARF:
-         case IMM:
-            continue;
-
-         case BAD_FILE:
-            /* Probably unused. */
-            reg = brw_null_reg();
-            reg = retype(reg, src.type);
-            break;
-
-         case MRF:
-         case ATTR:
-            unreachable("not reached");
-         }
-
-         apply_logical_swizzle(&reg, inst, i);
-         src = reg;
-
-         /* From IVB PRM, vol4, part3, "General Restrictions on Regioning
-          * Parameters":
-          *
-          *   "If ExecSize = Width and HorzStride ≠ 0, VertStride must be set
-          *    to Width * HorzStride."
-          *
-          * We can break this rule with DF sources on DF align1
-          * instructions, because the exec_size would be 4 and width is 4.
-          * As we know we are not accessing to next GRF, it is safe to
-          * set vstride to the formula given by the rule itself.
-          */
-         if (is_align1_df(inst) && (cvt(inst->exec_size) - 1) == src.width)
-            src.vstride = src.width + src.hstride;
-      }
-
-      if (inst->is_3src(compiler)) {
-         /* 3-src instructions with scalar sources support arbitrary subnr,
-          * but don't actually use swizzles.  Convert swizzle into subnr.
-          * Skip this for double-precision instructions: RepCtrl=1 is not
-          * allowed for them and needs special handling.
-          */
-         for (int i = 0; i < 3; i++) {
-            if (inst->src[i].vstride == BRW_VERTICAL_STRIDE_0 &&
-                type_sz(inst->src[i].type) < 8) {
-               assert(brw_is_single_value_swizzle(inst->src[i].swizzle));
-               inst->src[i].subnr += 4 * BRW_GET_SWZ(inst->src[i].swizzle, 0);
-            }
-         }
-      }
-
-      dst_reg &dst = inst->dst;
-      struct brw_reg reg;
-
-      switch (inst->dst.file) {
-      case VGRF:
-         reg = byte_offset(brw_vec8_grf(dst.nr, 0), dst.offset);
-         reg.type = dst.type;
-         reg.writemask = dst.writemask;
-         break;
-
-      case MRF:
-         reg = byte_offset(brw_message_reg(dst.nr), dst.offset);
-         assert((reg.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->ver));
-         reg.type = dst.type;
-         reg.writemask = dst.writemask;
-         break;
-
-      case ARF:
-      case FIXED_GRF:
-         reg = dst.as_brw_reg();
-         break;
-
-      case BAD_FILE:
-         reg = brw_null_reg();
-         reg = retype(reg, dst.type);
-         break;
-
-      case IMM:
-      case ATTR:
-      case UNIFORM:
-         unreachable("not reached");
-      }
-
-      dst = reg;
-   }
-}
-
-static bool
-stage_uses_interleaved_attributes(unsigned stage,
-                                  enum intel_shader_dispatch_mode dispatch_mode)
-{
-   switch (stage) {
-   case MESA_SHADER_TESS_EVAL:
-      return true;
-   case MESA_SHADER_GEOMETRY:
-      return dispatch_mode != INTEL_DISPATCH_MODE_4X2_DUAL_OBJECT;
-   default:
-      return false;
-   }
-}
-
-/**
- * Get the closest native SIMD width supported by the hardware for instruction
- * \p inst.  The instruction will be left untouched by
- * vec4_visitor::lower_simd_width() if the returned value matches the
- * instruction's original execution size.
- */
-static unsigned
-get_lowered_simd_width(const struct intel_device_info *devinfo,
-                       enum intel_shader_dispatch_mode dispatch_mode,
-                       unsigned stage, const vec4_instruction *inst)
-{
-   /* Do not split some instructions that require special handling */
-   switch (inst->opcode) {
-   case SHADER_OPCODE_GFX4_SCRATCH_READ:
-   case SHADER_OPCODE_GFX4_SCRATCH_WRITE:
-      return inst->exec_size;
-   default:
-      break;
-   }
-
-   unsigned lowered_width = MIN2(16, inst->exec_size);
-
-   /* We need to split some cases of double-precision instructions that write
-    * 2 registers. We only need to care about this in gfx7 because that is the
-    * only hardware that implements fp64 in Align16.
-    */
-   if (devinfo->ver == 7 && inst->size_written > REG_SIZE) {
-      /* Align16 8-wide double-precision SEL does not work well. Verified
-       * empirically.
-       */
-      if (inst->opcode == BRW_OPCODE_SEL && type_sz(inst->dst.type) == 8)
-         lowered_width = MIN2(lowered_width, 4);
-
-      /* HSW PRM, 3D Media GPGPU Engine, Region Alignment Rules for Direct
-       * Register Addressing:
-       *
-       *    "When destination spans two registers, the source MUST span two
-       *     registers."
-       */
-      for (unsigned i = 0; i < 3; i++) {
-         if (inst->src[i].file == BAD_FILE)
-            continue;
-         if (inst->size_read(i) <= REG_SIZE)
-            lowered_width = MIN2(lowered_width, 4);
-
-         /* Interleaved attribute setups use a vertical stride of 0, which
-          * makes them hit the associated instruction decompression bug in gfx7.
-          * Split them to prevent this.
-          */
-         if (inst->src[i].file == ATTR &&
-             stage_uses_interleaved_attributes(stage, dispatch_mode))
-            lowered_width = MIN2(lowered_width, 4);
-      }
-   }
-
-   /* IvyBridge can manage a maximum of 4 DFs per SIMD4x2 instruction, since
-    * it doesn't support compression in Align16 mode, no matter if it has
-    * force_writemask_all enabled or disabled (the latter is affected by the
-    * compressed instruction bug in gfx7, which is another reason to enforce
-    * this limit).
-    */
-   if (devinfo->verx10 == 70 &&
-       (get_exec_type_size(inst) == 8 || type_sz(inst->dst.type) == 8))
-      lowered_width = MIN2(lowered_width, 4);
-
-   return lowered_width;
-}
-
-static bool
-dst_src_regions_overlap(vec4_instruction *inst)
-{
-   if (inst->size_written == 0)
-      return false;
-
-   unsigned dst_start = inst->dst.offset;
-   unsigned dst_end = dst_start + inst->size_written - 1;
-   for (int i = 0; i < 3; i++) {
-      if (inst->src[i].file == BAD_FILE)
-         continue;
-
-      if (inst->dst.file != inst->src[i].file ||
-          inst->dst.nr != inst->src[i].nr)
-         continue;
-
-      unsigned src_start = inst->src[i].offset;
-      unsigned src_end = src_start + inst->size_read(i) - 1;
-
-      if ((dst_start >= src_start && dst_start <= src_end) ||
-          (dst_end >= src_start && dst_end <= src_end) ||
-          (dst_start <= src_start && dst_end >= src_end)) {
-         return true;
-      }
-   }
-
-   return false;
-}
-
-bool
-vec4_visitor::lower_simd_width()
-{
-   bool progress = false;
-
-   foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
-      const unsigned lowered_width =
-         get_lowered_simd_width(devinfo, prog_data->dispatch_mode, stage, inst);
-      assert(lowered_width <= inst->exec_size);
-      if (lowered_width == inst->exec_size)
-         continue;
-
-      /* We need to deal with source / destination overlaps when splitting.
-       * The hardware supports reading from and writing to the same register
-       * in the same instruction, but we need to be careful that each split
-       * instruction we produce does not corrupt the source of the next.
-       *
-       * The easiest way to handle this is to make the split instructions write
-       * to temporaries if there is an src/dst overlap and then move from the
-       * temporaries to the original destination. We also need to consider
-       * instructions that do partial writes via align1 opcodes, in which case
-       * we need to make sure that the we initialize the temporary with the
-       * value of the instruction's dst.
-       */
-      bool needs_temp = dst_src_regions_overlap(inst);
-      for (unsigned n = 0; n < inst->exec_size / lowered_width; n++)  {
-         unsigned channel_offset = lowered_width * n;
-
-         unsigned size_written = lowered_width * type_sz(inst->dst.type);
-
-         /* Create the split instruction from the original so that we copy all
-          * relevant instruction fields, then set the width and calculate the
-          * new dst/src regions.
-          */
-         vec4_instruction *linst = new(mem_ctx) vec4_instruction(*inst);
-         linst->exec_size = lowered_width;
-         linst->group = channel_offset;
-         linst->size_written = size_written;
-
-         /* Compute split dst region */
-         dst_reg dst;
-         if (needs_temp) {
-            unsigned num_regs = DIV_ROUND_UP(size_written, REG_SIZE);
-            dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)),
-                         inst->dst.type);
-            if (inst->is_align1_partial_write()) {
-               vec4_instruction *copy = MOV(dst, src_reg(inst->dst));
-               copy->exec_size = lowered_width;
-               copy->group = channel_offset;
-               copy->size_written = size_written;
-               inst->insert_before(block, copy);
-            }
-         } else {
-            dst = horiz_offset(inst->dst, channel_offset);
-         }
-         linst->dst = dst;
-
-         /* Compute split source regions */
-         for (int i = 0; i < 3; i++) {
-            if (linst->src[i].file == BAD_FILE)
-               continue;
-
-            bool is_interleaved_attr =
-               linst->src[i].file == ATTR &&
-               stage_uses_interleaved_attributes(stage,
-                                                 prog_data->dispatch_mode);
-
-            if (!is_uniform(linst->src[i]) && !is_interleaved_attr)
-               linst->src[i] = horiz_offset(linst->src[i], channel_offset);
-         }
-
-         inst->insert_before(block, linst);
-
-         /* If we used a temporary to store the result of the split
-          * instruction, copy the result to the original destination
-          */
-         if (needs_temp) {
-            vec4_instruction *mov =
-               MOV(offset(inst->dst, lowered_width, n), src_reg(dst));
-            mov->exec_size = lowered_width;
-            mov->group = channel_offset;
-            mov->size_written = size_written;
-            mov->predicate = inst->predicate;
-            inst->insert_before(block, mov);
-         }
-      }
-
-      inst->remove(block);
-      progress = true;
-   }
-
-   if (progress)
-      invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
-
-   return progress;
-}
-
-static brw_predicate
-scalarize_predicate(brw_predicate predicate, unsigned writemask)
-{
-   if (predicate != BRW_PREDICATE_NORMAL)
-      return predicate;
-
-   switch (writemask) {
-   case WRITEMASK_X:
-      return BRW_PREDICATE_ALIGN16_REPLICATE_X;
-   case WRITEMASK_Y:
-      return BRW_PREDICATE_ALIGN16_REPLICATE_Y;
-   case WRITEMASK_Z:
-      return BRW_PREDICATE_ALIGN16_REPLICATE_Z;
-   case WRITEMASK_W:
-      return BRW_PREDICATE_ALIGN16_REPLICATE_W;
-   default:
-      unreachable("invalid writemask");
-   }
-}
-
-/* Gfx7 has a hardware decompression bug that we can exploit to represent
- * handful of additional swizzles natively.
- */
-static bool
-is_gfx7_supported_64bit_swizzle(vec4_instruction *inst, unsigned arg)
-{
-   switch (inst->src[arg].swizzle) {
-   case BRW_SWIZZLE_XXXX:
-   case BRW_SWIZZLE_YYYY:
-   case BRW_SWIZZLE_ZZZZ:
-   case BRW_SWIZZLE_WWWW:
-   case BRW_SWIZZLE_XYXY:
-   case BRW_SWIZZLE_YXYX:
-   case BRW_SWIZZLE_ZWZW:
-   case BRW_SWIZZLE_WZWZ:
-      return true;
-   default:
-      return false;
-   }
-}
-
-/* 64-bit sources use regions with a width of 2. These 2 elements in each row
- * can be addressed using 32-bit swizzles (which is what the hardware supports)
- * but it also means that the swizzle we apply on the first two components of a
- * dvec4 is coupled with the swizzle we use for the last 2. In other words,
- * only some specific swizzle combinations can be natively supported.
- *
- * FIXME: we can go an step further and implement even more swizzle
- *        variations using only partial scalarization.
- *
- * For more details see:
- * https://bugs.freedesktop.org/show_bug.cgi?id=92760#c82
- */
-bool
-vec4_visitor::is_supported_64bit_region(vec4_instruction *inst, unsigned arg)
-{
-   const src_reg &src = inst->src[arg];
-   assert(type_sz(src.type) == 8);
-
-   /* Uniform regions have a vstride=0. Because we use 2-wide rows with
-    * 64-bit regions it means that we cannot access components Z/W, so
-    * return false for any such case. Interleaved attributes will also be
-    * mapped to GRF registers with a vstride of 0, so apply the same
-    * treatment.
-    */
-   if ((is_uniform(src) ||
-        (stage_uses_interleaved_attributes(stage, prog_data->dispatch_mode) &&
-         src.file == ATTR)) &&
-       (brw_mask_for_swizzle(src.swizzle) & 12))
-      return false;
-
-   switch (src.swizzle) {
-   case BRW_SWIZZLE_XYZW:
-   case BRW_SWIZZLE_XXZZ:
-   case BRW_SWIZZLE_YYWW:
-   case BRW_SWIZZLE_YXWZ:
-      return true;
-   default:
-      return devinfo->ver == 7 && is_gfx7_supported_64bit_swizzle(inst, arg);
-   }
-}
-
-bool
-vec4_visitor::scalarize_df()
-{
-   bool progress = false;
-
-   foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
-      /* Skip DF instructions that operate in Align1 mode */
-      if (is_align1_df(inst))
-         continue;
-
-      /* Check if this is a double-precision instruction */
-      bool is_double = type_sz(inst->dst.type) == 8;
-      for (int arg = 0; !is_double && arg < 3; arg++) {
-         is_double = inst->src[arg].file != BAD_FILE &&
-                     type_sz(inst->src[arg].type) == 8;
-      }
-
-      if (!is_double)
-         continue;
-
-      /* Skip the lowering for specific regioning scenarios that we can
-       * support natively.
-       */
-      bool skip_lowering = true;
-
-      /* XY and ZW writemasks operate in 32-bit, which means that they don't
-       * have a native 64-bit representation and they should always be split.
-       */
-      if (inst->dst.writemask == WRITEMASK_XY ||
-          inst->dst.writemask == WRITEMASK_ZW) {
-         skip_lowering = false;
-      } else {
-         for (unsigned i = 0; i < 3; i++) {
-            if (inst->src[i].file == BAD_FILE || type_sz(inst->src[i].type) < 8)
-               continue;
-            skip_lowering = skip_lowering && is_supported_64bit_region(inst, i);
-         }
-      }
-
-      if (skip_lowering)
-         continue;
-
-      /* Generate scalar instructions for each enabled channel */
-      for (unsigned chan = 0; chan < 4; chan++) {
-         unsigned chan_mask = 1 << chan;
-         if (!(inst->dst.writemask & chan_mask))
-            continue;
-
-         vec4_instruction *scalar_inst = new(mem_ctx) vec4_instruction(*inst);
-
-         for (unsigned i = 0; i < 3; i++) {
-            unsigned swz = BRW_GET_SWZ(inst->src[i].swizzle, chan);
-            scalar_inst->src[i].swizzle = BRW_SWIZZLE4(swz, swz, swz, swz);
-         }
-
-         scalar_inst->dst.writemask = chan_mask;
-
-         if (inst->predicate != BRW_PREDICATE_NONE) {
-            scalar_inst->predicate =
-               scalarize_predicate(inst->predicate, chan_mask);
-         }
-
-         inst->insert_before(block, scalar_inst);
-      }
-
-      inst->remove(block);
-      progress = true;
-   }
-
-   if (progress)
-      invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
-
-   return progress;
-}
-
-bool
-vec4_visitor::lower_64bit_mad_to_mul_add()
-{
-   bool progress = false;
-
-   foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
-      if (inst->opcode != BRW_OPCODE_MAD)
-         continue;
-
-      if (type_sz(inst->dst.type) != 8)
-         continue;
-
-      dst_reg mul_dst = dst_reg(this, glsl_dvec4_type());
-
-      /* Use the copy constructor so we copy all relevant instruction fields
-       * from the original mad into the add and mul instructions
-       */
-      vec4_instruction *mul = new(mem_ctx) vec4_instruction(*inst);
-      mul->opcode = BRW_OPCODE_MUL;
-      mul->dst = mul_dst;
-      mul->src[0] = inst->src[1];
-      mul->src[1] = inst->src[2];
-      mul->src[2].file = BAD_FILE;
-
-      vec4_instruction *add = new(mem_ctx) vec4_instruction(*inst);
-      add->opcode = BRW_OPCODE_ADD;
-      add->src[0] = src_reg(mul_dst);
-      add->src[1] = inst->src[0];
-      add->src[2].file = BAD_FILE;
-
-      inst->insert_before(block, mul);
-      inst->insert_before(block, add);
-      inst->remove(block);
-
-      progress = true;
-   }
-
-   if (progress)
-      invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
-
-   return progress;
-}
-
-/* The align16 hardware can only do 32-bit swizzle channels, so we need to
- * translate the logical 64-bit swizzle channels that we use in the Vec4 IR
- * to 32-bit swizzle channels in hardware registers.
- *
- * @inst and @arg identify the original vec4 IR source operand we need to
- * translate the swizzle for and @hw_reg is the hardware register where we
- * will write the hardware swizzle to use.
- *
- * This pass assumes that Align16/DF instructions have been fully scalarized
- * previously so there is just one 64-bit swizzle channel to deal with for any
- * given Vec4 IR source.
- */
-void
-vec4_visitor::apply_logical_swizzle(struct brw_reg *hw_reg,
-                                    vec4_instruction *inst, int arg)
-{
-   src_reg reg = inst->src[arg];
-
-   if (reg.file == BAD_FILE || reg.file == BRW_IMMEDIATE_VALUE)
-      return;
-
-   /* If this is not a 64-bit operand or this is a scalar instruction we don't
-    * need to do anything about the swizzles.
-    */
-   if(type_sz(reg.type) < 8 || is_align1_df(inst)) {
-      hw_reg->swizzle = reg.swizzle;
-      return;
-   }
-
-   /* Take the 64-bit logical swizzle channel and translate it to 32-bit */
-   assert(brw_is_single_value_swizzle(reg.swizzle) ||
-          is_supported_64bit_region(inst, arg));
-
-   /* Apply the region <2, 2, 1> for GRF or <0, 2, 1> for uniforms, as align16
-    * HW can only do 32-bit swizzle channels.
-    */
-   hw_reg->width = BRW_WIDTH_2;
-
-   if (is_supported_64bit_region(inst, arg) &&
-       !is_gfx7_supported_64bit_swizzle(inst, arg)) {
-      /* Supported 64-bit swizzles are those such that their first two
-       * components, when expanded to 32-bit swizzles, match the semantics
-       * of the original 64-bit swizzle with 2-wide row regioning.
-       */
-      unsigned swizzle0 = BRW_GET_SWZ(reg.swizzle, 0);
-      unsigned swizzle1 = BRW_GET_SWZ(reg.swizzle, 1);
-      hw_reg->swizzle = BRW_SWIZZLE4(swizzle0 * 2, swizzle0 * 2 + 1,
-                                     swizzle1 * 2, swizzle1 * 2 + 1);
-   } else {
-      /* If we got here then we have one of the following:
-       *
-       * 1. An unsupported swizzle, which should be single-value thanks to the
-       *    scalarization pass.
-       *
-       * 2. A gfx7 supported swizzle. These can be single-value or double-value
-       *    swizzles. If the latter, they are never cross-dvec2 channels. For
-       *    these we always need to activate the gfx7 vstride=0 exploit.
-       */
-      unsigned swizzle0 = BRW_GET_SWZ(reg.swizzle, 0);
-      unsigned swizzle1 = BRW_GET_SWZ(reg.swizzle, 1);
-      assert((swizzle0 < 2) == (swizzle1 < 2));
-
-      /* To gain access to Z/W components we need to select the second half
-       * of the register and then use a X/Y swizzle to select Z/W respectively.
-       */
-      if (swizzle0 >= 2) {
-         *hw_reg = suboffset(*hw_reg, 2);
-         swizzle0 -= 2;
-         swizzle1 -= 2;
-      }
-
-      /* All gfx7-specific supported swizzles require the vstride=0 exploit */
-      if (devinfo->ver == 7 && is_gfx7_supported_64bit_swizzle(inst, arg))
-         hw_reg->vstride = BRW_VERTICAL_STRIDE_0;
-
-      /* Any 64-bit source with an offset at 16B is intended to address the
-       * second half of a register and needs a vertical stride of 0 so we:
-       *
-       * 1. Don't violate register region restrictions.
-       * 2. Activate the gfx7 instruction decompression bug exploit when
-       *    execsize > 4
-       */
-      if (hw_reg->subnr % REG_SIZE == 16) {
-         assert(devinfo->ver == 7);
-         hw_reg->vstride = BRW_VERTICAL_STRIDE_0;
-      }
-
-      hw_reg->swizzle = BRW_SWIZZLE4(swizzle0 * 2, swizzle0 * 2 + 1,
-                                     swizzle1 * 2, swizzle1 * 2 + 1);
-   }
-}
-
-void
-vec4_visitor::invalidate_analysis(brw::analysis_dependency_class c)
-{
-   backend_shader::invalidate_analysis(c);
-   live_analysis.invalidate(c);
-}
-
-bool
-vec4_visitor::run()
-{
-   setup_push_ranges();
-
-   if (prog_data->base.zero_push_reg) {
-      /* push_reg_mask_param is in uint32 params and UNIFORM is in vec4s */
-      const unsigned mask_param = stage_prog_data->push_reg_mask_param;
-      src_reg mask = src_reg(dst_reg(UNIFORM, mask_param / 4));
-      assert(mask_param % 2 == 0); /* Should be 64-bit-aligned */
-      mask.swizzle = BRW_SWIZZLE4((mask_param + 0) % 4,
-                                  (mask_param + 1) % 4,
-                                  (mask_param + 0) % 4,
-                                  (mask_param + 1) % 4);
-
-      emit(VEC4_OPCODE_ZERO_OOB_PUSH_REGS,
-           dst_reg(VGRF, alloc.allocate(3)), mask);
-   }
-
-   emit_prolog();
-
-   emit_nir_code();
-   if (failed)
-      return false;
-   base_ir = NULL;
-
-   emit_thread_end();
-
-   calculate_cfg();
-   cfg->validate(_mesa_shader_stage_to_abbrev(stage));
-
-   /* Before any optimization, push array accesses out to scratch
-    * space where we need them to be.  This pass may allocate new
-    * virtual GRFs, so we want to do it early.  It also makes sure
-    * that we have reladdr computations available for CSE, since we'll
-    * often do repeated subexpressions for those.
-    */
-   move_grf_array_access_to_scratch();
-   split_uniform_registers();
-
-   split_virtual_grfs();
-
-#define OPT(pass, args...) ({                                          \
-      pass_num++;                                                      \
-      bool this_progress = pass(args);                                 \
-                                                                       \
-      if (INTEL_DEBUG(DEBUG_OPTIMIZER) && this_progress) {             \
-         char filename[64];                                            \
-         snprintf(filename, 64, "%s-%s-%02d-%02d-" #pass,              \
-                  _mesa_shader_stage_to_abbrev(stage),                 \
-                  nir->info.name, iteration, pass_num);                \
-                                                                       \
-         backend_shader::dump_instructions(filename);                  \
-      }                                                                \
-                                                                       \
-      cfg->validate(_mesa_shader_stage_to_abbrev(stage));              \
-      progress = progress || this_progress;                            \
-      this_progress;                                                   \
-   })
-
-
-   if (INTEL_DEBUG(DEBUG_OPTIMIZER)) {
-      char filename[64];
-      snprintf(filename, 64, "%s-%s-00-00-start",
-               _mesa_shader_stage_to_abbrev(stage), nir->info.name);
-
-      backend_shader::dump_instructions(filename);
-   }
-
-   bool progress;
-   int iteration = 0;
-   int pass_num = 0;
-   do {
-      progress = false;
-      pass_num = 0;
-      iteration++;
-
-      OPT(opt_predicated_break, *this);
-      OPT(opt_reduce_swizzle);
-      OPT(dead_code_eliminate);
-      OPT(dead_control_flow_eliminate, *this);
-      OPT(opt_copy_propagation);
-      OPT(opt_cmod_propagation);
-      OPT(opt_cse);
-      OPT(opt_algebraic);
-      OPT(opt_register_coalesce);
-      OPT(eliminate_find_live_channel);
-   } while (progress);
-
-   pass_num = 0;
-
-   if (OPT(opt_vector_float)) {
-      OPT(opt_cse);
-      OPT(opt_copy_propagation, false);
-      OPT(opt_copy_propagation, true);
-      OPT(dead_code_eliminate);
-   }
-
-   if (devinfo->ver <= 5 && OPT(lower_minmax)) {
-      OPT(opt_cmod_propagation);
-      OPT(opt_cse);
-      OPT(opt_copy_propagation);
-      OPT(dead_code_eliminate);
-   }
-
-   if (OPT(lower_simd_width)) {
-      OPT(opt_copy_propagation);
-      OPT(dead_code_eliminate);
-   }
-
-   if (failed)
-      return false;
-
-   OPT(lower_64bit_mad_to_mul_add);
-
-   /* Run this before payload setup because tessellation shaders
-    * rely on it to prevent cross dvec2 regioning on DF attributes
-    * that are setup so that XY are on the second half of register and
-    * ZW are in the first half of the next.
-    */
-   OPT(scalarize_df);
-
-   setup_payload();
-
-   if (INTEL_DEBUG(DEBUG_SPILL_VEC4)) {
-      /* Debug of register spilling: Go spill everything. */
-      const int grf_count = alloc.count;
-      float spill_costs[alloc.count];
-      bool no_spill[alloc.count];
-      evaluate_spill_costs(spill_costs, no_spill);
-      for (int i = 0; i < grf_count; i++) {
-         if (no_spill[i])
-            continue;
-         spill_reg(i);
-      }
-
-      /* We want to run this after spilling because 64-bit (un)spills need to
-       * emit code to shuffle 64-bit data for the 32-bit scratch read/write
-       * messages that can produce unsupported 64-bit swizzle regions.
-       */
-      OPT(scalarize_df);
-   }
-
-   fixup_3src_null_dest();
-
-   bool allocated_without_spills = reg_allocate();
-
-   if (!allocated_without_spills) {
-      brw_shader_perf_log(compiler, log_data,
-                          "%s shader triggered register spilling.  "
-                          "Try reducing the number of live vec4 values "
-                          "to improve performance.\n",
-                          _mesa_shader_stage_to_string(stage));
-
-      while (!reg_allocate()) {
-         if (failed)
-            return false;
-      }
-
-      /* We want to run this after spilling because 64-bit (un)spills need to
-       * emit code to shuffle 64-bit data for the 32-bit scratch read/write
-       * messages that can produce unsupported 64-bit swizzle regions.
-       */
-      OPT(scalarize_df);
-   }
-
-   opt_schedule_instructions();
-
-   opt_set_dependency_control();
-
-   convert_to_hw_regs();
-
-   if (last_scratch > 0) {
-      prog_data->base.total_scratch =
-         brw_get_scratch_size(last_scratch * REG_SIZE);
-   }
-
-   return !failed;
-}
-
-} /* namespace brw */
-
diff --git a/src/intel/compiler/brw_vec4.h b/src/intel/compiler/brw_vec4.h
deleted file mode 100644
index ca803386309..00000000000
--- a/src/intel/compiler/brw_vec4.h
+++ /dev/null
@@ -1,350 +0,0 @@
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef BRW_VEC4_H
-#define BRW_VEC4_H
-
-#include "brw_shader.h"
-
-#ifdef __cplusplus
-#include "brw_ir_vec4.h"
-#include "brw_ir_performance.h"
-#include "brw_vec4_builder.h"
-#include "brw_vec4_live_variables.h"
-#endif
-
-#include "compiler/glsl/ir.h"
-#include "compiler/nir/nir.h"
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-const unsigned *
-brw_vec4_generate_assembly(const struct brw_compiler *compiler,
-                           const struct brw_compile_params *params,
-                           const nir_shader *nir,
-                           struct brw_vue_prog_data *prog_data,
-                           const struct cfg_t *cfg,
-                           const brw::performance &perf,
-                           bool debug_enabled);
-
-#ifdef __cplusplus
-} /* extern "C" */
-
-namespace brw {
-/**
- * The vertex shader front-end.
- *
- * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
- * fixed-function) into VS IR.
- */
-class vec4_visitor : public backend_shader
-{
-public:
-   vec4_visitor(const struct brw_compiler *compiler,
-                const struct brw_compile_params *params,
-                const struct brw_sampler_prog_key_data *key,
-                struct brw_vue_prog_data *prog_data,
-                const nir_shader *shader,
-                bool no_spills,
-                bool debug_enabled);
-
-   dst_reg dst_null_f()
-   {
-      return dst_reg(brw_null_reg());
-   }
-
-   dst_reg dst_null_df()
-   {
-      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF));
-   }
-
-   dst_reg dst_null_d()
-   {
-      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-   }
-
-   dst_reg dst_null_ud()
-   {
-      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
-   }
-
-   const struct brw_sampler_prog_key_data * const key_tex;
-   struct brw_vue_prog_data * const prog_data;
-   char *fail_msg;
-   bool failed;
-
-   /**
-    * GLSL IR currently being processed, which is associated with our
-    * driver IR instructions for debugging purposes.
-    */
-   const void *base_ir;
-   const char *current_annotation;
-
-   int first_non_payload_grf;
-   unsigned ubo_push_start[4];
-   unsigned push_length;
-   unsigned int max_grf;
-   brw_analysis<brw::vec4_live_variables, backend_shader> live_analysis;
-   brw_analysis<brw::performance, vec4_visitor> performance_analysis;
-
-   /* Regs for vertex results.  Generated at ir_variable visiting time
-    * for the ir->location's used.
-    */
-   dst_reg output_reg[VARYING_SLOT_TESS_MAX][4];
-   unsigned output_num_components[VARYING_SLOT_TESS_MAX][4];
-   const char *output_reg_annotation[VARYING_SLOT_TESS_MAX];
-   int uniforms;
-
-   bool run();
-   void fail(const char *msg, ...);
-
-   int setup_uniforms(int payload_reg);
-
-   bool reg_allocate_trivial();
-   bool reg_allocate();
-   void evaluate_spill_costs(float *spill_costs, bool *no_spill);
-   int choose_spill_reg(struct ra_graph *g);
-   void spill_reg(unsigned spill_reg);
-   void move_grf_array_access_to_scratch();
-   void split_uniform_registers();
-   void setup_push_ranges();
-   virtual void invalidate_analysis(brw::analysis_dependency_class c);
-   void split_virtual_grfs();
-   bool opt_vector_float();
-   bool opt_reduce_swizzle();
-   bool dead_code_eliminate();
-   bool opt_cmod_propagation();
-   bool opt_copy_propagation(bool do_constant_prop = true);
-   bool opt_cse_local(bblock_t *block, const vec4_live_variables &live);
-   bool opt_cse();
-   bool opt_algebraic();
-   bool opt_register_coalesce();
-   bool eliminate_find_live_channel();
-   bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
-   void opt_set_dependency_control();
-   void opt_schedule_instructions();
-   void convert_to_hw_regs();
-   void fixup_3src_null_dest();
-
-   bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg);
-   bool lower_simd_width();
-   bool scalarize_df();
-   bool lower_64bit_mad_to_mul_add();
-   void apply_logical_swizzle(struct brw_reg *hw_reg,
-                              vec4_instruction *inst, int arg);
-
-   vec4_instruction *emit(vec4_instruction *inst);
-
-   vec4_instruction *emit(enum opcode opcode);
-   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
-   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
-                          const src_reg &src0);
-   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
-                          const src_reg &src0, const src_reg &src1);
-   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
-                          const src_reg &src0, const src_reg &src1,
-                          const src_reg &src2);
-
-   vec4_instruction *emit_before(bblock_t *block,
-                                 vec4_instruction *inst,
-				 vec4_instruction *new_inst);
-
-#define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
-#define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
-#define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
-   EMIT1(MOV)
-   EMIT1(NOT)
-   EMIT1(RNDD)
-   EMIT1(RNDE)
-   EMIT1(RNDZ)
-   EMIT1(FRC)
-   EMIT1(F32TO16)
-   EMIT1(F16TO32)
-   EMIT2(ADD)
-   EMIT2(MUL)
-   EMIT2(MACH)
-   EMIT2(MAC)
-   EMIT2(AND)
-   EMIT2(OR)
-   EMIT2(XOR)
-   EMIT2(DP3)
-   EMIT2(DP4)
-   EMIT2(DPH)
-   EMIT2(SHL)
-   EMIT2(SHR)
-   EMIT2(ASR)
-   vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
-			 enum brw_conditional_mod condition);
-   vec4_instruction *IF(src_reg src0, src_reg src1,
-                        enum brw_conditional_mod condition);
-   vec4_instruction *IF(enum brw_predicate predicate);
-   EMIT1(SCRATCH_READ)
-   EMIT2(SCRATCH_WRITE)
-   EMIT3(LRP)
-   EMIT1(BFREV)
-   EMIT3(BFE)
-   EMIT2(BFI1)
-   EMIT3(BFI2)
-   EMIT1(FBH)
-   EMIT1(FBL)
-   EMIT1(CBIT)
-   EMIT1(LZD)
-   EMIT3(MAD)
-   EMIT2(ADDC)
-   EMIT2(SUBB)
-   EMIT1(DIM)
-
-#undef EMIT1
-#undef EMIT2
-#undef EMIT3
-
-   vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
-                                 src_reg src0, src_reg src1);
-
-   /**
-    * Copy any live channel from \p src to the first channel of the
-    * result.
-    */
-   src_reg emit_uniformize(const src_reg &src);
-
-   /** Fix all float operands of a 3-source instruction. */
-   void fix_float_operands(src_reg op[3], nir_alu_instr *instr);
-
-   src_reg fix_3src_operand(const src_reg &src);
-
-   vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
-                               const src_reg &src1 = src_reg());
-
-   src_reg fix_math_operand(const src_reg &src);
-
-   void emit_pack_half_2x16(dst_reg dst, src_reg src0);
-   void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
-   void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
-   void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
-   void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
-   void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
-
-   src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
-                          src_reg surface);
-
-   void emit_ndc_computation();
-   void emit_psiz_and_flags(dst_reg reg);
-   vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp);
-   virtual void emit_urb_slot(dst_reg reg, int varying);
-
-   src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
-			      src_reg *reladdr, int reg_offset);
-   void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
-			  dst_reg dst,
-			  src_reg orig_src,
-			  int base_offset);
-   void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
-			   int base_offset);
-   void emit_pull_constant_load_reg(dst_reg dst,
-                                    src_reg surf_index,
-                                    src_reg offset,
-                                    bblock_t *before_block,
-                                    vec4_instruction *before_inst);
-   src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
-                                vec4_instruction *inst, src_reg src);
-
-   void resolve_ud_negate(src_reg *reg);
-
-   void emit_shader_float_controls_execution_mode();
-
-   bool lower_minmax();
-
-   src_reg get_timestamp();
-
-   virtual void dump_instruction_to_file(const backend_instruction *inst, FILE *file) const;
-
-   bool optimize_predicate(nir_alu_instr *instr, enum brw_predicate *predicate);
-
-   void emit_conversion_from_double(dst_reg dst, src_reg src);
-   void emit_conversion_to_double(dst_reg dst, src_reg src);
-
-   vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src,
-                                        bool for_write,
-                                        bool for_scratch = false,
-                                        bblock_t *block = NULL,
-                                        vec4_instruction *ref = NULL);
-
-   virtual void emit_nir_code();
-   virtual void nir_setup_uniforms();
-   virtual void nir_emit_impl(nir_function_impl *impl);
-   virtual void nir_emit_cf_list(exec_list *list);
-   virtual void nir_emit_if(nir_if *if_stmt);
-   virtual void nir_emit_loop(nir_loop *loop);
-   virtual void nir_emit_block(nir_block *block);
-   virtual void nir_emit_instr(nir_instr *instr);
-   virtual void nir_emit_load_const(nir_load_const_instr *instr);
-   src_reg get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr);
-   virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
-   virtual void nir_emit_alu(nir_alu_instr *instr);
-   virtual void nir_emit_jump(nir_jump_instr *instr);
-   virtual void nir_emit_texture(nir_tex_instr *instr);
-   virtual void nir_emit_undef(nir_undef_instr *instr);
-   virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
-
-   dst_reg get_nir_def(const nir_def &def, enum brw_reg_type type);
-   dst_reg get_nir_def(const nir_def &def, nir_alu_type type);
-   dst_reg get_nir_def(const nir_def &def);
-   src_reg get_nir_src(const nir_src &src, enum brw_reg_type type,
-                       unsigned num_components = 4);
-   src_reg get_nir_src(const nir_src &src, nir_alu_type type,
-                       unsigned num_components = 4);
-   src_reg get_nir_src(const nir_src &src,
-                       unsigned num_components = 4);
-   src_reg get_nir_src_imm(const nir_src &src);
-   src_reg get_indirect_offset(nir_intrinsic_instr *instr);
-
-   dst_reg *nir_ssa_values;
-
-protected:
-   void emit_vertex();
-   void setup_payload_interference(struct ra_graph *g, int first_payload_node,
-                                   int reg_node_count);
-   virtual void setup_payload() = 0;
-   virtual void emit_prolog() = 0;
-   virtual void emit_thread_end() = 0;
-   virtual void emit_urb_write_header(int mrf) = 0;
-   virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
-   virtual void gs_emit_vertex(int stream_id);
-   virtual void gs_end_primitive();
-
-private:
-   /**
-    * If true, then register allocation should fail instead of spilling.
-    */
-   const bool no_spills;
-
-   unsigned last_scratch; /**< measured in 32-byte (register size) units */
-};
-
-} /* namespace brw */
-#endif /* __cplusplus */
-
-#endif /* BRW_VEC4_H */
diff --git a/src/intel/compiler/brw_vec4_builder.h b/src/intel/compiler/brw_vec4_builder.h
deleted file mode 100644
index 322a6aae20b..00000000000
--- a/src/intel/compiler/brw_vec4_builder.h
+++ /dev/null
@@ -1,646 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright © 2010-2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef BRW_VEC4_BUILDER_H
-#define BRW_VEC4_BUILDER_H
-
-#include "brw_ir_vec4.h"
-#include "brw_ir_allocator.h"
-
-namespace brw {
-   /**
-    * Toolbox to assemble a VEC4 IR program out of individual instructions.
-    *
-    * This object is meant to have an interface consistent with
-    * brw::fs_builder.  They cannot be fully interchangeable because
-    * brw::fs_builder generates scalar code while brw::vec4_builder generates
-    * vector code.
-    */
-   class vec4_builder {
-   public:
-      /** Type used in this IR to represent a source of an instruction. */
-      typedef brw::src_reg src_reg;
-
-      /** Type used in this IR to represent the destination of an instruction. */
-      typedef brw::dst_reg dst_reg;
-
-      /** Type used in this IR to represent an instruction. */
-      typedef vec4_instruction instruction;
-
-      /**
-       * Construct a vec4_builder that inserts instructions into \p shader.
-       */
-      vec4_builder(backend_shader *shader, unsigned dispatch_width = 8) :
-         shader(shader), block(NULL), cursor(NULL),
-         _dispatch_width(dispatch_width), _group(0),
-         force_writemask_all(false),
-         annotation()
-      {
-      }
-
-      /**
-       * Construct a vec4_builder that inserts instructions into \p shader
-       * before instruction \p inst in basic block \p block.  The default
-       * execution controls and debug annotation are initialized from the
-       * instruction passed as argument.
-       */
-      vec4_builder(backend_shader *shader, bblock_t *block, instruction *inst) :
-         shader(shader), block(block), cursor(inst),
-         _dispatch_width(inst->exec_size), _group(inst->group),
-         force_writemask_all(inst->force_writemask_all)
-      {
-         annotation.str = inst->annotation;
-         annotation.ir = inst->ir;
-      }
-
-      /**
-       * Construct a vec4_builder that inserts instructions before \p cursor
-       * in basic block \p block, inheriting other code generation parameters
-       * from this.
-       */
-      vec4_builder
-      at(bblock_t *block, exec_node *cursor) const
-      {
-         vec4_builder bld = *this;
-         bld.block = block;
-         bld.cursor = cursor;
-         return bld;
-      }
-
-      /**
-       * Construct a vec4_builder appending instructions at the end of the
-       * instruction list of the shader, inheriting other code generation
-       * parameters from this.
-       */
-      vec4_builder
-      at_end() const
-      {
-         return at(NULL, (exec_node *)&shader->instructions.tail_sentinel);
-      }
-
-      /**
-       * Construct a builder specifying the default SIMD width and group of
-       * channel enable signals, inheriting other code generation parameters
-       * from this.
-       *
-       * \p n gives the default SIMD width, \p i gives the slot group used for
-       * predication and control flow masking in multiples of \p n channels.
-       */
-      vec4_builder
-      group(unsigned n, unsigned i) const
-      {
-         assert(force_writemask_all ||
-                (n <= dispatch_width() && i < dispatch_width() / n));
-         vec4_builder bld = *this;
-         bld._dispatch_width = n;
-         bld._group += i * n;
-         return bld;
-      }
-
-      /**
-       * Construct a builder with per-channel control flow execution masking
-       * disabled if \p b is true.  If control flow execution masking is
-       * already disabled this has no effect.
-       */
-      vec4_builder
-      exec_all(bool b = true) const
-      {
-         vec4_builder bld = *this;
-         if (b)
-            bld.force_writemask_all = true;
-         return bld;
-      }
-
-      /**
-       * Construct a builder with the given debug annotation info.
-       */
-      vec4_builder
-      annotate(const char *str, const void *ir = NULL) const
-      {
-         vec4_builder bld = *this;
-         bld.annotation.str = str;
-         bld.annotation.ir = ir;
-         return bld;
-      }
-
-      /**
-       * Get the SIMD width in use.
-       */
-      unsigned
-      dispatch_width() const
-      {
-         return _dispatch_width;
-      }
-
-      /**
-       * Get the channel group in use.
-       */
-      unsigned
-      group() const
-      {
-         return _group;
-      }
-
-      /**
-       * Allocate a virtual register of natural vector size (four for this IR)
-       * and SIMD width.  \p n gives the amount of space to allocate in
-       * dispatch_width units (which is just enough space for four logical
-       * components in this IR).
-       */
-      dst_reg
-      vgrf(enum brw_reg_type type, unsigned n = 1) const
-      {
-         assert(dispatch_width() <= 32);
-
-         if (n > 0)
-            return retype(dst_reg(VGRF, shader->alloc.allocate(
-                                     n * DIV_ROUND_UP(type_sz(type), 4))),
-                           type);
-         else
-            return retype(null_reg_ud(), type);
-      }
-
-      /**
-       * Create a null register of floating type.
-       */
-      dst_reg
-      null_reg_f() const
-      {
-         return dst_reg(retype(brw_null_vec(dispatch_width()),
-                               BRW_REGISTER_TYPE_F));
-      }
-
-      /**
-       * Create a null register of signed integer type.
-       */
-      dst_reg
-      null_reg_d() const
-      {
-         return dst_reg(retype(brw_null_vec(dispatch_width()),
-                               BRW_REGISTER_TYPE_D));
-      }
-
-      /**
-       * Create a null register of unsigned integer type.
-       */
-      dst_reg
-      null_reg_ud() const
-      {
-         return dst_reg(retype(brw_null_vec(dispatch_width()),
-                               BRW_REGISTER_TYPE_UD));
-      }
-
-      /**
-       * Insert an instruction into the program.
-       */
-      instruction *
-      emit(const instruction &inst) const
-      {
-         return emit(new(shader->mem_ctx) instruction(inst));
-      }
-
-      /**
-       * Create and insert a nullary control instruction into the program.
-       */
-      instruction *
-      emit(enum opcode opcode) const
-      {
-         return emit(instruction(opcode));
-      }
-
-      /**
-       * Create and insert a nullary instruction into the program.
-       */
-      instruction *
-      emit(enum opcode opcode, const dst_reg &dst) const
-      {
-         return emit(instruction(opcode, dst));
-      }
-
-      /**
-       * Create and insert a unary instruction into the program.
-       */
-      instruction *
-      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
-      {
-         switch (opcode) {
-         case SHADER_OPCODE_RCP:
-         case SHADER_OPCODE_RSQ:
-         case SHADER_OPCODE_SQRT:
-         case SHADER_OPCODE_EXP2:
-         case SHADER_OPCODE_LOG2:
-         case SHADER_OPCODE_SIN:
-         case SHADER_OPCODE_COS:
-            return fix_math_instruction(
-               emit(instruction(opcode, dst,
-                                fix_math_operand(src0))));
-
-         default:
-            return emit(instruction(opcode, dst, src0));
-         }
-      }
-
-      /**
-       * Create and insert a binary instruction into the program.
-       */
-      instruction *
-      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
-           const src_reg &src1) const
-      {
-         switch (opcode) {
-         case SHADER_OPCODE_POW:
-         case SHADER_OPCODE_INT_QUOTIENT:
-         case SHADER_OPCODE_INT_REMAINDER:
-            return fix_math_instruction(
-               emit(instruction(opcode, dst,
-                                fix_math_operand(src0),
-                                fix_math_operand(src1))));
-
-         default:
-            return emit(instruction(opcode, dst, src0, src1));
-         }
-      }
-
-      /**
-       * Create and insert a ternary instruction into the program.
-       */
-      instruction *
-      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
-           const src_reg &src1, const src_reg &src2) const
-      {
-         switch (opcode) {
-         case BRW_OPCODE_BFE:
-         case BRW_OPCODE_BFI2:
-         case BRW_OPCODE_MAD:
-         case BRW_OPCODE_LRP:
-            return emit(instruction(opcode, dst,
-                                    fix_3src_operand(src0),
-                                    fix_3src_operand(src1),
-                                    fix_3src_operand(src2)));
-
-         default:
-            return emit(instruction(opcode, dst, src0, src1, src2));
-         }
-      }
-
-      /**
-       * Insert a preallocated instruction into the program.
-       */
-      instruction *
-      emit(instruction *inst) const
-      {
-         inst->exec_size = dispatch_width();
-         inst->group = group();
-         inst->force_writemask_all = force_writemask_all;
-         inst->size_written = inst->exec_size * type_sz(inst->dst.type);
-         inst->annotation = annotation.str;
-         inst->ir = annotation.ir;
-
-         if (block)
-            static_cast<instruction *>(cursor)->insert_before(block, inst);
-         else
-            cursor->insert_before(inst);
-
-         return inst;
-      }
-
-      /**
-       * Select \p src0 if the comparison of both sources with the given
-       * conditional mod evaluates to true, otherwise select \p src1.
-       *
-       * Generally useful to get the minimum or maximum of two values.
-       */
-      instruction *
-      emit_minmax(const dst_reg &dst, const src_reg &src0,
-                  const src_reg &src1, brw_conditional_mod mod) const
-      {
-         assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);
-
-         return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
-                                     fix_unsigned_negate(src1)));
-      }
-
-      /**
-       * Copy any live channel from \p src to the first channel of the result.
-       */
-      src_reg
-      emit_uniformize(const src_reg &src) const
-      {
-         const vec4_builder ubld = exec_all();
-         const dst_reg chan_index =
-            writemask(vgrf(BRW_REGISTER_TYPE_UD), WRITEMASK_X);
-         const dst_reg dst = vgrf(src.type);
-
-         ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
-         ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, src_reg(chan_index));
-
-         return src_reg(dst);
-      }
-
-      /**
-       * Assorted arithmetic ops.
-       * @{
-       */
-#define ALU1(op)                                        \
-      instruction *                                     \
-      op(const dst_reg &dst, const src_reg &src0) const \
-      {                                                 \
-         return emit(BRW_OPCODE_##op, dst, src0);       \
-      }
-
-#define ALU2(op)                                                        \
-      instruction *                                                     \
-      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
-      {                                                                 \
-         return emit(BRW_OPCODE_##op, dst, src0, src1);                 \
-      }
-
-#define ALU2_ACC(op)                                                    \
-      instruction *                                                     \
-      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
-      {                                                                 \
-         instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1);    \
-         inst->writes_accumulator = true;                               \
-         return inst;                                                   \
-      }
-
-#define ALU3(op)                                                        \
-      instruction *                                                     \
-      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1,  \
-         const src_reg &src2) const                                     \
-      {                                                                 \
-         return emit(BRW_OPCODE_##op, dst, src0, src1, src2);           \
-      }
-
-      ALU2(ADD)
-      ALU2_ACC(ADDC)
-      ALU2(AND)
-      ALU2(ASR)
-      ALU2(AVG)
-      ALU3(BFE)
-      ALU2(BFI1)
-      ALU3(BFI2)
-      ALU1(BFREV)
-      ALU1(CBIT)
-      ALU3(CSEL)
-      ALU1(DIM)
-      ALU2(DP2)
-      ALU2(DP3)
-      ALU2(DP4)
-      ALU2(DPH)
-      ALU1(F16TO32)
-      ALU1(F32TO16)
-      ALU1(FBH)
-      ALU1(FBL)
-      ALU1(FRC)
-      ALU2(LINE)
-      ALU1(LZD)
-      ALU2(MAC)
-      ALU2_ACC(MACH)
-      ALU3(MAD)
-      ALU1(MOV)
-      ALU2(MUL)
-      ALU1(NOT)
-      ALU2(OR)
-      ALU2(PLN)
-      ALU1(RNDD)
-      ALU1(RNDE)
-      ALU1(RNDU)
-      ALU1(RNDZ)
-      ALU2(SAD2)
-      ALU2_ACC(SADA2)
-      ALU2(SEL)
-      ALU2(SHL)
-      ALU2(SHR)
-      ALU2_ACC(SUBB)
-      ALU2(XOR)
-
-#undef ALU3
-#undef ALU2_ACC
-#undef ALU2
-#undef ALU1
-      /** @} */
-
-      /**
-       * CMP: Sets the low bit of the destination channels with the result
-       * of the comparison, while the upper bits are undefined, and updates
-       * the flag register with the packed 16 bits of the result.
-       */
-      instruction *
-      CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
-          brw_conditional_mod condition) const
-      {
-         /* Take the instruction:
-          *
-          * CMP null<d> src0<f> src1<f>
-          *
-          * Original gfx4 does type conversion to the destination type
-          * before comparison, producing garbage results for floating
-          * point comparisons.
-          *
-          * The destination type doesn't matter on newer generations,
-          * so we set the type to match src0 so we can compact the
-          * instruction.
-          */
-         return set_condmod(condition,
-                            emit(BRW_OPCODE_CMP, retype(dst, src0.type),
-                                 fix_unsigned_negate(src0),
-                                 fix_unsigned_negate(src1)));
-      }
-
-      /**
-       * CMPN: Behaves like CMP, but produces true if src1 is NaN.
-       */
-      instruction *
-      CMPN(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
-          brw_conditional_mod condition) const
-      {
-         /* Take the instruction:
-          *
-          * CMPN null<d> src0<f> src1<f>
-          *
-          * Original gfx4 does type conversion to the destination type
-          * before comparison, producing garbage results for floating
-          * point comparisons.
-          *
-          * The destination type doesn't matter on newer generations,
-          * so we set the type to match src0 so we can compact the
-          * instruction.
-          */
-         return set_condmod(condition,
-                            emit(BRW_OPCODE_CMPN, retype(dst, src0.type),
-                                 fix_unsigned_negate(src0),
-                                 fix_unsigned_negate(src1)));
-      }
-
-      /**
-       * Gfx4 predicated IF.
-       */
-      instruction *
-      IF(brw_predicate predicate) const
-      {
-         return set_predicate(predicate, emit(BRW_OPCODE_IF));
-      }
-
-      /**
-       * Gfx6 IF with embedded comparison.
-       */
-      instruction *
-      IF(const src_reg &src0, const src_reg &src1,
-         brw_conditional_mod condition) const
-      {
-         assert(shader->devinfo->ver == 6);
-         return set_condmod(condition,
-                            emit(BRW_OPCODE_IF,
-                                 null_reg_d(),
-                                 fix_unsigned_negate(src0),
-                                 fix_unsigned_negate(src1)));
-      }
-
-      /**
-       * Emit a linear interpolation instruction.
-       */
-      instruction *
-      LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
-          const src_reg &a) const
-      {
-         /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
-          * we need to reorder the operands.
-          */
-         assert(shader->devinfo->ver >= 6 && shader->devinfo->ver <= 9);
-         return emit(BRW_OPCODE_LRP, dst, a, y, x);
-      }
-
-      backend_shader *shader;
-
-   protected:
-      /**
-       * Workaround for negation of UD registers.  See comment in
-       * fs_generator::generate_code() for the details.
-       */
-      src_reg
-      fix_unsigned_negate(const src_reg &src) const
-      {
-         if (src.type == BRW_REGISTER_TYPE_UD && src.negate) {
-            dst_reg temp = vgrf(BRW_REGISTER_TYPE_UD);
-            MOV(temp, src);
-            return src_reg(temp);
-         } else {
-            return src;
-         }
-      }
-
-      /**
-       * Workaround for register access modes not supported by the ternary
-       * instruction encoding.
-       */
-      src_reg
-      fix_3src_operand(const src_reg &src) const
-      {
-         /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
-          * able to use vertical stride of zero to replicate the vec4 uniform, like
-          *
-          *    g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
-          *
-          * But you can't, since vertical stride is always four in three-source
-          * instructions. Instead, insert a MOV instruction to do the replication so
-          * that the three-source instruction can consume it.
-          */
-
-         /* The MOV is only needed if the source is a uniform or immediate. */
-         if (src.file != UNIFORM && src.file != IMM)
-            return src;
-
-         if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle))
-            return src;
-
-         const dst_reg expanded = vgrf(src.type);
-         emit(VEC4_OPCODE_UNPACK_UNIFORM, expanded, src);
-         return src_reg(expanded);
-      }
-
-      /**
-       * Workaround for register access modes not supported by the math
-       * instruction.
-       */
-      src_reg
-      fix_math_operand(const src_reg &src) const
-      {
-         /* The gfx6 math instruction ignores the source modifiers --
-          * swizzle, abs, negate, and at least some parts of the register
-          * region description.
-          *
-          * Rather than trying to enumerate all these cases, *always* expand the
-          * operand to a temp GRF for gfx6.
-          *
-          * For gfx7, keep the operand as-is, except if immediate, which gfx7 still
-          * can't use.
-          */
-         if (shader->devinfo->ver == 6 ||
-             (shader->devinfo->ver == 7 && src.file == IMM)) {
-            const dst_reg tmp = vgrf(src.type);
-            MOV(tmp, src);
-            return src_reg(tmp);
-         } else {
-            return src;
-         }
-      }
-
-      /**
-       * Workaround other weirdness of the math instruction.
-       */
-      instruction *
-      fix_math_instruction(instruction *inst) const
-      {
-         if (shader->devinfo->ver == 6 &&
-             inst->dst.writemask != WRITEMASK_XYZW) {
-            const dst_reg tmp = vgrf(inst->dst.type);
-            MOV(inst->dst, src_reg(tmp));
-            inst->dst = tmp;
-
-         } else if (shader->devinfo->ver < 6) {
-            const unsigned sources = (inst->src[1].file == BAD_FILE ? 1 : 2);
-            inst->base_mrf = 1;
-            inst->mlen = sources;
-         }
-
-         return inst;
-      }
-
-      bblock_t *block;
-      exec_node *cursor;
-
-      unsigned _dispatch_width;
-      unsigned _group;
-      bool force_writemask_all;
-
-      /** Debug annotation info. */
-      struct {
-         const char *str;
-         const void *ir;
-      } annotation;
-   };
-}
-
-#endif
diff --git a/src/intel/compiler/brw_vec4_cmod_propagation.cpp b/src/intel/compiler/brw_vec4_cmod_propagation.cpp
deleted file mode 100644
index a3d7f7e8558..00000000000
--- a/src/intel/compiler/brw_vec4_cmod_propagation.cpp
+++ /dev/null
@@ -1,365 +0,0 @@
-/*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-/** @file brw_vec4_cmod_propagation.cpp
- *
- * Really similar to brw_fs_cmod_propagation but adapted to vec4 needs. Check
- * brw_fs_cmod_propagation for further details on the rationale behind this
- * optimization.
- */
-
-#include "brw_vec4.h"
-#include "brw_cfg.h"
-#include "brw_eu.h"
-
-namespace brw {
-
-static bool
-writemasks_incompatible(const vec4_instruction *earlier,
-                        const vec4_instruction *later)
-{
-   return (earlier->dst.writemask != WRITEMASK_X &&
-           earlier->dst.writemask != WRITEMASK_XYZW) ||
-          (earlier->dst.writemask == WRITEMASK_XYZW &&
-           later->src[0].swizzle != BRW_SWIZZLE_XYZW) ||
-          (later->dst.writemask & ~earlier->dst.writemask) != 0;
-}
-
-static bool
-opt_cmod_propagation_local(bblock_t *block, vec4_visitor *v)
-{
-   bool progress = false;
-   UNUSED int ip = block->end_ip + 1;
-
-   foreach_inst_in_block_reverse_safe(vec4_instruction, inst, block) {
-      ip--;
-
-      if ((inst->opcode != BRW_OPCODE_AND &&
-           inst->opcode != BRW_OPCODE_CMP &&
-           inst->opcode != BRW_OPCODE_MOV) ||
-          inst->predicate != BRW_PREDICATE_NONE ||
-          !inst->dst.is_null() ||
-          (inst->src[0].file != VGRF && inst->src[0].file != ATTR &&
-           inst->src[0].file != UNIFORM))
-         continue;
-
-      /* An ABS source modifier can only be handled when processing a compare
-       * with a value other than zero.
-       */
-      if (inst->src[0].abs &&
-          (inst->opcode != BRW_OPCODE_CMP || inst->src[1].is_zero()))
-         continue;
-
-      if (inst->opcode == BRW_OPCODE_AND &&
-          !(inst->src[1].is_one() &&
-            inst->conditional_mod == BRW_CONDITIONAL_NZ &&
-            !inst->src[0].negate))
-         continue;
-
-      if (inst->opcode == BRW_OPCODE_MOV &&
-          inst->conditional_mod != BRW_CONDITIONAL_NZ)
-         continue;
-
-      bool read_flag = false;
-      foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst, inst) {
-         /* A CMP with a second source of zero can match with anything.  A CMP
-          * with a second source that is not zero can only match with an ADD
-          * instruction.
-          */
-         if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero()) {
-            bool negate;
-
-            if (scan_inst->opcode != BRW_OPCODE_ADD)
-               goto not_match;
-
-            if (writemasks_incompatible(scan_inst, inst))
-               goto not_match;
-
-            /* A CMP is basically a subtraction.  The result of the
-             * subtraction must be the same as the result of the addition.
-             * This means that one of the operands must be negated.  So (a +
-             * b) vs (a == -b) or (a + -b) vs (a == b).
-             */
-            if ((inst->src[0].equals(scan_inst->src[0]) &&
-                 inst->src[1].negative_equals(scan_inst->src[1])) ||
-                (inst->src[0].equals(scan_inst->src[1]) &&
-                 inst->src[1].negative_equals(scan_inst->src[0]))) {
-               negate = false;
-            } else if ((inst->src[0].negative_equals(scan_inst->src[0]) &&
-                        inst->src[1].equals(scan_inst->src[1])) ||
-                       (inst->src[0].negative_equals(scan_inst->src[1]) &&
-                        inst->src[1].equals(scan_inst->src[0]))) {
-               negate = true;
-            } else {
-               goto not_match;
-            }
-
-            if (scan_inst->exec_size != inst->exec_size ||
-                scan_inst->group != inst->group)
-               goto not_match;
-
-            /* From the Sky Lake PRM Vol. 7 "Assigning Conditional Mods":
-             *
-             *    * Note that the [post condition signal] bits generated at
-             *      the output of a compute are before the .sat.
-             *
-             * So we don't have to bail if scan_inst has saturate.
-             */
-
-            /* Otherwise, try propagating the conditional. */
-            const enum brw_conditional_mod cond =
-               negate ? brw_swap_cmod(inst->conditional_mod)
-                      : inst->conditional_mod;
-
-            if (scan_inst->can_do_cmod() &&
-                ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) ||
-                 scan_inst->conditional_mod == cond)) {
-               scan_inst->conditional_mod = cond;
-               inst->remove(block);
-               progress = true;
-            }
-            break;
-         }
-
-         if (regions_overlap(inst->src[0], inst->size_read(0),
-                             scan_inst->dst, scan_inst->size_written)) {
-            if ((scan_inst->predicate && scan_inst->opcode != BRW_OPCODE_SEL) ||
-                scan_inst->dst.offset != inst->src[0].offset ||
-                scan_inst->exec_size != inst->exec_size ||
-                scan_inst->group != inst->group) {
-               break;
-            }
-
-            /* If scan_inst is a CMP that produces a single value and inst is
-             * a CMP.NZ that consumes only that value, remove inst.
-             */
-            if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
-                (inst->src[0].type == BRW_REGISTER_TYPE_D ||
-                 inst->src[0].type == BRW_REGISTER_TYPE_UD) &&
-                (inst->opcode == BRW_OPCODE_CMP ||
-                 inst->opcode == BRW_OPCODE_MOV) &&
-                scan_inst->opcode == BRW_OPCODE_CMP &&
-                ((inst->src[0].swizzle == BRW_SWIZZLE_XXXX &&
-                  scan_inst->dst.writemask == WRITEMASK_X) ||
-                 (inst->src[0].swizzle == BRW_SWIZZLE_YYYY &&
-                  scan_inst->dst.writemask == WRITEMASK_Y) ||
-                 (inst->src[0].swizzle == BRW_SWIZZLE_ZZZZ &&
-                  scan_inst->dst.writemask == WRITEMASK_Z) ||
-                 (inst->src[0].swizzle == BRW_SWIZZLE_WWWW &&
-                  scan_inst->dst.writemask == WRITEMASK_W))) {
-               if (inst->dst.writemask != scan_inst->dst.writemask) {
-                  src_reg temp(v, glsl_vec4_type(), 1);
-
-                  /* Given a sequence like:
-                   *
-                   *    cmp.ge.f0(8)  g21<1>.zF      g20<4>.xF      g18<4>.xF
-                   *    ...
-                   *    cmp.nz.f0(8)  null<1>D       g21<4>.zD      0D
-                   *
-                   * Replace it with something like:
-                   *
-                   *    cmp.ge.f0(8)  g22<1>.zF      g20<4>.xF      g18<4>.xF
-                   *    mov(8)        g21<1>.xF      g22<1>.zzzzF
-                   *
-                   * The added MOV will most likely be removed later.  In the
-                   * worst case, it should be cheaper to schedule.
-                   */
-                  temp.swizzle = brw_swizzle_for_mask(inst->dst.writemask);
-                  temp.type = scan_inst->src[0].type;
-
-                  vec4_instruction *mov = v->MOV(scan_inst->dst, temp);
-
-                  /* Modify the source swizzles on scan_inst.  If scan_inst
-                   * was
-                   *
-                   *    cmp.ge.f0(8)  g21<1>.zF      g20<4>.wzyxF   g18<4>.yxwzF
-                   *
-                   * replace it with
-                   *
-                   *    cmp.ge.f0(8)  g21<1>.zF      g20<4>.yyyyF   g18<4>.wwwwF
-                   */
-                  unsigned src0_chan;
-                  unsigned src1_chan;
-                  switch (scan_inst->dst.writemask) {
-                  case WRITEMASK_X:
-                     src0_chan = BRW_GET_SWZ(scan_inst->src[0].swizzle, 0);
-                     src1_chan = BRW_GET_SWZ(scan_inst->src[1].swizzle, 0);
-                     break;
-                  case WRITEMASK_Y:
-                     src0_chan = BRW_GET_SWZ(scan_inst->src[0].swizzle, 1);
-                     src1_chan = BRW_GET_SWZ(scan_inst->src[1].swizzle, 1);
-                     break;
-                  case WRITEMASK_Z:
-                     src0_chan = BRW_GET_SWZ(scan_inst->src[0].swizzle, 2);
-                     src1_chan = BRW_GET_SWZ(scan_inst->src[1].swizzle, 2);
-                     break;
-                  case WRITEMASK_W:
-                     src0_chan = BRW_GET_SWZ(scan_inst->src[0].swizzle, 3);
-                     src1_chan = BRW_GET_SWZ(scan_inst->src[1].swizzle, 3);
-                     break;
-                  default:
-                     unreachable("Impossible writemask");
-                  }
-
-                  scan_inst->src[0].swizzle = BRW_SWIZZLE4(src0_chan,
-                                                           src0_chan,
-                                                           src0_chan,
-                                                           src0_chan);
-
-                  /* There's no swizzle on immediate value sources. */
-                  if (scan_inst->src[1].file != IMM) {
-                     scan_inst->src[1].swizzle = BRW_SWIZZLE4(src1_chan,
-                                                              src1_chan,
-                                                              src1_chan,
-                                                              src1_chan);
-                  }
-
-                  scan_inst->dst = dst_reg(temp);
-                  scan_inst->dst.writemask = inst->dst.writemask;
-
-                  scan_inst->insert_after(block, mov);
-               }
-
-               inst->remove(block);
-               progress = true;
-               break;
-            }
-
-            if (writemasks_incompatible(scan_inst, inst))
-               break;
-
-            /* CMP's result is the same regardless of dest type. */
-            if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
-                scan_inst->opcode == BRW_OPCODE_CMP &&
-                (inst->dst.type == BRW_REGISTER_TYPE_D ||
-                 inst->dst.type == BRW_REGISTER_TYPE_UD)) {
-               inst->remove(block);
-               progress = true;
-               break;
-            }
-
-            /* If the AND wasn't handled by the previous case, it isn't safe
-             * to remove it.
-             */
-            if (inst->opcode == BRW_OPCODE_AND)
-               break;
-
-            /* Comparisons operate differently for ints and floats */
-            if (scan_inst->dst.type != inst->dst.type &&
-                (scan_inst->dst.type == BRW_REGISTER_TYPE_F ||
-                 inst->dst.type == BRW_REGISTER_TYPE_F))
-               break;
-
-            /* If the instruction generating inst's source also wrote the
-             * flag, and inst is doing a simple .nz comparison, then inst
-             * is redundant - the appropriate value is already in the flag
-             * register.  Delete inst.
-             */
-            if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
-                !inst->src[0].negate &&
-                scan_inst->writes_flag(v->devinfo)) {
-               inst->remove(block);
-               progress = true;
-               break;
-            }
-
-            /* The conditional mod of the CMP/CMPN instructions behaves
-             * specially because the flag output is not calculated from the
-             * result of the instruction, but the other way around, which
-             * means that even if the condmod to propagate and the condmod
-             * from the CMP instruction are the same they will in general give
-             * different results because they are evaluated based on different
-             * inputs.
-             */
-            if (scan_inst->opcode == BRW_OPCODE_CMP ||
-                scan_inst->opcode == BRW_OPCODE_CMPN)
-               break;
-
-            /* From the Sky Lake PRM Vol. 7 "Assigning Conditional Mods":
-             *
-             *    * Note that the [post condition signal] bits generated at
-             *      the output of a compute are before the .sat.
-             */
-            if (scan_inst->saturate)
-               break;
-
-            /* From the Sky Lake PRM, Vol 2a, "Multiply":
-             *
-             *    "When multiplying integer data types, if one of the sources
-             *    is a DW, the resulting full precision data is stored in
-             *    the accumulator. However, if the destination data type is
-             *    either W or DW, the low bits of the result are written to
-             *    the destination register and the remaining high bits are
-             *    discarded. This results in undefined Overflow and Sign
-             *    flags. Therefore, conditional modifiers and saturation
-             *    (.sat) cannot be used in this case.
-             *
-             * We just disallow cmod propagation on all integer multiplies.
-             */
-            if (!brw_reg_type_is_floating_point(scan_inst->dst.type) &&
-                scan_inst->opcode == BRW_OPCODE_MUL)
-               break;
-
-            /* Otherwise, try propagating the conditional. */
-            enum brw_conditional_mod cond =
-               inst->src[0].negate ? brw_swap_cmod(inst->conditional_mod)
-                                   : inst->conditional_mod;
-
-            if (scan_inst->can_do_cmod() &&
-                ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) ||
-                 scan_inst->conditional_mod == cond)) {
-               scan_inst->conditional_mod = cond;
-               inst->remove(block);
-               progress = true;
-            }
-            break;
-         }
-
-      not_match:
-         if (scan_inst->writes_flag(v->devinfo))
-            break;
-
-         read_flag = read_flag || scan_inst->reads_flag();
-      }
-   }
-
-   return progress;
-}
-
-bool
-vec4_visitor::opt_cmod_propagation()
-{
-   bool progress = false;
-
-   foreach_block_reverse(block, cfg) {
-      progress = opt_cmod_propagation_local(block, this) || progress;
-   }
-
-   if (progress)
-      invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
-
-   return progress;
-}
-
-} /* namespace brw */
diff --git a/src/intel/compiler/brw_vec4_copy_propagation.cpp b/src/intel/compiler/brw_vec4_copy_propagation.cpp
deleted file mode 100644
index fd535fd88af..00000000000
--- a/src/intel/compiler/brw_vec4_copy_propagation.cpp
+++ /dev/null
@@ -1,556 +0,0 @@
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/**
- * @file brw_vec4_copy_propagation.cpp
- *
- * Implements tracking of values copied between registers, and
- * optimizations based on that: copy propagation and constant
- * propagation.
- */
-
-#include "brw_vec4.h"
-#include "brw_cfg.h"
-#include "brw_eu.h"
-
-namespace brw {
-
-struct copy_entry {
-   src_reg *value[4];
-   int saturatemask;
-};
-
-static bool
-is_direct_copy(vec4_instruction *inst)
-{
-   return (inst->opcode == BRW_OPCODE_MOV &&
-	   !inst->predicate &&
-	   inst->dst.file == VGRF &&
-	   inst->dst.offset % REG_SIZE == 0 &&
-	   !inst->dst.reladdr &&
-	   !inst->src[0].reladdr &&
-	   (inst->dst.type == inst->src[0].type ||
-            (inst->dst.type == BRW_REGISTER_TYPE_F &&
-             inst->src[0].type == BRW_REGISTER_TYPE_VF)));
-}
-
-static bool
-is_dominated_by_previous_instruction(vec4_instruction *inst)
-{
-   return (inst->opcode != BRW_OPCODE_DO &&
-	   inst->opcode != BRW_OPCODE_WHILE &&
-	   inst->opcode != BRW_OPCODE_ELSE &&
-	   inst->opcode != BRW_OPCODE_ENDIF);
-}
-
-static bool
-is_channel_updated(vec4_instruction *inst, src_reg *values[4], int ch)
-{
-   const src_reg *src = values[ch];
-
-   /* consider GRF only */
-   assert(inst->dst.file == VGRF);
-   if (!src || src->file != VGRF)
-      return false;
-
-   return regions_overlap(*src, REG_SIZE, inst->dst, inst->size_written) &&
-          (inst->dst.offset != src->offset ||
-           inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle, ch)));
-}
-
-/**
- * Get the origin of a copy as a single register if all components present in
- * the given readmask originate from the same register and have compatible
- * regions, otherwise return a BAD_FILE register.
- */
-static src_reg
-get_copy_value(const copy_entry &entry, unsigned readmask)
-{
-   unsigned swz[4] = {};
-   src_reg value;
-
-   for (unsigned i = 0; i < 4; i++) {
-      if (readmask & (1 << i)) {
-         if (entry.value[i]) {
-            src_reg src = *entry.value[i];
-
-            if (src.file == IMM) {
-               swz[i] = i;
-            } else {
-               swz[i] = BRW_GET_SWZ(src.swizzle, i);
-               /* Overwrite the original swizzle so the src_reg::equals call
-                * below doesn't care about it, the correct swizzle will be
-                * calculated once the swizzles of all components are known.
-                */
-               src.swizzle = BRW_SWIZZLE_XYZW;
-            }
-
-            if (value.file == BAD_FILE) {
-               value = src;
-            } else if (!value.equals(src)) {
-               return src_reg();
-            }
-         } else {
-            return src_reg();
-         }
-      }
-   }
-
-   return swizzle(value,
-                  brw_compose_swizzle(brw_swizzle_for_mask(readmask),
-                                      BRW_SWIZZLE4(swz[0], swz[1],
-                                                   swz[2], swz[3])));
-}
-
-static bool
-try_constant_propagate(vec4_instruction *inst,
-                       int arg, const copy_entry *entry)
-{
-   /* For constant propagation, we only handle the same constant
-    * across all 4 channels.  Some day, we should handle the 8-bit
-    * float vector format, which would let us constant propagate
-    * vectors better.
-    * We could be more aggressive here -- some channels might not get used
-    * based on the destination writemask.
-    */
-   src_reg value =
-      get_copy_value(*entry,
-                     brw_apply_inv_swizzle_to_mask(inst->src[arg].swizzle,
-                                                   WRITEMASK_XYZW));
-
-   if (value.file != IMM)
-      return false;
-
-   /* 64-bit types can't be used except for one-source instructions, which
-    * higher levels should have constant folded away, so there's no point in
-    * propagating immediates here.
-    */
-   if (type_sz(value.type) == 8 || type_sz(inst->src[arg].type) == 8)
-      return false;
-
-   if (value.type == BRW_REGISTER_TYPE_VF) {
-      /* The result of bit-casting the component values of a vector float
-       * cannot in general be represented as an immediate.
-       */
-      if (inst->src[arg].type != BRW_REGISTER_TYPE_F)
-         return false;
-   } else {
-      value.type = inst->src[arg].type;
-   }
-
-   if (inst->src[arg].abs) {
-      if (!brw_abs_immediate(value.type, &value.as_brw_reg()))
-         return false;
-   }
-
-   if (inst->src[arg].negate) {
-      if (!brw_negate_immediate(value.type, &value.as_brw_reg()))
-         return false;
-   }
-
-   value = swizzle(value, inst->src[arg].swizzle);
-
-   switch (inst->opcode) {
-   case BRW_OPCODE_MOV:
-   case SHADER_OPCODE_BROADCAST:
-      inst->src[arg] = value;
-      return true;
-
-   case VEC4_OPCODE_UNTYPED_ATOMIC:
-      if (arg == 1) {
-         inst->src[arg] = value;
-         return true;
-      }
-      break;
-
-   case SHADER_OPCODE_POW:
-   case SHADER_OPCODE_INT_QUOTIENT:
-   case SHADER_OPCODE_INT_REMAINDER:
-         break;
-   case BRW_OPCODE_DP2:
-   case BRW_OPCODE_DP3:
-   case BRW_OPCODE_DP4:
-   case BRW_OPCODE_DPH:
-   case BRW_OPCODE_BFI1:
-   case BRW_OPCODE_ASR:
-   case BRW_OPCODE_SHL:
-   case BRW_OPCODE_SHR:
-   case BRW_OPCODE_SUBB:
-      if (arg == 1) {
-         inst->src[arg] = value;
-         return true;
-      }
-      break;
-
-   case BRW_OPCODE_MACH:
-   case BRW_OPCODE_MUL:
-   case SHADER_OPCODE_MULH:
-   case BRW_OPCODE_ADD:
-   case BRW_OPCODE_OR:
-   case BRW_OPCODE_AND:
-   case BRW_OPCODE_XOR:
-   case BRW_OPCODE_ADDC:
-      if (arg == 1) {
-	 inst->src[arg] = value;
-	 return true;
-      } else if (arg == 0 && inst->src[1].file != IMM) {
-	 /* Fit this constant in by commuting the operands.  Exception: we
-	  * can't do this for 32-bit integer MUL/MACH because it's asymmetric.
-	  */
-	 if ((inst->opcode == BRW_OPCODE_MUL ||
-              inst->opcode == BRW_OPCODE_MACH) &&
-	     (inst->src[1].type == BRW_REGISTER_TYPE_D ||
-	      inst->src[1].type == BRW_REGISTER_TYPE_UD))
-	    break;
-	 inst->src[0] = inst->src[1];
-	 inst->src[1] = value;
-	 return true;
-      }
-      break;
-   case GS_OPCODE_SET_WRITE_OFFSET:
-      /* This is just a multiply by a constant with special strides.
-       * The generator will handle immediates in both arguments (generating
-       * a single MOV of the product).  So feel free to propagate in src0.
-       */
-      inst->src[arg] = value;
-      return true;
-
-   case BRW_OPCODE_CMP:
-      if (arg == 1) {
-	 inst->src[arg] = value;
-	 return true;
-      } else if (arg == 0 && inst->src[1].file != IMM) {
-	 enum brw_conditional_mod new_cmod;
-
-	 new_cmod = brw_swap_cmod(inst->conditional_mod);
-	 if (new_cmod != BRW_CONDITIONAL_NONE) {
-	    /* Fit this constant in by swapping the operands and
-	     * flipping the test.
-	     */
-	    inst->src[0] = inst->src[1];
-	    inst->src[1] = value;
-	    inst->conditional_mod = new_cmod;
-	    return true;
-	 }
-      }
-      break;
-
-   case BRW_OPCODE_SEL:
-      if (arg == 1) {
-	 inst->src[arg] = value;
-	 return true;
-      } else if (arg == 0 && inst->src[1].file != IMM) {
-	 inst->src[0] = inst->src[1];
-	 inst->src[1] = value;
-
-	 /* If this was predicated, flipping operands means
-	  * we also need to flip the predicate.
-	  */
-	 if (inst->conditional_mod == BRW_CONDITIONAL_NONE) {
-	    inst->predicate_inverse = !inst->predicate_inverse;
-	 }
-	 return true;
-      }
-      break;
-
-   default:
-      break;
-   }
-
-   return false;
-}
-
-static bool
-is_align1_opcode(unsigned opcode)
-{
-   switch (opcode) {
-   case VEC4_OPCODE_DOUBLE_TO_F32:
-   case VEC4_OPCODE_DOUBLE_TO_D32:
-   case VEC4_OPCODE_DOUBLE_TO_U32:
-   case VEC4_OPCODE_TO_DOUBLE:
-   case VEC4_OPCODE_PICK_LOW_32BIT:
-   case VEC4_OPCODE_PICK_HIGH_32BIT:
-   case VEC4_OPCODE_SET_LOW_32BIT:
-   case VEC4_OPCODE_SET_HIGH_32BIT:
-      return true;
-   default:
-      return false;
-   }
-}
-
-static bool
-try_copy_propagate(const struct brw_compiler *compiler,
-                   vec4_instruction *inst, int arg,
-                   const copy_entry *entry, int attributes_per_reg)
-{
-   const struct intel_device_info *devinfo = compiler->devinfo;
-
-   /* Build up the value we are propagating as if it were the source of a
-    * single MOV
-    */
-   src_reg value =
-      get_copy_value(*entry,
-                     brw_apply_inv_swizzle_to_mask(inst->src[arg].swizzle,
-                                                   WRITEMASK_XYZW));
-
-   /* Check that we can propagate that value */
-   if (value.file != UNIFORM &&
-       value.file != VGRF &&
-       value.file != ATTR)
-      return false;
-
-   /* Instructions that write 2 registers also need to read 2 registers. Make
-    * sure we don't break that restriction by copy propagating from a uniform.
-    */
-   if (inst->size_written > REG_SIZE && is_uniform(value))
-      return false;
-
-   /* There is a regioning restriction such that if execsize == width
-    * and hstride != 0 then the vstride can't be 0. When we split instrutions
-    * that take a single-precision source (like F->DF conversions) we end up
-    * with a 4-wide source on an instruction with an execution size of 4.
-    * If we then copy-propagate the source from a uniform we also end up with a
-    * vstride of 0 and we violate the restriction.
-    */
-   if (inst->exec_size == 4 && value.file == UNIFORM &&
-       type_sz(value.type) == 4)
-      return false;
-
-   /* If the type of the copy value is different from the type of the
-    * instruction then the swizzles and writemasks involved don't have the same
-    * meaning and simply replacing the source would produce different semantics.
-    */
-   if (type_sz(value.type) != type_sz(inst->src[arg].type))
-      return false;
-
-   if (inst->src[arg].offset % REG_SIZE || value.offset % REG_SIZE)
-      return false;
-
-   bool has_source_modifiers = value.negate || value.abs;
-
-   /* gfx6 math and gfx7+ SENDs from GRFs ignore source modifiers on
-    * instructions.
-    */
-   if (has_source_modifiers && !inst->can_do_source_mods(devinfo))
-      return false;
-
-   /* Reject cases that would violate register regioning restrictions. */
-   if ((value.file == UNIFORM || value.swizzle != BRW_SWIZZLE_XYZW) &&
-       ((devinfo->ver == 6 && inst->is_math()) ||
-        inst->is_send_from_grf() ||
-        inst->uses_indirect_addressing())) {
-      return false;
-   }
-
-   if (has_source_modifiers &&
-       value.type != inst->src[arg].type &&
-       !inst->can_change_types())
-      return false;
-
-   if (has_source_modifiers &&
-       (inst->opcode == SHADER_OPCODE_GFX4_SCRATCH_WRITE ||
-        inst->opcode == VEC4_OPCODE_PICK_HIGH_32BIT))
-      return false;
-
-   unsigned composed_swizzle = brw_compose_swizzle(inst->src[arg].swizzle,
-                                                   value.swizzle);
-
-   /* Instructions that operate on vectors in ALIGN1 mode will ignore swizzles
-    * so copy-propagation won't be safe if the composed swizzle is anything
-    * other than the identity.
-    */
-   if (is_align1_opcode(inst->opcode) && composed_swizzle != BRW_SWIZZLE_XYZW)
-      return false;
-
-   if (inst->is_3src(compiler) &&
-       (value.file == UNIFORM ||
-        (value.file == ATTR && attributes_per_reg != 1)) &&
-       !brw_is_single_value_swizzle(composed_swizzle))
-      return false;
-
-   if (inst->is_send_from_grf())
-      return false;
-
-   /* we can't generally copy-propagate UD negations because we
-    * end up accessing the resulting values as signed integers
-    * instead. See also resolve_ud_negate().
-    */
-   if (value.negate &&
-       value.type == BRW_REGISTER_TYPE_UD)
-      return false;
-
-   /* Don't report progress if this is a noop. */
-   if (value.equals(inst->src[arg]))
-      return false;
-
-   const unsigned dst_saturate_mask = inst->dst.writemask &
-      brw_apply_swizzle_to_mask(inst->src[arg].swizzle, entry->saturatemask);
-
-   if (dst_saturate_mask) {
-      /* We either saturate all or nothing. */
-      if (dst_saturate_mask != inst->dst.writemask)
-         return false;
-
-      /* Limit saturate propagation only to SEL with src1 bounded within 0.0
-       * and 1.0, otherwise skip copy propagate altogether.
-       */
-      switch(inst->opcode) {
-      case BRW_OPCODE_SEL:
-         if (arg != 0 ||
-             inst->src[0].type != BRW_REGISTER_TYPE_F ||
-             inst->src[1].file != IMM ||
-             inst->src[1].type != BRW_REGISTER_TYPE_F ||
-             inst->src[1].f < 0.0 ||
-             inst->src[1].f > 1.0) {
-            return false;
-         }
-         if (!inst->saturate)
-            inst->saturate = true;
-         break;
-      default:
-         return false;
-      }
-   }
-
-   /* Build the final value */
-   if (inst->src[arg].abs) {
-      value.negate = false;
-      value.abs = true;
-   }
-   if (inst->src[arg].negate)
-      value.negate = !value.negate;
-
-   value.swizzle = composed_swizzle;
-   if (has_source_modifiers &&
-       value.type != inst->src[arg].type) {
-      assert(inst->can_change_types());
-      for (int i = 0; i < 3; i++) {
-         inst->src[i].type = value.type;
-      }
-      inst->dst.type = value.type;
-   } else {
-      value.type = inst->src[arg].type;
-   }
-
-   inst->src[arg] = value;
-   return true;
-}
-
-bool
-vec4_visitor::opt_copy_propagation(bool do_constant_prop)
-{
-   /* If we are in dual instanced or single mode, then attributes are going
-    * to be interleaved, so one register contains two attribute slots.
-    */
-   const int attributes_per_reg =
-      prog_data->dispatch_mode == INTEL_DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
-   bool progress = false;
-   struct copy_entry entries[alloc.total_size];
-
-   memset(&entries, 0, sizeof(entries));
-
-   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      /* This pass only works on basic blocks.  If there's flow
-       * control, throw out all our information and start from
-       * scratch.
-       *
-       * This should really be fixed by using a structure like in
-       * src/glsl/opt_copy_propagation.cpp to track available copies.
-       */
-      if (!is_dominated_by_previous_instruction(inst)) {
-	 memset(&entries, 0, sizeof(entries));
-	 continue;
-      }
-
-      /* For each source arg, see if each component comes from a copy
-       * from the same type file (IMM, VGRF, UNIFORM), and try
-       * optimizing out access to the copy result
-       */
-      for (int i = 2; i >= 0; i--) {
-	 /* Copied values end up in GRFs, and we don't track reladdr
-	  * accesses.
-	  */
-	 if (inst->src[i].file != VGRF ||
-	     inst->src[i].reladdr)
-	    continue;
-
-         /* We only handle register-aligned single GRF copies. */
-         if (inst->size_read(i) != REG_SIZE ||
-             inst->src[i].offset % REG_SIZE)
-            continue;
-
-         const unsigned reg = (alloc.offsets[inst->src[i].nr] +
-                               inst->src[i].offset / REG_SIZE);
-         const copy_entry &entry = entries[reg];
-
-         if (do_constant_prop && try_constant_propagate(inst, i, &entry))
-            progress = true;
-         else if (try_copy_propagate(compiler, inst, i, &entry, attributes_per_reg))
-	    progress = true;
-      }
-
-      /* Track available source registers. */
-      if (inst->dst.file == VGRF) {
-	 const int reg =
-            alloc.offsets[inst->dst.nr] + inst->dst.offset / REG_SIZE;
-
-	 /* Update our destination's current channel values.  For a direct copy,
-	  * the value is the newly propagated source.  Otherwise, we don't know
-	  * the new value, so clear it.
-	  */
-	 bool direct_copy = is_direct_copy(inst);
-         entries[reg].saturatemask &= ~inst->dst.writemask;
-	 for (int i = 0; i < 4; i++) {
-	    if (inst->dst.writemask & (1 << i)) {
-               entries[reg].value[i] = direct_copy ? &inst->src[0] : NULL;
-               entries[reg].saturatemask |=
-                  inst->saturate && direct_copy ? 1 << i : 0;
-            }
-	 }
-
-	 /* Clear the records for any registers whose current value came from
-	  * our destination's updated channels, as the two are no longer equal.
-	  */
-	 if (inst->dst.reladdr)
-	    memset(&entries, 0, sizeof(entries));
-	 else {
-	    for (unsigned i = 0; i < alloc.total_size; i++) {
-	       for (int j = 0; j < 4; j++) {
-		  if (is_channel_updated(inst, entries[i].value, j)) {
-		     entries[i].value[j] = NULL;
-		     entries[i].saturatemask &= ~(1 << j);
-                  }
-	       }
-	    }
-	 }
-      }
-   }
-
-   if (progress)
-      invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW |
-                          DEPENDENCY_INSTRUCTION_DETAIL);
-
-   return progress;
-}
-
-} /* namespace brw */
diff --git a/src/intel/compiler/brw_vec4_cse.cpp b/src/intel/compiler/brw_vec4_cse.cpp
deleted file mode 100644
index c4c9ea68e15..00000000000
--- a/src/intel/compiler/brw_vec4_cse.cpp
+++ /dev/null
@@ -1,322 +0,0 @@
-/*
- * Copyright © 2012, 2013, 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_vec4.h"
-#include "brw_vec4_live_variables.h"
-#include "brw_cfg.h"
-
-using namespace brw;
-
-/** @file brw_vec4_cse.cpp
- *
- * Support for local common subexpression elimination.
- *
- * See Muchnick's Advanced Compiler Design and Implementation, section
- * 13.1 (p378).
- */
-
-namespace {
-struct aeb_entry : public exec_node {
-   /** The instruction that generates the expression value. */
-   vec4_instruction *generator;
-
-   /** The temporary where the value is stored. */
-   src_reg tmp;
-};
-}
-
-static bool
-is_expression(const vec4_instruction *const inst)
-{
-   switch (inst->opcode) {
-   case BRW_OPCODE_MOV:
-   case BRW_OPCODE_SEL:
-   case BRW_OPCODE_NOT:
-   case BRW_OPCODE_AND:
-   case BRW_OPCODE_OR:
-   case BRW_OPCODE_XOR:
-   case BRW_OPCODE_SHR:
-   case BRW_OPCODE_SHL:
-   case BRW_OPCODE_ASR:
-   case BRW_OPCODE_CMP:
-   case BRW_OPCODE_CMPN:
-   case BRW_OPCODE_ADD:
-   case BRW_OPCODE_MUL:
-   case SHADER_OPCODE_MULH:
-   case BRW_OPCODE_FRC:
-   case BRW_OPCODE_RNDU:
-   case BRW_OPCODE_RNDD:
-   case BRW_OPCODE_RNDE:
-   case BRW_OPCODE_RNDZ:
-   case BRW_OPCODE_LINE:
-   case BRW_OPCODE_PLN:
-   case BRW_OPCODE_MAD:
-   case BRW_OPCODE_LRP:
-   case VEC4_OPCODE_UNPACK_UNIFORM:
-   case SHADER_OPCODE_FIND_LIVE_CHANNEL:
-   case SHADER_OPCODE_BROADCAST:
-   case VEC4_TCS_OPCODE_SET_INPUT_URB_OFFSETS:
-   case VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
-      return true;
-   case SHADER_OPCODE_RCP:
-   case SHADER_OPCODE_RSQ:
-   case SHADER_OPCODE_SQRT:
-   case SHADER_OPCODE_EXP2:
-   case SHADER_OPCODE_LOG2:
-   case SHADER_OPCODE_POW:
-   case SHADER_OPCODE_INT_QUOTIENT:
-   case SHADER_OPCODE_INT_REMAINDER:
-   case SHADER_OPCODE_SIN:
-   case SHADER_OPCODE_COS:
-      return inst->mlen == 0;
-   default:
-      return false;
-   }
-}
-
-static bool
-operands_match(const vec4_instruction *a, const vec4_instruction *b)
-{
-   const src_reg *xs = a->src;
-   const src_reg *ys = b->src;
-
-   if (a->opcode == BRW_OPCODE_MAD) {
-      return xs[0].equals(ys[0]) &&
-             ((xs[1].equals(ys[1]) && xs[2].equals(ys[2])) ||
-              (xs[2].equals(ys[1]) && xs[1].equals(ys[2])));
-   } else if (a->opcode == BRW_OPCODE_MOV &&
-              xs[0].file == IMM &&
-              xs[0].type == BRW_REGISTER_TYPE_VF) {
-      src_reg tmp_x = xs[0];
-      src_reg tmp_y = ys[0];
-
-      /* Smash out the values that are not part of the writemask.  Otherwise
-       * the equals operator will fail due to mismatches in unused components.
-       */
-      const unsigned ab_writemask = a->dst.writemask & b->dst.writemask;
-      const uint32_t mask = ((ab_writemask & WRITEMASK_X) ? 0x000000ff : 0) |
-                            ((ab_writemask & WRITEMASK_Y) ? 0x0000ff00 : 0) |
-                            ((ab_writemask & WRITEMASK_Z) ? 0x00ff0000 : 0) |
-                            ((ab_writemask & WRITEMASK_W) ? 0xff000000 : 0);
-
-      tmp_x.ud &= mask;
-      tmp_y.ud &= mask;
-
-      return tmp_x.equals(tmp_y);
-   } else if (!a->is_commutative()) {
-      return xs[0].equals(ys[0]) && xs[1].equals(ys[1]) && xs[2].equals(ys[2]);
-   } else {
-      return (xs[0].equals(ys[0]) && xs[1].equals(ys[1])) ||
-             (xs[1].equals(ys[0]) && xs[0].equals(ys[1]));
-   }
-}
-
-/**
- * Checks if instructions match, exactly for sources, but loosely for
- * destination writemasks.
- *
- * \param 'a' is the generating expression from the AEB entry.
- * \param 'b' is the second occurrence of the expression that we're
- *        considering eliminating.
- */
-static bool
-instructions_match(vec4_instruction *a, vec4_instruction *b)
-{
-   return a->opcode == b->opcode &&
-          a->saturate == b->saturate &&
-          a->predicate == b->predicate &&
-          a->predicate_inverse == b->predicate_inverse &&
-          a->conditional_mod == b->conditional_mod &&
-          a->flag_subreg == b->flag_subreg &&
-          a->dst.type == b->dst.type &&
-          a->offset == b->offset &&
-          a->mlen == b->mlen &&
-          a->base_mrf == b->base_mrf &&
-          a->header_size == b->header_size &&
-          a->shadow_compare == b->shadow_compare &&
-          ((a->dst.writemask & b->dst.writemask) == a->dst.writemask) &&
-          a->force_writemask_all == b->force_writemask_all &&
-          a->size_written == b->size_written &&
-          a->exec_size == b->exec_size &&
-          a->group == b->group &&
-          operands_match(a, b);
-}
-
-bool
-vec4_visitor::opt_cse_local(bblock_t *block, const vec4_live_variables &live)
-{
-   bool progress = false;
-   exec_list aeb;
-
-   void *cse_ctx = ralloc_context(NULL);
-
-   int ip = block->start_ip;
-   foreach_inst_in_block (vec4_instruction, inst, block) {
-      /* Skip some cases. */
-      if (is_expression(inst) && !inst->predicate && inst->mlen == 0 &&
-          ((inst->dst.file != ARF && inst->dst.file != FIXED_GRF) ||
-           inst->dst.is_null()))
-      {
-         bool found = false;
-
-         foreach_in_list_use_after(aeb_entry, entry, &aeb) {
-            /* Match current instruction's expression against those in AEB. */
-            if (!(entry->generator->dst.is_null() && !inst->dst.is_null()) &&
-                instructions_match(inst, entry->generator)) {
-               found = true;
-               progress = true;
-               break;
-            }
-         }
-
-         if (!found) {
-            if (inst->opcode != BRW_OPCODE_MOV ||
-                (inst->opcode == BRW_OPCODE_MOV &&
-                 inst->src[0].file == IMM &&
-                 inst->src[0].type == BRW_REGISTER_TYPE_VF)) {
-               /* Our first sighting of this expression.  Create an entry. */
-               aeb_entry *entry = ralloc(cse_ctx, aeb_entry);
-               entry->tmp = src_reg(); /* file will be BAD_FILE */
-               entry->generator = inst;
-               aeb.push_tail(entry);
-            }
-         } else {
-            /* This is at least our second sighting of this expression.
-             * If we don't have a temporary already, make one.
-             */
-            bool no_existing_temp = entry->tmp.file == BAD_FILE;
-            if (no_existing_temp && !entry->generator->dst.is_null()) {
-               entry->tmp = retype(src_reg(VGRF, alloc.allocate(
-                                              regs_written(entry->generator)),
-                                           NULL), inst->dst.type);
-
-               const unsigned width = entry->generator->exec_size;
-               unsigned component_size = width * type_sz(entry->tmp.type);
-               unsigned num_copy_movs =
-                  DIV_ROUND_UP(entry->generator->size_written, component_size);
-               for (unsigned i = 0; i < num_copy_movs; ++i) {
-                  vec4_instruction *copy =
-                     MOV(offset(entry->generator->dst, width, i),
-                         offset(entry->tmp, width, i));
-                  copy->exec_size = width;
-                  copy->group = entry->generator->group;
-                  copy->force_writemask_all =
-                     entry->generator->force_writemask_all;
-                  entry->generator->insert_after(block, copy);
-               }
-
-               entry->generator->dst = dst_reg(entry->tmp);
-            }
-
-            /* dest <- temp */
-            if (!inst->dst.is_null()) {
-               assert(inst->dst.type == entry->tmp.type);
-               const unsigned width = inst->exec_size;
-               unsigned component_size = width * type_sz(inst->dst.type);
-               unsigned num_copy_movs =
-                  DIV_ROUND_UP(inst->size_written, component_size);
-               for (unsigned i = 0; i < num_copy_movs; ++i) {
-                  vec4_instruction *copy =
-                     MOV(offset(inst->dst, width, i),
-                         offset(entry->tmp, width, i));
-                  copy->exec_size = inst->exec_size;
-                  copy->group = inst->group;
-                  copy->force_writemask_all = inst->force_writemask_all;
-                  inst->insert_before(block, copy);
-               }
-            }
-
-            /* Set our iterator so that next time through the loop inst->next
-             * will get the instruction in the basic block after the one we've
-             * removed.
-             */
-            vec4_instruction *prev = (vec4_instruction *)inst->prev;
-
-            inst->remove(block);
-            inst = prev;
-         }
-      }
-
-      foreach_in_list_safe(aeb_entry, entry, &aeb) {
-         /* Kill all AEB entries that write a different value to or read from
-          * the flag register if we just wrote it.
-          */
-         if (inst->writes_flag(devinfo)) {
-            if (entry->generator->reads_flag() ||
-                (entry->generator->writes_flag(devinfo) &&
-                 !instructions_match(inst, entry->generator))) {
-               entry->remove();
-               ralloc_free(entry);
-               continue;
-            }
-         }
-
-         for (int i = 0; i < 3; i++) {
-            src_reg *src = &entry->generator->src[i];
-
-            /* Kill all AEB entries that use the destination we just
-             * overwrote.
-             */
-            if (inst->dst.file == entry->generator->src[i].file &&
-                inst->dst.nr == entry->generator->src[i].nr) {
-               entry->remove();
-               ralloc_free(entry);
-               break;
-            }
-
-            /* Kill any AEB entries using registers that don't get reused any
-             * more -- a sure sign they'll fail operands_match().
-             */
-            if (src->file == VGRF) {
-               if (live.var_range_end(var_from_reg(alloc, dst_reg(*src)), 8) < ip) {
-                  entry->remove();
-                  ralloc_free(entry);
-                  break;
-               }
-            }
-         }
-      }
-
-      ip++;
-   }
-
-   ralloc_free(cse_ctx);
-
-   return progress;
-}
-
-bool
-vec4_visitor::opt_cse()
-{
-   bool progress = false;
-   const vec4_live_variables &live = live_analysis.require();
-
-   foreach_block (block, cfg) {
-      progress = opt_cse_local(block, live) || progress;
-   }
-
-   if (progress)
-      invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
-
-   return progress;
-}
diff --git a/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp b/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp
deleted file mode 100644
index 10a64a56143..00000000000
--- a/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_vec4.h"
-#include "brw_vec4_live_variables.h"
-#include "brw_cfg.h"
-
-/** @file brw_vec4_dead_code_eliminate.cpp
- *
- * Dataflow-aware dead code elimination.
- *
- * Walks the instruction list from the bottom, removing instructions that
- * have results that both aren't used in later blocks and haven't been read
- * yet in the tail end of this block.
- */
-
-using namespace brw;
-
-bool
-vec4_visitor::dead_code_eliminate()
-{
-   bool progress = false;
-
-   const vec4_live_variables &live_vars = live_analysis.require();
-   int num_vars = live_vars.num_vars;
-   BITSET_WORD *live = rzalloc_array(NULL, BITSET_WORD, BITSET_WORDS(num_vars));
-   BITSET_WORD *flag_live = rzalloc_array(NULL, BITSET_WORD, 1);
-
-   foreach_block_reverse_safe(block, cfg) {
-      memcpy(live, live_vars.block_data[block->num].liveout,
-             sizeof(BITSET_WORD) * BITSET_WORDS(num_vars));
-      memcpy(flag_live, live_vars.block_data[block->num].flag_liveout,
-             sizeof(BITSET_WORD));
-
-      foreach_inst_in_block_reverse_safe(vec4_instruction, inst, block) {
-         if ((inst->dst.file == VGRF && !inst->has_side_effects()) ||
-             (inst->dst.is_null() && inst->writes_flag(devinfo))){
-            bool result_live[4] = { false };
-            if (inst->dst.file == VGRF) {
-               for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) {
-                  for (int c = 0; c < 4; c++) {
-                     const unsigned v = var_from_reg(alloc, inst->dst, c, i);
-                     result_live[c] |= BITSET_TEST(live, v);
-                  }
-               }
-            } else {
-               for (unsigned c = 0; c < 4; c++)
-                  result_live[c] = BITSET_TEST(flag_live, c);
-            }
-
-            /* If the instruction can't do writemasking, then it's all or
-             * nothing.
-             */
-            if (!inst->can_do_writemask(devinfo)) {
-               bool result = result_live[0] | result_live[1] |
-                             result_live[2] | result_live[3];
-               result_live[0] = result;
-               result_live[1] = result;
-               result_live[2] = result;
-               result_live[3] = result;
-            }
-
-            if (inst->writes_flag(devinfo)) {
-               /* Independently calculate the usage of the flag components and
-                * the destination value components.
-                */
-               uint8_t flag_mask = inst->dst.writemask;
-               uint8_t dest_mask = inst->dst.writemask;
-
-               for (int c = 0; c < 4; c++) {
-                  if (!result_live[c] && dest_mask & (1 << c))
-                     dest_mask &= ~(1 << c);
-
-                  if (!BITSET_TEST(flag_live, c))
-                     flag_mask &= ~(1 << c);
-               }
-
-               if (inst->dst.writemask != (flag_mask | dest_mask)) {
-                  progress = true;
-                  inst->dst.writemask = flag_mask | dest_mask;
-               }
-
-               /* If none of the destination components are read, replace the
-                * destination register with the NULL register.
-                */
-               if (dest_mask == 0) {
-                  progress = true;
-                  inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
-               }
-            } else {
-               for (int c = 0; c < 4; c++) {
-                  if (!result_live[c] && inst->dst.writemask & (1 << c)) {
-                     inst->dst.writemask &= ~(1 << c);
-                     progress = true;
-
-                     if (inst->dst.writemask == 0) {
-                        if (inst->writes_accumulator) {
-                           inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
-                        } else {
-                           inst->opcode = BRW_OPCODE_NOP;
-                           break;
-                        }
-                     }
-                  }
-               }
-            }
-         }
-
-         if (inst->dst.is_null() && inst->writes_flag(devinfo)) {
-            bool combined_live = false;
-            for (unsigned c = 0; c < 4; c++)
-               combined_live |= BITSET_TEST(flag_live, c);
-
-            if (!combined_live) {
-               inst->opcode = BRW_OPCODE_NOP;
-               progress = true;
-            }
-         }
-
-         if (inst->dst.file == VGRF && !inst->predicate &&
-             !inst->is_align1_partial_write()) {
-            for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) {
-               for (int c = 0; c < 4; c++) {
-                  if (inst->dst.writemask & (1 << c)) {
-                     const unsigned v = var_from_reg(alloc, inst->dst, c, i);
-                     BITSET_CLEAR(live, v);
-                  }
-               }
-            }
-         }
-
-         if (inst->writes_flag(devinfo) && !inst->predicate && inst->exec_size == 8) {
-            for (unsigned c = 0; c < 4; c++)
-               BITSET_CLEAR(flag_live, c);
-         }
-
-         if (inst->opcode == BRW_OPCODE_NOP) {
-            inst->remove(block);
-            continue;
-         }
-
-         for (int i = 0; i < 3; i++) {
-            if (inst->src[i].file == VGRF) {
-               for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), 16); j++) {
-                  for (int c = 0; c < 4; c++) {
-                     const unsigned v = var_from_reg(alloc, inst->src[i], c, j);
-                     BITSET_SET(live, v);
-                  }
-               }
-            }
-         }
-
-         for (unsigned c = 0; c < 4; c++) {
-            if (inst->reads_flag(c)) {
-               BITSET_SET(flag_live, c);
-            }
-         }
-      }
-   }
-
-   ralloc_free(live);
-   ralloc_free(flag_live);
-
-   if (progress)
-      invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
-
-   return progress;
-}
diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp
deleted file mode 100644
index df414189f4b..00000000000
--- a/src/intel/compiler/brw_vec4_generator.cpp
+++ /dev/null
@@ -1,2319 +0,0 @@
-/* Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_vec4.h"
-#include "brw_cfg.h"
-#include "brw_eu.h"
-#include "brw_disasm_info.h"
-#include "dev/intel_debug.h"
-#include "util/mesa-sha1.h"
-
-using namespace brw;
-
-static void
-generate_math1_gfx4(struct brw_codegen *p,
-                    vec4_instruction *inst,
-                    struct brw_reg dst,
-                    struct brw_reg src)
-{
-   gfx4_math(p,
-	     dst,
-	     brw_math_function(inst->opcode),
-	     inst->base_mrf,
-	     src,
-	     BRW_MATH_PRECISION_FULL);
-}
-
-static void
-check_gfx6_math_src_arg(struct brw_reg src)
-{
-   /* Source swizzles are ignored. */
-   assert(!src.abs);
-   assert(!src.negate);
-   assert(src.swizzle == BRW_SWIZZLE_XYZW);
-}
-
-static void
-generate_math_gfx6(struct brw_codegen *p,
-                   vec4_instruction *inst,
-                   struct brw_reg dst,
-                   struct brw_reg src0,
-                   struct brw_reg src1)
-{
-   /* Can't do writemask because math can't be align16. */
-   assert(dst.writemask == WRITEMASK_XYZW);
-   /* Source swizzles are ignored. */
-   check_gfx6_math_src_arg(src0);
-   if (src1.file == BRW_GENERAL_REGISTER_FILE)
-      check_gfx6_math_src_arg(src1);
-
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   gfx6_math(p, dst, brw_math_function(inst->opcode), src0, src1);
-   brw_set_default_access_mode(p, BRW_ALIGN_16);
-}
-
-static void
-generate_math2_gfx4(struct brw_codegen *p,
-                    vec4_instruction *inst,
-                    struct brw_reg dst,
-                    struct brw_reg src0,
-                    struct brw_reg src1)
-{
-   /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
-    * "Message Payload":
-    *
-    * "Operand0[7].  For the INT DIV functions, this operand is the
-    *  denominator."
-    *  ...
-    * "Operand1[7].  For the INT DIV functions, this operand is the
-    *  numerator."
-    */
-   bool is_int_div = inst->opcode != SHADER_OPCODE_POW;
-   struct brw_reg &op0 = is_int_div ? src1 : src0;
-   struct brw_reg &op1 = is_int_div ? src0 : src1;
-
-   brw_push_insn_state(p);
-   brw_set_default_saturate(p, false);
-   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
-   brw_set_default_flag_reg(p, 0, 0);
-   brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), op1.type), op1);
-   brw_pop_insn_state(p);
-
-   gfx4_math(p,
-	     dst,
-	     brw_math_function(inst->opcode),
-	     inst->base_mrf,
-	     op0,
-	     BRW_MATH_PRECISION_FULL);
-}
-
-static void
-generate_tex(struct brw_codegen *p,
-             struct brw_vue_prog_data *prog_data,
-             gl_shader_stage stage,
-             vec4_instruction *inst,
-             struct brw_reg dst,
-             struct brw_reg src,
-             struct brw_reg surface_index,
-             struct brw_reg sampler_index)
-{
-   const struct intel_device_info *devinfo = p->devinfo;
-   int msg_type = -1;
-
-   if (devinfo->ver >= 5) {
-      switch (inst->opcode) {
-      case SHADER_OPCODE_TEX:
-      case SHADER_OPCODE_TXL:
-	 if (inst->shadow_compare) {
-	    msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
-	 } else {
-	    msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_LOD;
-	 }
-	 break;
-      case SHADER_OPCODE_TXD:
-         if (inst->shadow_compare) {
-            /* Gfx7.5+.  Otherwise, lowered by brw_lower_texture_gradients(). */
-            assert(devinfo->verx10 == 75);
-            msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
-         } else {
-            msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
-         }
-	 break;
-      case SHADER_OPCODE_TXF:
-	 msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_LD;
-	 break;
-      case SHADER_OPCODE_TXF_CMS:
-         if (devinfo->ver >= 7)
-            msg_type = GFX7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
-         else
-            msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_LD;
-         break;
-      case SHADER_OPCODE_TXF_MCS:
-         assert(devinfo->ver >= 7);
-         msg_type = GFX7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
-         break;
-      case SHADER_OPCODE_TXS:
-	 msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
-	 break;
-      case SHADER_OPCODE_TG4:
-         if (inst->shadow_compare) {
-            msg_type = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C;
-         } else {
-            msg_type = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
-         }
-         break;
-      case SHADER_OPCODE_TG4_OFFSET:
-         if (inst->shadow_compare) {
-            msg_type = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C;
-         } else {
-            msg_type = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
-         }
-         break;
-      case SHADER_OPCODE_SAMPLEINFO:
-         msg_type = GFX6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO;
-         break;
-      default:
-	 unreachable("should not get here: invalid vec4 texture opcode");
-      }
-   } else {
-      switch (inst->opcode) {
-      case SHADER_OPCODE_TEX:
-      case SHADER_OPCODE_TXL:
-	 if (inst->shadow_compare) {
-	    msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE;
-	    assert(inst->mlen == 3);
-	 } else {
-	    msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD;
-	    assert(inst->mlen == 2);
-	 }
-	 break;
-      case SHADER_OPCODE_TXD:
-	 /* There is no sample_d_c message; comparisons are done manually. */
-	 msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS;
-	 assert(inst->mlen == 4);
-	 break;
-      case SHADER_OPCODE_TXF:
-	 msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_LD;
-	 assert(inst->mlen == 2);
-	 break;
-      case SHADER_OPCODE_TXS:
-	 msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO;
-	 assert(inst->mlen == 2);
-	 break;
-      default:
-	 unreachable("should not get here: invalid vec4 texture opcode");
-      }
-   }
-
-   assert(msg_type != -1);
-
-   assert(sampler_index.type == BRW_REGISTER_TYPE_UD);
-
-   /* Load the message header if present.  If there's a texture offset, we need
-    * to set it up explicitly and load the offset bitfield.  Otherwise, we can
-    * use an implied move from g0 to the first message register.
-    */
-   if (inst->header_size != 0) {
-      if (devinfo->ver < 6 && !inst->offset) {
-         /* Set up an implied move from g0 to the MRF. */
-         src = brw_vec8_grf(0, 0);
-      } else {
-         struct brw_reg header =
-            retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD);
-         uint32_t dw2 = 0;
-
-         /* Explicitly set up the message header by copying g0 to the MRF. */
-         brw_push_insn_state(p);
-         brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-         brw_MOV(p, header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
-
-         brw_set_default_access_mode(p, BRW_ALIGN_1);
-
-         if (inst->offset)
-            /* Set the texel offset bits in DWord 2. */
-            dw2 = inst->offset;
-
-         /* The VS, DS, and FS stages have the g0.2 payload delivered as 0,
-          * so header0.2 is 0 when g0 is copied.  The HS and GS stages do
-          * not, so we must set to to 0 to avoid setting undesirable bits
-          * in the message header.
-          */
-         if (dw2 ||
-             stage == MESA_SHADER_TESS_CTRL ||
-             stage == MESA_SHADER_GEOMETRY) {
-            brw_MOV(p, get_element_ud(header, 2), brw_imm_ud(dw2));
-         }
-
-         brw_adjust_sampler_state_pointer(p, header, sampler_index);
-         brw_pop_insn_state(p);
-      }
-   }
-
-   uint32_t return_format;
-
-   switch (dst.type) {
-   case BRW_REGISTER_TYPE_D:
-      return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
-      break;
-   case BRW_REGISTER_TYPE_UD:
-      return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
-      break;
-   default:
-      return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
-      break;
-   }
-
-   /* Stomp the resinfo output type to UINT32.  On gens 4-5, the output type
-    * is set as part of the message descriptor.  On gfx4, the PRM seems to
-    * allow UINT32 and FLOAT32 (i965 PRM, Vol. 4 Section 4.8.1.1), but on
-    * later gens UINT32 is required.  Once you hit Sandy Bridge, the bit is
-    * gone from the message descriptor entirely and you just get UINT32 all
-    * the time regasrdless.  Since we can really only do non-UINT32 on gfx4,
-    * just stomp it to UINT32 all the time.
-    */
-   if (inst->opcode == SHADER_OPCODE_TXS)
-      return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
-
-   if (surface_index.file == BRW_IMMEDIATE_VALUE &&
-       sampler_index.file == BRW_IMMEDIATE_VALUE) {
-      uint32_t surface = surface_index.ud;
-      uint32_t sampler = sampler_index.ud;
-
-      brw_SAMPLE(p,
-                 dst,
-                 inst->base_mrf,
-                 src,
-                 surface,
-                 sampler % 16,
-                 msg_type,
-                 1, /* response length */
-                 inst->mlen,
-                 inst->header_size != 0,
-                 BRW_SAMPLER_SIMD_MODE_SIMD4X2,
-                 return_format);
-   } else {
-      /* Non-constant sampler index. */
-
-      struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
-      struct brw_reg surface_reg = vec1(retype(surface_index, BRW_REGISTER_TYPE_UD));
-      struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD));
-
-      brw_push_insn_state(p);
-      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-      brw_set_default_access_mode(p, BRW_ALIGN_1);
-
-      if (brw_regs_equal(&surface_reg, &sampler_reg)) {
-         brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101));
-      } else {
-         if (sampler_reg.file == BRW_IMMEDIATE_VALUE) {
-            brw_OR(p, addr, surface_reg, brw_imm_ud(sampler_reg.ud << 8));
-         } else {
-            brw_SHL(p, addr, sampler_reg, brw_imm_ud(8));
-            brw_OR(p, addr, addr, surface_reg);
-         }
-      }
-      brw_AND(p, addr, addr, brw_imm_ud(0xfff));
-
-      brw_pop_insn_state(p);
-
-      if (inst->base_mrf != -1)
-         gfx6_resolve_implied_move(p, &src, inst->base_mrf);
-
-      /* dst = send(offset, a0.0 | <descriptor>) */
-      brw_send_indirect_message(
-         p, BRW_SFID_SAMPLER, dst, src, addr,
-         brw_message_desc(devinfo, inst->mlen, 1, inst->header_size) |
-         brw_sampler_desc(devinfo,
-                          0 /* surface */,
-                          0 /* sampler */,
-                          msg_type,
-                          BRW_SAMPLER_SIMD_MODE_SIMD4X2,
-                          return_format),
-         false /* EOT */);
-
-      /* visitor knows more than we do about the surface limit required,
-       * so has already done marking.
-       */
-   }
-}
-
-static void
-generate_vs_urb_write(struct brw_codegen *p, vec4_instruction *inst)
-{
-   brw_urb_WRITE(p,
-		 brw_null_reg(), /* dest */
-		 inst->base_mrf, /* starting mrf reg nr */
-		 brw_vec8_grf(0, 0), /* src */
-                 inst->urb_write_flags,
-		 inst->mlen,
-		 0,		/* response len */
-		 inst->offset,	/* urb destination offset */
-		 BRW_URB_SWIZZLE_INTERLEAVE);
-}
-
-static void
-generate_gs_urb_write(struct brw_codegen *p, vec4_instruction *inst)
-{
-   struct brw_reg src = brw_message_reg(inst->base_mrf);
-   brw_urb_WRITE(p,
-                 brw_null_reg(), /* dest */
-                 inst->base_mrf, /* starting mrf reg nr */
-                 src,
-                 inst->urb_write_flags,
-                 inst->mlen,
-                 0,             /* response len */
-                 inst->offset,  /* urb destination offset */
-                 BRW_URB_SWIZZLE_INTERLEAVE);
-}
-
-static void
-generate_gs_urb_write_allocate(struct brw_codegen *p, vec4_instruction *inst)
-{
-   struct brw_reg src = brw_message_reg(inst->base_mrf);
-
-   /* We pass the temporary passed in src0 as the writeback register */
-   brw_urb_WRITE(p,
-                 inst->src[0].as_brw_reg(), /* dest */
-                 inst->base_mrf, /* starting mrf reg nr */
-                 src,
-                 BRW_URB_WRITE_ALLOCATE_COMPLETE,
-                 inst->mlen,
-                 1, /* response len */
-                 inst->offset,  /* urb destination offset */
-                 BRW_URB_SWIZZLE_INTERLEAVE);
-
-   /* Now put allocated urb handle in dst.0 */
-   brw_push_insn_state(p);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   brw_MOV(p, get_element_ud(inst->dst.as_brw_reg(), 0),
-           get_element_ud(inst->src[0].as_brw_reg(), 0));
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_gs_thread_end(struct brw_codegen *p, vec4_instruction *inst)
-{
-   struct brw_reg src = brw_message_reg(inst->base_mrf);
-   brw_urb_WRITE(p,
-                 brw_null_reg(), /* dest */
-                 inst->base_mrf, /* starting mrf reg nr */
-                 src,
-                 BRW_URB_WRITE_EOT | inst->urb_write_flags,
-                 inst->mlen,
-                 0,              /* response len */
-                 0,              /* urb destination offset */
-                 BRW_URB_SWIZZLE_INTERLEAVE);
-}
-
-static void
-generate_gs_set_write_offset(struct brw_codegen *p,
-                             struct brw_reg dst,
-                             struct brw_reg src0,
-                             struct brw_reg src1)
-{
-   /* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
-    * Header: M0.3):
-    *
-    *     Slot 0 Offset. This field, after adding to the Global Offset field
-    *     in the message descriptor, specifies the offset (in 256-bit units)
-    *     from the start of the URB entry, as referenced by URB Handle 0, at
-    *     which the data will be accessed.
-    *
-    * Similar text describes DWORD M0.4, which is slot 1 offset.
-    *
-    * Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components
-    * of the register for geometry shader invocations 0 and 1) by the
-    * immediate value in src1, and store the result in DWORDs 3 and 4 of dst.
-    *
-    * We can do this with the following EU instruction:
-    *
-    *     mul(2) dst.3<1>UD src0<8;2,4>UD src1<...>UW   { Align1 WE_all }
-    */
-   brw_push_insn_state(p);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   assert(p->devinfo->ver >= 7 &&
-          src1.file == BRW_IMMEDIATE_VALUE &&
-          src1.type == BRW_REGISTER_TYPE_UD &&
-          src1.ud <= USHRT_MAX);
-   if (src0.file == BRW_IMMEDIATE_VALUE) {
-      brw_MOV(p, suboffset(stride(dst, 2, 2, 1), 3),
-              brw_imm_ud(src0.ud * src1.ud));
-   } else {
-      if (src1.file == BRW_IMMEDIATE_VALUE) {
-         src1 = brw_imm_uw(src1.ud);
-      }
-      brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
-              retype(src1, BRW_REGISTER_TYPE_UW));
-   }
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_gs_set_vertex_count(struct brw_codegen *p,
-                             struct brw_reg dst,
-                             struct brw_reg src)
-{
-   brw_push_insn_state(p);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-
-   /* If we think of the src and dst registers as composed of 8 DWORDs each,
-    * we want to pick up the contents of DWORDs 0 and 4 from src, truncate
-    * them to WORDs, and then pack them into DWORD 2 of dst.
-    *
-    * It's easier to get the EU to do this if we think of the src and dst
-    * registers as composed of 16 WORDS each; then, we want to pick up the
-    * contents of WORDs 0 and 8 from src, and pack them into WORDs 4 and 5
-    * of dst.
-    *
-    * We can do that by the following EU instruction:
-    *
-    *     mov (2) dst.4<1>:uw src<8;1,0>:uw   { Align1, Q1, NoMask }
-    */
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_MOV(p,
-           suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4),
-           stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0));
-
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_gs_svb_write(struct brw_codegen *p,
-                      vec4_instruction *inst,
-                      struct brw_reg dst,
-                      struct brw_reg src0,
-                      struct brw_reg src1)
-{
-   int binding = inst->sol_binding;
-   bool final_write = inst->sol_final_write;
-
-   brw_push_insn_state(p);
-   brw_set_default_exec_size(p, BRW_EXECUTE_4);
-   /* Copy Vertex data into M0.x */
-   brw_MOV(p, stride(dst, 4, 4, 1),
-           stride(retype(src0, BRW_REGISTER_TYPE_UD), 4, 4, 1));
-   brw_pop_insn_state(p);
-
-   brw_push_insn_state(p);
-   /* Send SVB Write */
-   brw_svb_write(p,
-                 final_write ? src1 : brw_null_reg(), /* dest == src1 */
-                 1, /* msg_reg_nr */
-                 dst, /* src0 == previous dst */
-                 BRW_GFX6_SOL_BINDING_START + binding, /* binding_table_index */
-                 final_write); /* send_commit_msg */
-
-   /* Finally, wait for the write commit to occur so that we can proceed to
-    * other things safely.
-    *
-    * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3:
-    *
-    *   The write commit does not modify the destination register, but
-    *   merely clears the dependency associated with the destination
-    *   register. Thus, a simple “mov” instruction using the register as a
-    *   source is sufficient to wait for the write commit to occur.
-    */
-   if (final_write) {
-      brw_MOV(p, src1, src1);
-   }
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_gs_svb_set_destination_index(struct brw_codegen *p,
-                                      vec4_instruction *inst,
-                                      struct brw_reg dst,
-                                      struct brw_reg src)
-{
-   int vertex = inst->sol_vertex;
-   brw_push_insn_state(p);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   brw_MOV(p, get_element_ud(dst, 5), get_element_ud(src, vertex));
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_gs_set_dword_2(struct brw_codegen *p,
-                        struct brw_reg dst,
-                        struct brw_reg src)
-{
-   brw_push_insn_state(p);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   brw_MOV(p, suboffset(vec1(dst), 2), suboffset(vec1(src), 0));
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_gs_prepare_channel_masks(struct brw_codegen *p,
-                                  struct brw_reg dst)
-{
-   /* We want to left shift just DWORD 4 (the x component belonging to the
-    * second geometry shader invocation) by 4 bits.  So generate the
-    * instruction:
-    *
-    *     shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all }
-    */
-   dst = suboffset(vec1(dst), 4);
-   brw_push_insn_state(p);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   brw_SHL(p, dst, dst, brw_imm_ud(4));
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_gs_set_channel_masks(struct brw_codegen *p,
-                              struct brw_reg dst,
-                              struct brw_reg src)
-{
-   /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
-    * Header: M0.5):
-    *
-    *     15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask
-    *
-    *        When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1
-    *        DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls
-    *        Vertex 0 DATA[7].  This bit is ANDed with the corresponding
-    *        channel enable to determine the final channel enable.  For the
-    *        URB_READ_OWORD & URB_READ_HWORD messages, when final channel
-    *        enable is 1 it indicates that Vertex 1 DATA [3] will be included
-    *        in the writeback message.  For the URB_WRITE_OWORD &
-    *        URB_WRITE_HWORD messages, when final channel enable is 1 it
-    *        indicates that Vertex 1 DATA [3] will be written to the surface.
-    *
-    *        0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included
-    *        1: Vertex DATA [3] / Vertex 0 DATA[7] channel included
-    *
-    *     14 Vertex 1 DATA [2] Channel Mask
-    *     13 Vertex 1 DATA [1] Channel Mask
-    *     12 Vertex 1 DATA [0] Channel Mask
-    *     11 Vertex 0 DATA [3] Channel Mask
-    *     10 Vertex 0 DATA [2] Channel Mask
-    *      9 Vertex 0 DATA [1] Channel Mask
-    *      8 Vertex 0 DATA [0] Channel Mask
-    *
-    * (This is from a section of the PRM that is agnostic to the particular
-    * type of shader being executed, so "Vertex 0" and "Vertex 1" refer to
-    * geometry shader invocations 0 and 1, respectively).  Since we have the
-    * enable flags for geometry shader invocation 0 in bits 3:0 of DWORD 0,
-    * and the enable flags for geometry shader invocation 1 in bits 7:0 of
-    * DWORD 4, we just need to OR them together and store the result in bits
-    * 15:8 of DWORD 5.
-    *
-    * It's easier to get the EU to do this if we think of the src and dst
-    * registers as composed of 32 bytes each; then, we want to pick up the
-    * contents of bytes 0 and 16 from src, OR them together, and store them in
-    * byte 21.
-    *
-    * We can do that by the following EU instruction:
-    *
-    *     or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all }
-    *
-    * Note: this relies on the source register having zeros in (a) bits 7:4 of
-    * DWORD 0 and (b) bits 3:0 of DWORD 4.  We can rely on (b) because the
-    * source register was prepared by GS_OPCODE_PREPARE_CHANNEL_MASKS (which
-    * shifts DWORD 4 left by 4 bits), and we can rely on (a) because prior to
-    * the execution of GS_OPCODE_PREPARE_CHANNEL_MASKS, DWORDs 0 and 4 need to
-    * contain valid channel mask values (which are in the range 0x0-0xf).
-    */
-   dst = retype(dst, BRW_REGISTER_TYPE_UB);
-   src = retype(src, BRW_REGISTER_TYPE_UB);
-   brw_push_insn_state(p);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   brw_OR(p, suboffset(vec1(dst), 21), vec1(src), suboffset(vec1(src), 16));
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_gs_get_instance_id(struct brw_codegen *p,
-                            struct brw_reg dst)
-{
-   /* We want to right shift R0.0 & R0.1 by GFX7_GS_PAYLOAD_INSTANCE_ID_SHIFT
-    * and store into dst.0 & dst.4. So generate the instruction:
-    *
-    *     shr(8) dst<1> R0<1,4,0> GFX7_GS_PAYLOAD_INSTANCE_ID_SHIFT { align1 WE_normal 1Q }
-    */
-   brw_push_insn_state(p);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   dst = retype(dst, BRW_REGISTER_TYPE_UD);
-   struct brw_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
-   brw_SHR(p, dst, stride(r0, 1, 4, 0),
-           brw_imm_ud(GFX7_GS_PAYLOAD_INSTANCE_ID_SHIFT));
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_gs_ff_sync_set_primitives(struct brw_codegen *p,
-                                   struct brw_reg dst,
-                                   struct brw_reg src0,
-                                   struct brw_reg src1,
-                                   struct brw_reg src2)
-{
-   brw_push_insn_state(p);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   /* Save src0 data in 16:31 bits of dst.0 */
-   brw_AND(p, suboffset(vec1(dst), 0), suboffset(vec1(src0), 0),
-           brw_imm_ud(0xffffu));
-   brw_SHL(p, suboffset(vec1(dst), 0), suboffset(vec1(dst), 0), brw_imm_ud(16));
-   /* Save src1 data in 0:15 bits of dst.0 */
-   brw_AND(p, suboffset(vec1(src2), 0), suboffset(vec1(src1), 0),
-           brw_imm_ud(0xffffu));
-   brw_OR(p, suboffset(vec1(dst), 0),
-          suboffset(vec1(dst), 0),
-          suboffset(vec1(src2), 0));
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_gs_ff_sync(struct brw_codegen *p,
-                    vec4_instruction *inst,
-                    struct brw_reg dst,
-                    struct brw_reg src0,
-                    struct brw_reg src1)
-{
-   /* This opcode uses an implied MRF register for:
-    *  - the header of the ff_sync message. And as such it is expected to be
-    *    initialized to r0 before calling here.
-    *  - the destination where we will write the allocated URB handle.
-    */
-   struct brw_reg header =
-      retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD);
-
-   /* Overwrite dword 0 of the header (SO vertices to write) and
-    * dword 1 (number of primitives written).
-    */
-   brw_push_insn_state(p);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_MOV(p, get_element_ud(header, 0), get_element_ud(src1, 0));
-   brw_MOV(p, get_element_ud(header, 1), get_element_ud(src0, 0));
-   brw_pop_insn_state(p);
-
-   /* Allocate URB handle in dst */
-   brw_ff_sync(p,
-               dst,
-               0,
-               header,
-               1, /* allocate */
-               1, /* response length */
-               0 /* eot */);
-
-   /* Now put allocated urb handle in header.0 */
-   brw_push_insn_state(p);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   brw_MOV(p, get_element_ud(header, 0), get_element_ud(dst, 0));
-
-   /* src1 is not an immediate when we use transform feedback */
-   if (src1.file != BRW_IMMEDIATE_VALUE) {
-      brw_set_default_exec_size(p, BRW_EXECUTE_4);
-      brw_MOV(p, brw_vec4_grf(src1.nr, 0), brw_vec4_grf(dst.nr, 1));
-   }
-
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_gs_set_primitive_id(struct brw_codegen *p, struct brw_reg dst)
-{
-   /* In gfx6, PrimitiveID is delivered in R0.1 of the payload */
-   struct brw_reg src = brw_vec8_grf(0, 0);
-   brw_push_insn_state(p);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src, 1));
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_tcs_get_instance_id(struct brw_codegen *p, struct brw_reg dst)
-{
-   const struct intel_device_info *devinfo = p->devinfo;
-   const bool ivb = devinfo->platform == INTEL_PLATFORM_IVB ||
-                    devinfo->platform == INTEL_PLATFORM_BYT;
-
-   /* "Instance Count" comes as part of the payload in r0.2 bits 23:17.
-    *
-    * Since we operate in SIMD4x2 mode, we need run half as many threads
-    * as necessary.  So we assign (2i + 1, 2i) as the thread counts.  We
-    * shift right by one less to accomplish the multiplication by two.
-    */
-   dst = retype(dst, BRW_REGISTER_TYPE_UD);
-   struct brw_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
-
-   brw_push_insn_state(p);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-
-   const int mask = ivb ? INTEL_MASK(22, 16) : INTEL_MASK(23, 17);
-   const int shift = ivb ? 16 : 17;
-
-   brw_AND(p, get_element_ud(dst, 0), get_element_ud(r0, 2), brw_imm_ud(mask));
-   brw_SHR(p, get_element_ud(dst, 0), get_element_ud(dst, 0),
-           brw_imm_ud(shift - 1));
-   brw_ADD(p, get_element_ud(dst, 4), get_element_ud(dst, 0), brw_imm_ud(1));
-
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_tcs_urb_write(struct brw_codegen *p,
-                       vec4_instruction *inst,
-                       struct brw_reg urb_header)
-{
-   const struct intel_device_info *devinfo = p->devinfo;
-
-   brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
-   brw_set_dest(p, send, brw_null_reg());
-   brw_set_src0(p, send, urb_header);
-   brw_set_desc(p, send, brw_message_desc(devinfo, inst->mlen, 0, true));
-
-   brw_inst_set_sfid(devinfo, send, BRW_SFID_URB);
-   brw_inst_set_urb_opcode(devinfo, send, BRW_URB_OPCODE_WRITE_OWORD);
-   brw_inst_set_urb_global_offset(devinfo, send, inst->offset);
-   if (inst->urb_write_flags & BRW_URB_WRITE_EOT) {
-      brw_inst_set_eot(devinfo, send, 1);
-   } else {
-      brw_inst_set_urb_per_slot_offset(devinfo, send, 1);
-      brw_inst_set_urb_swizzle_control(devinfo, send, BRW_URB_SWIZZLE_INTERLEAVE);
-   }
-
-   /* what happens to swizzles? */
-}
-
-
-static void
-generate_tcs_input_urb_offsets(struct brw_codegen *p,
-                               struct brw_reg dst,
-                               struct brw_reg vertex,
-                               struct brw_reg offset)
-{
-   /* Generates an URB read/write message header for HS/DS operation.
-    * Inputs are a vertex index, and a byte offset from the beginning of
-    * the vertex. */
-
-   /* If `vertex` is not an immediate, we clobber a0.0 */
-
-   assert(vertex.file == BRW_IMMEDIATE_VALUE || vertex.file == BRW_GENERAL_REGISTER_FILE);
-   assert(vertex.type == BRW_REGISTER_TYPE_UD || vertex.type == BRW_REGISTER_TYPE_D);
-
-   assert(dst.file == BRW_GENERAL_REGISTER_FILE);
-
-   brw_push_insn_state(p);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   brw_MOV(p, dst, brw_imm_ud(0));
-
-   /* m0.5 bits 8-15 are channel enables */
-   brw_MOV(p, get_element_ud(dst, 5), brw_imm_ud(0xff00));
-
-   /* m0.0-0.1: URB handles */
-   if (vertex.file == BRW_IMMEDIATE_VALUE) {
-      uint32_t vertex_index = vertex.ud;
-      struct brw_reg index_reg = brw_vec1_grf(
-            1 + (vertex_index >> 3), vertex_index & 7);
-
-      brw_MOV(p, vec2(get_element_ud(dst, 0)),
-              retype(index_reg, BRW_REGISTER_TYPE_UD));
-   } else {
-      /* Use indirect addressing.  ICP Handles are DWords (single channels
-       * of a register) and start at g1.0.
-       *
-       * In order to start our region at g1.0, we add 8 to the vertex index,
-       * effectively skipping over the 8 channels in g0.0.  This gives us a
-       * DWord offset to the ICP Handle.
-       *
-       * Indirect addressing works in terms of bytes, so we then multiply
-       * the DWord offset by 4 (by shifting left by 2).
-       */
-      struct brw_reg addr = brw_address_reg(0);
-
-      /* bottom half: m0.0 = g[1.0 + vertex.0]UD */
-      brw_ADD(p, addr, retype(get_element_ud(vertex, 0), BRW_REGISTER_TYPE_UW),
-              brw_imm_uw(0x8));
-      brw_SHL(p, addr, addr, brw_imm_uw(2));
-      brw_MOV(p, get_element_ud(dst, 0), deref_1ud(brw_indirect(0, 0), 0));
-
-      /* top half: m0.1 = g[1.0 + vertex.4]UD */
-      brw_ADD(p, addr, retype(get_element_ud(vertex, 4), BRW_REGISTER_TYPE_UW),
-              brw_imm_uw(0x8));
-      brw_SHL(p, addr, addr, brw_imm_uw(2));
-      brw_MOV(p, get_element_ud(dst, 1), deref_1ud(brw_indirect(0, 0), 0));
-   }
-
-   /* m0.3-0.4: 128bit-granular offsets into the URB from the handles */
-   if (offset.file != ARF)
-      brw_MOV(p, vec2(get_element_ud(dst, 3)), stride(offset, 4, 1, 0));
-
-   brw_pop_insn_state(p);
-}
-
-
-static void
-generate_tcs_output_urb_offsets(struct brw_codegen *p,
-                                struct brw_reg dst,
-                                struct brw_reg write_mask,
-                                struct brw_reg offset)
-{
-   /* Generates an URB read/write message header for HS/DS operation, for the patch URB entry. */
-   assert(dst.file == BRW_GENERAL_REGISTER_FILE || dst.file == BRW_MESSAGE_REGISTER_FILE);
-
-   assert(write_mask.file == BRW_IMMEDIATE_VALUE);
-   assert(write_mask.type == BRW_REGISTER_TYPE_UD);
-
-   brw_push_insn_state(p);
-
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   brw_MOV(p, dst, brw_imm_ud(0));
-
-   unsigned mask = write_mask.ud;
-
-   /* m0.5 bits 15:12 and 11:8 are channel enables */
-   brw_MOV(p, get_element_ud(dst, 5), brw_imm_ud((mask << 8) | (mask << 12)));
-
-   /* HS patch URB handle is delivered in r0.0 */
-   struct brw_reg urb_handle = brw_vec1_grf(0, 0);
-
-   /* m0.0-0.1: URB handles */
-   brw_MOV(p, vec2(get_element_ud(dst, 0)),
-           retype(urb_handle, BRW_REGISTER_TYPE_UD));
-
-   /* m0.3-0.4: 128bit-granular offsets into the URB from the handles */
-   if (offset.file != ARF)
-      brw_MOV(p, vec2(get_element_ud(dst, 3)), stride(offset, 4, 1, 0));
-
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_tes_create_input_read_header(struct brw_codegen *p,
-                                      struct brw_reg dst)
-{
-   brw_push_insn_state(p);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-
-   /* Initialize the register to 0 */
-   brw_MOV(p, dst, brw_imm_ud(0));
-
-   /* Enable all the channels in m0.5 bits 15:8 */
-   brw_MOV(p, get_element_ud(dst, 5), brw_imm_ud(0xff00));
-
-   /* Copy g1.3 (the patch URB handle) to m0.0 and m0.1.  For safety,
-    * mask out irrelevant "Reserved" bits, as they're not marked MBZ.
-    */
-   brw_AND(p, vec2(get_element_ud(dst, 0)),
-           retype(brw_vec1_grf(1, 3), BRW_REGISTER_TYPE_UD),
-           brw_imm_ud(0x1fff));
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_tes_add_indirect_urb_offset(struct brw_codegen *p,
-                                     struct brw_reg dst,
-                                     struct brw_reg header,
-                                     struct brw_reg offset)
-{
-   brw_push_insn_state(p);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-
-   brw_MOV(p, dst, header);
-
-   /* Uniforms will have a stride <0;4,1>, and we need to convert to <0;1,0>.
-    * Other values get <4;1,0>.
-    */
-   struct brw_reg restrided_offset;
-   if (offset.vstride == BRW_VERTICAL_STRIDE_0 &&
-       offset.width == BRW_WIDTH_4 &&
-       offset.hstride == BRW_HORIZONTAL_STRIDE_1) {
-      restrided_offset = stride(offset, 0, 1, 0);
-   } else {
-      restrided_offset = stride(offset, 4, 1, 0);
-   }
-
-   /* m0.3-0.4: 128-bit-granular offsets into the URB from the handles */
-   brw_MOV(p, vec2(get_element_ud(dst, 3)), restrided_offset);
-
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_vec4_urb_read(struct brw_codegen *p,
-                       vec4_instruction *inst,
-                       struct brw_reg dst,
-                       struct brw_reg header)
-{
-   const struct intel_device_info *devinfo = p->devinfo;
-
-   assert(header.file == BRW_GENERAL_REGISTER_FILE);
-   assert(header.type == BRW_REGISTER_TYPE_UD);
-
-   brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
-   brw_set_dest(p, send, dst);
-   brw_set_src0(p, send, header);
-
-   brw_set_desc(p, send, brw_message_desc(devinfo, 1, 1, true));
-
-   brw_inst_set_sfid(devinfo, send, BRW_SFID_URB);
-   brw_inst_set_urb_opcode(devinfo, send, BRW_URB_OPCODE_READ_OWORD);
-   brw_inst_set_urb_swizzle_control(devinfo, send, BRW_URB_SWIZZLE_INTERLEAVE);
-   brw_inst_set_urb_per_slot_offset(devinfo, send, 1);
-
-   brw_inst_set_urb_global_offset(devinfo, send, inst->offset);
-}
-
-static void
-generate_tcs_release_input(struct brw_codegen *p,
-                           struct brw_reg header,
-                           struct brw_reg vertex,
-                           struct brw_reg is_unpaired)
-{
-   const struct intel_device_info *devinfo = p->devinfo;
-
-   assert(vertex.file == BRW_IMMEDIATE_VALUE);
-   assert(vertex.type == BRW_REGISTER_TYPE_UD);
-
-   /* m0.0-0.1: URB handles */
-   struct brw_reg urb_handles =
-      retype(brw_vec2_grf(1 + (vertex.ud >> 3), vertex.ud & 7),
-             BRW_REGISTER_TYPE_UD);
-
-   brw_push_insn_state(p);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   brw_MOV(p, header, brw_imm_ud(0));
-   brw_MOV(p, vec2(get_element_ud(header, 0)), urb_handles);
-   brw_pop_insn_state(p);
-
-   brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
-   brw_set_dest(p, send, brw_null_reg());
-   brw_set_src0(p, send, header);
-   brw_set_desc(p, send, brw_message_desc(devinfo, 1, 0, true));
-
-   brw_inst_set_sfid(devinfo, send, BRW_SFID_URB);
-   brw_inst_set_urb_opcode(devinfo, send, BRW_URB_OPCODE_READ_OWORD);
-   brw_inst_set_urb_complete(devinfo, send, 1);
-   brw_inst_set_urb_swizzle_control(devinfo, send, is_unpaired.ud ?
-                                    BRW_URB_SWIZZLE_NONE :
-                                    BRW_URB_SWIZZLE_INTERLEAVE);
-}
-
-static void
-generate_tcs_thread_end(struct brw_codegen *p, vec4_instruction *inst)
-{
-   struct brw_reg header = brw_message_reg(inst->base_mrf);
-
-   brw_push_insn_state(p);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   brw_MOV(p, header, brw_imm_ud(0));
-   brw_MOV(p, get_element_ud(header, 5), brw_imm_ud(WRITEMASK_X << 8));
-   brw_MOV(p, get_element_ud(header, 0),
-           retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD));
-   brw_MOV(p, brw_message_reg(inst->base_mrf + 1), brw_imm_ud(0u));
-   brw_pop_insn_state(p);
-
-   brw_urb_WRITE(p,
-                 brw_null_reg(), /* dest */
-                 inst->base_mrf, /* starting mrf reg nr */
-                 header,
-                 BRW_URB_WRITE_EOT | BRW_URB_WRITE_OWORD |
-                 BRW_URB_WRITE_USE_CHANNEL_MASKS,
-                 inst->mlen,
-                 0,              /* response len */
-                 0,              /* urb destination offset */
-                 0);
-}
-
-static void
-generate_tes_get_primitive_id(struct brw_codegen *p, struct brw_reg dst)
-{
-   brw_push_insn_state(p);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_MOV(p, dst, retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_D));
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_tcs_get_primitive_id(struct brw_codegen *p, struct brw_reg dst)
-{
-   brw_push_insn_state(p);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_MOV(p, dst, retype(brw_vec1_grf(0, 1), BRW_REGISTER_TYPE_UD));
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_tcs_create_barrier_header(struct brw_codegen *p,
-                                   struct brw_vue_prog_data *prog_data,
-                                   struct brw_reg dst)
-{
-   const struct intel_device_info *devinfo = p->devinfo;
-   const bool ivb = devinfo->platform == INTEL_PLATFORM_IVB ||
-                    devinfo->platform == INTEL_PLATFORM_BYT;
-   struct brw_reg m0_2 = get_element_ud(dst, 2);
-   unsigned instances = ((struct brw_tcs_prog_data *) prog_data)->instances;
-
-   brw_push_insn_state(p);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-
-   /* Zero the message header */
-   brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
-
-   /* Copy "Barrier ID" from r0.2, bits 16:13 (Gfx7.5+) or 15:12 (Gfx7) */
-   brw_AND(p, m0_2,
-           retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
-           brw_imm_ud(ivb ? INTEL_MASK(15, 12) : INTEL_MASK(16, 13)));
-
-   /* Shift it up to bits 27:24. */
-   brw_SHL(p, m0_2, get_element_ud(dst, 2), brw_imm_ud(ivb ? 12 : 11));
-
-   /* Set the Barrier Count and the enable bit */
-   brw_OR(p, m0_2, m0_2, brw_imm_ud(instances << 9 | (1 << 15)));
-
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_oword_dual_block_offsets(struct brw_codegen *p,
-                                  struct brw_reg m1,
-                                  struct brw_reg index)
-{
-   int second_vertex_offset;
-
-   if (p->devinfo->ver >= 6)
-      second_vertex_offset = 1;
-   else
-      second_vertex_offset = 16;
-
-   m1 = retype(m1, BRW_REGISTER_TYPE_D);
-
-   /* Set up M1 (message payload).  Only the block offsets in M1.0 and
-    * M1.4 are used, and the rest are ignored.
-    */
-   struct brw_reg m1_0 = suboffset(vec1(m1), 0);
-   struct brw_reg m1_4 = suboffset(vec1(m1), 4);
-   struct brw_reg index_0 = suboffset(vec1(index), 0);
-   struct brw_reg index_4 = suboffset(vec1(index), 4);
-
-   brw_push_insn_state(p);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-
-   brw_MOV(p, m1_0, index_0);
-
-   if (index.file == BRW_IMMEDIATE_VALUE) {
-      index_4.ud += second_vertex_offset;
-      brw_MOV(p, m1_4, index_4);
-   } else {
-      brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset));
-   }
-
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_unpack_flags(struct brw_codegen *p,
-                      struct brw_reg dst)
-{
-   brw_push_insn_state(p);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-
-   struct brw_reg flags = brw_flag_reg(0, 0);
-   struct brw_reg dst_0 = suboffset(vec1(dst), 0);
-   struct brw_reg dst_4 = suboffset(vec1(dst), 4);
-
-   brw_AND(p, dst_0, flags, brw_imm_ud(0x0f));
-   brw_AND(p, dst_4, flags, brw_imm_ud(0xf0));
-   brw_SHR(p, dst_4, dst_4, brw_imm_ud(4));
-
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_scratch_read(struct brw_codegen *p,
-                      vec4_instruction *inst,
-                      struct brw_reg dst,
-                      struct brw_reg index)
-{
-   const struct intel_device_info *devinfo = p->devinfo;
-   struct brw_reg header = brw_vec8_grf(0, 0);
-
-   gfx6_resolve_implied_move(p, &header, inst->base_mrf);
-
-   generate_oword_dual_block_offsets(p, brw_message_reg(inst->base_mrf + 1),
-				     index);
-
-   uint32_t msg_type;
-
-   if (devinfo->ver >= 6)
-      msg_type = GFX6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
-   else if (devinfo->verx10 >= 45)
-      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
-   else
-      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
-
-   const unsigned target_cache =
-      devinfo->ver >= 7 ? GFX7_SFID_DATAPORT_DATA_CACHE :
-      devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_RENDER_CACHE :
-      BRW_SFID_DATAPORT_READ;
-
-   /* Each of the 8 channel enables is considered for whether each
-    * dword is written.
-    */
-   brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
-   brw_inst_set_sfid(devinfo, send, target_cache);
-   brw_set_dest(p, send, dst);
-   brw_set_src0(p, send, header);
-   if (devinfo->ver < 6)
-      brw_inst_set_cond_modifier(devinfo, send, inst->base_mrf);
-   brw_set_desc(p, send,
-                brw_message_desc(devinfo, 2, 1, true) |
-                brw_dp_read_desc(devinfo,
-                                 brw_scratch_surface_idx(p),
-                                 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
-                                 msg_type, BRW_DATAPORT_READ_TARGET_RENDER_CACHE));
-}
-
-static void
-generate_scratch_write(struct brw_codegen *p,
-                       vec4_instruction *inst,
-                       struct brw_reg dst,
-                       struct brw_reg src,
-                       struct brw_reg index)
-{
-   const struct intel_device_info *devinfo = p->devinfo;
-   const unsigned target_cache =
-      (devinfo->ver >= 7 ? GFX7_SFID_DATAPORT_DATA_CACHE :
-       devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_RENDER_CACHE :
-       BRW_SFID_DATAPORT_WRITE);
-   struct brw_reg header = brw_vec8_grf(0, 0);
-   bool write_commit;
-
-   /* If the instruction is predicated, we'll predicate the send, not
-    * the header setup.
-    */
-   brw_push_insn_state(p);
-   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
-   brw_set_default_flag_reg(p, 0, 0);
-
-   gfx6_resolve_implied_move(p, &header, inst->base_mrf);
-
-   generate_oword_dual_block_offsets(p, brw_message_reg(inst->base_mrf + 1),
-				     index);
-
-   brw_MOV(p,
-	   retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D),
-	   retype(src, BRW_REGISTER_TYPE_D));
-
-   brw_pop_insn_state(p);
-
-   uint32_t msg_type;
-
-   if (devinfo->ver >= 7)
-      msg_type = GFX7_DATAPORT_DC_OWORD_DUAL_BLOCK_WRITE;
-   else if (devinfo->ver == 6)
-      msg_type = GFX6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
-   else
-      msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
-
-   brw_set_default_predicate_control(p, inst->predicate);
-
-   /* Pre-gfx6, we have to specify write commits to ensure ordering
-    * between reads and writes within a thread.  Afterwards, that's
-    * guaranteed and write commits only matter for inter-thread
-    * synchronization.
-    */
-   if (devinfo->ver >= 6) {
-      write_commit = false;
-   } else {
-      /* The visitor set up our destination register to be g0.  This
-       * means that when the next read comes along, we will end up
-       * reading from g0 and causing a block on the write commit.  For
-       * write-after-read, we are relying on the value of the previous
-       * read being used (and thus blocking on completion) before our
-       * write is executed.  This means we have to be careful in
-       * instruction scheduling to not violate this assumption.
-       */
-      write_commit = true;
-   }
-
-   /* Each of the 8 channel enables is considered for whether each
-    * dword is written.
-    */
-   brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
-   brw_inst_set_sfid(p->devinfo, send, target_cache);
-   brw_set_dest(p, send, dst);
-   brw_set_src0(p, send, header);
-   if (devinfo->ver < 6)
-      brw_inst_set_cond_modifier(p->devinfo, send, inst->base_mrf);
-   brw_set_desc(p, send,
-                brw_message_desc(devinfo, 3, write_commit, true) |
-                brw_dp_write_desc(devinfo,
-                                  brw_scratch_surface_idx(p),
-                                  BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
-                                  msg_type,
-                                  write_commit));
-}
-
-static void
-generate_pull_constant_load(struct brw_codegen *p,
-                            vec4_instruction *inst,
-                            struct brw_reg dst,
-                            struct brw_reg index,
-                            struct brw_reg offset)
-{
-   const struct intel_device_info *devinfo = p->devinfo;
-   const unsigned target_cache =
-      (devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_SAMPLER_CACHE :
-       BRW_SFID_DATAPORT_READ);
-   assert(index.file == BRW_IMMEDIATE_VALUE &&
-	  index.type == BRW_REGISTER_TYPE_UD);
-   uint32_t surf_index = index.ud;
-
-   struct brw_reg header = brw_vec8_grf(0, 0);
-
-   gfx6_resolve_implied_move(p, &header, inst->base_mrf);
-
-   if (devinfo->ver >= 6) {
-      if (offset.file == BRW_IMMEDIATE_VALUE) {
-         brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1),
-                           BRW_REGISTER_TYPE_D),
-                 brw_imm_d(offset.ud >> 4));
-      } else {
-         brw_SHR(p, retype(brw_message_reg(inst->base_mrf + 1),
-                           BRW_REGISTER_TYPE_D),
-                 offset, brw_imm_d(4));
-      }
-   } else {
-      brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1),
-                        BRW_REGISTER_TYPE_D),
-              offset);
-   }
-
-   uint32_t msg_type;
-
-   if (devinfo->ver >= 6)
-      msg_type = GFX6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
-   else if (devinfo->verx10 >= 45)
-      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
-   else
-      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
-
-   /* Each of the 8 channel enables is considered for whether each
-    * dword is written.
-    */
-   brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
-   brw_inst_set_sfid(devinfo, send, target_cache);
-   brw_set_dest(p, send, dst);
-   brw_set_src0(p, send, header);
-   if (devinfo->ver < 6)
-      brw_inst_set_cond_modifier(p->devinfo, send, inst->base_mrf);
-   brw_set_desc(p, send,
-                brw_message_desc(devinfo, 2, 1, true) |
-                brw_dp_read_desc(devinfo, surf_index,
-                                 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
-                                 msg_type,
-                                 BRW_DATAPORT_READ_TARGET_DATA_CACHE));
-}
-
-static void
-generate_get_buffer_size(struct brw_codegen *p,
-                         vec4_instruction *inst,
-                         struct brw_reg dst,
-                         struct brw_reg src,
-                         struct brw_reg surf_index)
-{
-   assert(p->devinfo->ver >= 7);
-   assert(surf_index.type == BRW_REGISTER_TYPE_UD &&
-          surf_index.file == BRW_IMMEDIATE_VALUE);
-
-   brw_SAMPLE(p,
-              dst,
-              inst->base_mrf,
-              src,
-              surf_index.ud,
-              0,
-              GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
-              1, /* response length */
-              inst->mlen,
-              inst->header_size > 0,
-              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
-              BRW_SAMPLER_RETURN_FORMAT_SINT32);
-}
-
-static void
-generate_pull_constant_load_gfx7(struct brw_codegen *p,
-                                 vec4_instruction *inst,
-                                 struct brw_reg dst,
-                                 struct brw_reg surf_index,
-                                 struct brw_reg offset)
-{
-   const struct intel_device_info *devinfo = p->devinfo;
-   assert(surf_index.type == BRW_REGISTER_TYPE_UD);
-
-   if (surf_index.file == BRW_IMMEDIATE_VALUE) {
-
-      brw_inst *insn = brw_next_insn(p, BRW_OPCODE_SEND);
-      brw_inst_set_sfid(devinfo, insn, BRW_SFID_SAMPLER);
-      brw_set_dest(p, insn, dst);
-      brw_set_src0(p, insn, offset);
-      brw_set_desc(p, insn,
-                   brw_message_desc(devinfo, inst->mlen, 1, inst->header_size) |
-                   brw_sampler_desc(devinfo, surf_index.ud,
-                                    0, /* LD message ignores sampler unit */
-                                    GFX5_SAMPLER_MESSAGE_SAMPLE_LD,
-                                    BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0));
-   } else {
-
-      struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
-
-      brw_push_insn_state(p);
-      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-      brw_set_default_access_mode(p, BRW_ALIGN_1);
-
-      /* a0.0 = surf_index & 0xff */
-      brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND);
-      brw_inst_set_exec_size(devinfo, insn_and, BRW_EXECUTE_1);
-      brw_set_dest(p, insn_and, addr);
-      brw_set_src0(p, insn_and, vec1(retype(surf_index, BRW_REGISTER_TYPE_UD)));
-      brw_set_src1(p, insn_and, brw_imm_ud(0x0ff));
-
-      brw_pop_insn_state(p);
-
-      /* dst = send(offset, a0.0 | <descriptor>) */
-      brw_send_indirect_message(
-         p, BRW_SFID_SAMPLER, dst, offset, addr,
-         brw_message_desc(devinfo, inst->mlen, 1, inst->header_size) |
-         brw_sampler_desc(devinfo,
-                          0 /* surface */,
-                          0 /* sampler */,
-                          GFX5_SAMPLER_MESSAGE_SAMPLE_LD,
-                          BRW_SAMPLER_SIMD_MODE_SIMD4X2,
-                          0),
-         false /* EOT */);
-   }
-}
-
-static void
-generate_mov_indirect(struct brw_codegen *p,
-                      vec4_instruction *,
-                      struct brw_reg dst, struct brw_reg reg,
-                      struct brw_reg indirect)
-{
-   assert(indirect.type == BRW_REGISTER_TYPE_UD);
-   assert(p->devinfo->ver >= 6);
-
-   unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr * (REG_SIZE / 2);
-
-   /* This instruction acts in align1 mode */
-   assert(dst.writemask == WRITEMASK_XYZW);
-
-   if (indirect.file == BRW_IMMEDIATE_VALUE) {
-      imm_byte_offset += indirect.ud;
-
-      reg.nr = imm_byte_offset / REG_SIZE;
-      reg.subnr = (imm_byte_offset / (REG_SIZE / 2)) % 2;
-      unsigned shift = (imm_byte_offset / 4) % 4;
-      reg.swizzle += BRW_SWIZZLE4(shift, shift, shift, shift);
-
-      brw_MOV(p, dst, reg);
-   } else {
-      brw_push_insn_state(p);
-      brw_set_default_access_mode(p, BRW_ALIGN_1);
-      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-
-      struct brw_reg addr = vec8(brw_address_reg(0));
-
-      /* We need to move the indirect value into the address register.  In
-       * order to make things make some sense, we want to respect at least the
-       * X component of the swizzle.  In order to do that, we need to convert
-       * the subnr (probably 0) to an align1 subnr and add in the swizzle.
-       */
-      assert(brw_is_single_value_swizzle(indirect.swizzle));
-      indirect.subnr = (indirect.subnr * 4 + BRW_GET_SWZ(indirect.swizzle, 0));
-
-      /* We then use a region of <8,4,0>:uw to pick off the first 2 bytes of
-       * the indirect and splat it out to all four channels of the given half
-       * of a0.
-       */
-      indirect.subnr *= 2;
-      indirect = stride(retype(indirect, BRW_REGISTER_TYPE_UW), 8, 4, 0);
-      brw_ADD(p, addr, indirect, brw_imm_uw(imm_byte_offset));
-
-      /* Now we need to incorporate the swizzle from the source register */
-      if (reg.swizzle != BRW_SWIZZLE_XXXX) {
-         uint32_t uv_swiz = BRW_GET_SWZ(reg.swizzle, 0) << 2 |
-                            BRW_GET_SWZ(reg.swizzle, 1) << 6 |
-                            BRW_GET_SWZ(reg.swizzle, 2) << 10 |
-                            BRW_GET_SWZ(reg.swizzle, 3) << 14;
-         uv_swiz |= uv_swiz << 16;
-
-         brw_ADD(p, addr, addr, brw_imm_uv(uv_swiz));
-      }
-
-      brw_MOV(p, dst, retype(brw_VxH_indirect(0, 0), reg.type));
-
-      brw_pop_insn_state(p);
-   }
-}
-
-static void
-generate_zero_oob_push_regs(struct brw_codegen *p,
-                            struct brw_stage_prog_data *prog_data,
-                            struct brw_reg scratch,
-                            struct brw_reg bit_mask_in)
-{
-   const uint64_t want_zero = prog_data->zero_push_reg;
-   assert(want_zero);
-
-   assert(bit_mask_in.file == BRW_GENERAL_REGISTER_FILE);
-   assert(BRW_GET_SWZ(bit_mask_in.swizzle, 1) ==
-          BRW_GET_SWZ(bit_mask_in.swizzle, 0) + 1);
-   bit_mask_in.subnr += BRW_GET_SWZ(bit_mask_in.swizzle, 0) * 4;
-   bit_mask_in.type = BRW_REGISTER_TYPE_W;
-
-   /* Scratch should be 3 registers in the GRF */
-   assert(scratch.file == BRW_GENERAL_REGISTER_FILE);
-   scratch = vec8(scratch);
-   struct brw_reg mask_w16 = retype(scratch, BRW_REGISTER_TYPE_W);
-   struct brw_reg mask_d16 = retype(byte_offset(scratch, REG_SIZE),
-                                    BRW_REGISTER_TYPE_D);
-
-   brw_push_insn_state(p);
-   brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-
-   for (unsigned i = 0; i < 64; i++) {
-      if (i % 16 == 0 && (want_zero & BITFIELD64_RANGE(i, 16))) {
-         brw_set_default_exec_size(p, BRW_EXECUTE_8);
-         brw_SHL(p, suboffset(mask_w16, 8),
-                    vec1(byte_offset(bit_mask_in, i / 8)),
-                    brw_imm_v(0x01234567));
-         brw_SHL(p, mask_w16, suboffset(mask_w16, 8), brw_imm_w(8));
-
-         brw_set_default_exec_size(p, BRW_EXECUTE_16);
-         brw_ASR(p, mask_d16, mask_w16, brw_imm_w(15));
-      }
-
-      if (want_zero & BITFIELD64_BIT(i)) {
-         unsigned push_start = prog_data->dispatch_grf_start_reg;
-         struct brw_reg push_reg =
-            retype(brw_vec8_grf(push_start + i, 0), BRW_REGISTER_TYPE_D);
-
-         brw_set_default_exec_size(p, BRW_EXECUTE_8);
-         brw_AND(p, push_reg, push_reg, vec1(suboffset(mask_d16, i)));
-      }
-   }
-
-   brw_pop_insn_state(p);
-}
-
-static void
-generate_code(struct brw_codegen *p,
-              const struct brw_compiler *compiler,
-              const struct brw_compile_params *params,
-              const nir_shader *nir,
-              struct brw_vue_prog_data *prog_data,
-              const struct cfg_t *cfg,
-              const performance &perf,
-              struct brw_compile_stats *stats,
-              bool debug_enabled)
-{
-   const struct intel_device_info *devinfo = p->devinfo;
-   const char *stage_abbrev = _mesa_shader_stage_to_abbrev(nir->info.stage);
-   struct disasm_info *disasm_info = disasm_initialize(p->isa, cfg);
-
-   /* `send_count` explicitly does not include spills or fills, as we'd
-    * like to use it as a metric for intentional memory access or other
-    * shared function use.  Otherwise, subtle changes to scheduling or
-    * register allocation could cause it to fluctuate wildly - and that
-    * effect is already counted in spill/fill counts.
-    */
-   int spill_count = 0, fill_count = 0;
-   int loop_count = 0, send_count = 0;
-
-   foreach_block_and_inst (block, vec4_instruction, inst, cfg) {
-      struct brw_reg src[3], dst;
-
-      if (unlikely(debug_enabled))
-         disasm_annotate(disasm_info, inst, p->next_insn_offset);
-
-      for (unsigned int i = 0; i < 3; i++) {
-         src[i] = inst->src[i].as_brw_reg();
-      }
-      dst = inst->dst.as_brw_reg();
-
-      brw_set_default_predicate_control(p, inst->predicate);
-      brw_set_default_predicate_inverse(p, inst->predicate_inverse);
-      brw_set_default_flag_reg(p, inst->flag_subreg / 2, inst->flag_subreg % 2);
-      brw_set_default_saturate(p, inst->saturate);
-      brw_set_default_mask_control(p, inst->force_writemask_all);
-      brw_set_default_acc_write_control(p, inst->writes_accumulator);
-
-      assert(inst->group % inst->exec_size == 0);
-      assert(inst->group % 4 == 0);
-
-      /* There are some instructions where the destination is 64-bit
-       * but we retype it to a smaller type. In that case, we cannot
-       * double the exec_size.
-       */
-      const bool is_df = (get_exec_type_size(inst) == 8 ||
-                          inst->dst.type == BRW_REGISTER_TYPE_DF) &&
-                         inst->opcode != VEC4_OPCODE_PICK_LOW_32BIT &&
-                         inst->opcode != VEC4_OPCODE_PICK_HIGH_32BIT &&
-                         inst->opcode != VEC4_OPCODE_SET_LOW_32BIT &&
-                         inst->opcode != VEC4_OPCODE_SET_HIGH_32BIT;
-
-      unsigned exec_size = inst->exec_size;
-      if (devinfo->verx10 == 70 && is_df)
-         exec_size *= 2;
-
-      brw_set_default_exec_size(p, cvt(exec_size) - 1);
-
-      if (!inst->force_writemask_all)
-         brw_set_default_group(p, inst->group);
-
-      assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->ver));
-      assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
-
-      unsigned pre_emit_nr_insn = p->nr_insn;
-
-      switch (inst->opcode) {
-      case VEC4_OPCODE_UNPACK_UNIFORM:
-      case BRW_OPCODE_MOV:
-      case VEC4_OPCODE_MOV_FOR_SCRATCH:
-         brw_MOV(p, dst, src[0]);
-         break;
-      case BRW_OPCODE_ADD:
-         brw_ADD(p, dst, src[0], src[1]);
-         break;
-      case BRW_OPCODE_MUL:
-         brw_MUL(p, dst, src[0], src[1]);
-         break;
-      case BRW_OPCODE_MACH:
-         brw_MACH(p, dst, src[0], src[1]);
-         break;
-
-      case BRW_OPCODE_MAD:
-         assert(devinfo->ver >= 6);
-         brw_MAD(p, dst, src[0], src[1], src[2]);
-         break;
-
-      case BRW_OPCODE_FRC:
-         brw_FRC(p, dst, src[0]);
-         break;
-      case BRW_OPCODE_RNDD:
-         brw_RNDD(p, dst, src[0]);
-         break;
-      case BRW_OPCODE_RNDE:
-         brw_RNDE(p, dst, src[0]);
-         break;
-      case BRW_OPCODE_RNDZ:
-         brw_RNDZ(p, dst, src[0]);
-         break;
-
-      case BRW_OPCODE_AND:
-         brw_AND(p, dst, src[0], src[1]);
-         break;
-      case BRW_OPCODE_OR:
-         brw_OR(p, dst, src[0], src[1]);
-         break;
-      case BRW_OPCODE_XOR:
-         brw_XOR(p, dst, src[0], src[1]);
-         break;
-      case BRW_OPCODE_NOT:
-         brw_NOT(p, dst, src[0]);
-         break;
-      case BRW_OPCODE_ASR:
-         brw_ASR(p, dst, src[0], src[1]);
-         break;
-      case BRW_OPCODE_SHR:
-         brw_SHR(p, dst, src[0], src[1]);
-         break;
-      case BRW_OPCODE_SHL:
-         brw_SHL(p, dst, src[0], src[1]);
-         break;
-
-      case BRW_OPCODE_CMP:
-         brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
-         break;
-      case BRW_OPCODE_CMPN:
-         brw_CMPN(p, dst, inst->conditional_mod, src[0], src[1]);
-         break;
-      case BRW_OPCODE_SEL:
-         brw_SEL(p, dst, src[0], src[1]);
-         break;
-
-      case BRW_OPCODE_DPH:
-         brw_DPH(p, dst, src[0], src[1]);
-         break;
-
-      case BRW_OPCODE_DP4:
-         brw_DP4(p, dst, src[0], src[1]);
-         break;
-
-      case BRW_OPCODE_DP3:
-         brw_DP3(p, dst, src[0], src[1]);
-         break;
-
-      case BRW_OPCODE_DP2:
-         brw_DP2(p, dst, src[0], src[1]);
-         break;
-
-      case BRW_OPCODE_F32TO16:
-         assert(devinfo->ver >= 7);
-         brw_F32TO16(p, dst, src[0]);
-         break;
-
-      case BRW_OPCODE_F16TO32:
-         assert(devinfo->ver >= 7);
-         brw_F16TO32(p, dst, src[0]);
-         break;
-
-      case BRW_OPCODE_LRP:
-         assert(devinfo->ver >= 6);
-         brw_LRP(p, dst, src[0], src[1], src[2]);
-         break;
-
-      case BRW_OPCODE_BFREV:
-         assert(devinfo->ver >= 7);
-         brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
-                   retype(src[0], BRW_REGISTER_TYPE_UD));
-         break;
-      case BRW_OPCODE_FBH:
-         assert(devinfo->ver >= 7);
-         brw_FBH(p, retype(dst, src[0].type), src[0]);
-         break;
-      case BRW_OPCODE_FBL:
-         assert(devinfo->ver >= 7);
-         brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD),
-                 retype(src[0], BRW_REGISTER_TYPE_UD));
-         break;
-      case BRW_OPCODE_LZD:
-         brw_LZD(p, dst, src[0]);
-         break;
-      case BRW_OPCODE_CBIT:
-         assert(devinfo->ver >= 7);
-         brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD),
-                  retype(src[0], BRW_REGISTER_TYPE_UD));
-         break;
-      case BRW_OPCODE_ADDC:
-         assert(devinfo->ver >= 7);
-         brw_ADDC(p, dst, src[0], src[1]);
-         break;
-      case BRW_OPCODE_SUBB:
-         assert(devinfo->ver >= 7);
-         brw_SUBB(p, dst, src[0], src[1]);
-         break;
-      case BRW_OPCODE_MAC:
-         brw_MAC(p, dst, src[0], src[1]);
-         break;
-
-      case BRW_OPCODE_BFE:
-         assert(devinfo->ver >= 7);
-         brw_BFE(p, dst, src[0], src[1], src[2]);
-         break;
-
-      case BRW_OPCODE_BFI1:
-         assert(devinfo->ver >= 7);
-         brw_BFI1(p, dst, src[0], src[1]);
-         break;
-      case BRW_OPCODE_BFI2:
-         assert(devinfo->ver >= 7);
-         brw_BFI2(p, dst, src[0], src[1], src[2]);
-         break;
-
-      case BRW_OPCODE_IF:
-         if (!inst->src[0].is_null()) {
-            /* The instruction has an embedded compare (only allowed on gfx6) */
-            assert(devinfo->ver == 6);
-            gfx6_IF(p, inst->conditional_mod, src[0], src[1]);
-         } else {
-            brw_inst *if_inst = brw_IF(p, BRW_EXECUTE_8);
-            brw_inst_set_pred_control(p->devinfo, if_inst, inst->predicate);
-         }
-         break;
-
-      case BRW_OPCODE_ELSE:
-         brw_ELSE(p);
-         break;
-      case BRW_OPCODE_ENDIF:
-         brw_ENDIF(p);
-         break;
-
-      case BRW_OPCODE_DO:
-         brw_DO(p, BRW_EXECUTE_8);
-         break;
-
-      case BRW_OPCODE_BREAK:
-         brw_BREAK(p);
-         brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
-         break;
-      case BRW_OPCODE_CONTINUE:
-         brw_CONT(p);
-         brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
-         break;
-
-      case BRW_OPCODE_WHILE:
-         brw_WHILE(p);
-         loop_count++;
-         break;
-
-      case SHADER_OPCODE_RCP:
-      case SHADER_OPCODE_RSQ:
-      case SHADER_OPCODE_SQRT:
-      case SHADER_OPCODE_EXP2:
-      case SHADER_OPCODE_LOG2:
-      case SHADER_OPCODE_SIN:
-      case SHADER_OPCODE_COS:
-         assert(inst->conditional_mod == BRW_CONDITIONAL_NONE);
-         if (devinfo->ver >= 7) {
-            gfx6_math(p, dst, brw_math_function(inst->opcode), src[0],
-                      brw_null_reg());
-         } else if (devinfo->ver == 6) {
-            generate_math_gfx6(p, inst, dst, src[0], brw_null_reg());
-         } else {
-            generate_math1_gfx4(p, inst, dst, src[0]);
-            send_count++;
-         }
-         break;
-
-      case SHADER_OPCODE_POW:
-      case SHADER_OPCODE_INT_QUOTIENT:
-      case SHADER_OPCODE_INT_REMAINDER:
-         assert(inst->conditional_mod == BRW_CONDITIONAL_NONE);
-         if (devinfo->ver >= 7) {
-            gfx6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]);
-         } else if (devinfo->ver == 6) {
-            generate_math_gfx6(p, inst, dst, src[0], src[1]);
-         } else {
-            generate_math2_gfx4(p, inst, dst, src[0], src[1]);
-            send_count++;
-         }
-         break;
-
-      case SHADER_OPCODE_TEX:
-      case SHADER_OPCODE_TXD:
-      case SHADER_OPCODE_TXF:
-      case SHADER_OPCODE_TXF_CMS:
-      case SHADER_OPCODE_TXF_CMS_W:
-      case SHADER_OPCODE_TXF_MCS:
-      case SHADER_OPCODE_TXL:
-      case SHADER_OPCODE_TXS:
-      case SHADER_OPCODE_TG4:
-      case SHADER_OPCODE_TG4_OFFSET:
-      case SHADER_OPCODE_SAMPLEINFO:
-         generate_tex(p, prog_data, nir->info.stage,
-                      inst, dst, src[0], src[1], src[2]);
-         send_count++;
-         break;
-
-      case SHADER_OPCODE_GET_BUFFER_SIZE:
-         generate_get_buffer_size(p, inst, dst, src[0], src[1]);
-         send_count++;
-         break;
-
-      case VEC4_VS_OPCODE_URB_WRITE:
-         generate_vs_urb_write(p, inst);
-         send_count++;
-         break;
-
-      case SHADER_OPCODE_GFX4_SCRATCH_READ:
-         generate_scratch_read(p, inst, dst, src[0]);
-         fill_count++;
-         break;
-
-      case SHADER_OPCODE_GFX4_SCRATCH_WRITE:
-         generate_scratch_write(p, inst, dst, src[0], src[1]);
-         spill_count++;
-         break;
-
-      case VS_OPCODE_PULL_CONSTANT_LOAD:
-         generate_pull_constant_load(p, inst, dst, src[0], src[1]);
-         send_count++;
-         break;
-
-      case VS_OPCODE_PULL_CONSTANT_LOAD_GFX7:
-         generate_pull_constant_load_gfx7(p, inst, dst, src[0], src[1]);
-         send_count++;
-         break;
-
-      case VEC4_GS_OPCODE_URB_WRITE:
-         generate_gs_urb_write(p, inst);
-         send_count++;
-         break;
-
-      case VEC4_GS_OPCODE_URB_WRITE_ALLOCATE:
-         generate_gs_urb_write_allocate(p, inst);
-         send_count++;
-         break;
-
-      case GS_OPCODE_SVB_WRITE:
-         generate_gs_svb_write(p, inst, dst, src[0], src[1]);
-         send_count++;
-         break;
-
-      case GS_OPCODE_SVB_SET_DST_INDEX:
-         generate_gs_svb_set_destination_index(p, inst, dst, src[0]);
-         break;
-
-      case GS_OPCODE_THREAD_END:
-         generate_gs_thread_end(p, inst);
-         send_count++;
-         break;
-
-      case GS_OPCODE_SET_WRITE_OFFSET:
-         generate_gs_set_write_offset(p, dst, src[0], src[1]);
-         break;
-
-      case GS_OPCODE_SET_VERTEX_COUNT:
-         generate_gs_set_vertex_count(p, dst, src[0]);
-         break;
-
-      case GS_OPCODE_FF_SYNC:
-         generate_gs_ff_sync(p, inst, dst, src[0], src[1]);
-         send_count++;
-         break;
-
-      case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
-         generate_gs_ff_sync_set_primitives(p, dst, src[0], src[1], src[2]);
-         break;
-
-      case GS_OPCODE_SET_PRIMITIVE_ID:
-         generate_gs_set_primitive_id(p, dst);
-         break;
-
-      case GS_OPCODE_SET_DWORD_2:
-         generate_gs_set_dword_2(p, dst, src[0]);
-         break;
-
-      case GS_OPCODE_PREPARE_CHANNEL_MASKS:
-         generate_gs_prepare_channel_masks(p, dst);
-         break;
-
-      case GS_OPCODE_SET_CHANNEL_MASKS:
-         generate_gs_set_channel_masks(p, dst, src[0]);
-         break;
-
-      case GS_OPCODE_GET_INSTANCE_ID:
-         generate_gs_get_instance_id(p, dst);
-         break;
-
-      case VEC4_OPCODE_UNTYPED_ATOMIC:
-         assert(src[2].file == BRW_IMMEDIATE_VALUE);
-         brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen,
-                            !inst->dst.is_null(), inst->header_size);
-         send_count++;
-         break;
-
-      case VEC4_OPCODE_UNTYPED_SURFACE_READ:
-         assert(!inst->header_size);
-         assert(src[2].file == BRW_IMMEDIATE_VALUE);
-         brw_untyped_surface_read(p, dst, src[0], src[1], inst->mlen,
-                                  src[2].ud);
-         send_count++;
-         break;
-
-      case VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
-         assert(src[2].file == BRW_IMMEDIATE_VALUE);
-         brw_untyped_surface_write(p, src[0], src[1], inst->mlen,
-                                   src[2].ud, inst->header_size);
-         send_count++;
-         break;
-
-      case SHADER_OPCODE_MEMORY_FENCE:
-         brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND,
-                          brw_message_target(inst->sfid),
-                          inst->desc,
-                          /* commit_enable */ false,
-                          /* bti */ 0);
-         send_count++;
-         break;
-
-      case SHADER_OPCODE_FIND_LIVE_CHANNEL:
-         brw_find_live_channel(p, dst, false);
-         break;
-
-      case SHADER_OPCODE_BROADCAST:
-         assert(inst->force_writemask_all);
-         brw_broadcast(p, dst, src[0], src[1]);
-         break;
-
-      case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
-         generate_unpack_flags(p, dst);
-         break;
-
-      case VEC4_OPCODE_MOV_BYTES: {
-         /* Moves the low byte from each channel, using an Align1 access mode
-          * and a <4,1,0> source region.
-          */
-         assert(src[0].type == BRW_REGISTER_TYPE_UB ||
-                src[0].type == BRW_REGISTER_TYPE_B);
-
-         brw_set_default_access_mode(p, BRW_ALIGN_1);
-         src[0].vstride = BRW_VERTICAL_STRIDE_4;
-         src[0].width = BRW_WIDTH_1;
-         src[0].hstride = BRW_HORIZONTAL_STRIDE_0;
-         brw_MOV(p, dst, src[0]);
-         brw_set_default_access_mode(p, BRW_ALIGN_16);
-         break;
-      }
-
-      case VEC4_OPCODE_DOUBLE_TO_F32:
-      case VEC4_OPCODE_DOUBLE_TO_D32:
-      case VEC4_OPCODE_DOUBLE_TO_U32: {
-         assert(type_sz(src[0].type) == 8);
-         assert(type_sz(dst.type) == 8);
-
-         brw_reg_type dst_type;
-
-         switch (inst->opcode) {
-         case VEC4_OPCODE_DOUBLE_TO_F32:
-            dst_type = BRW_REGISTER_TYPE_F;
-            break;
-         case VEC4_OPCODE_DOUBLE_TO_D32:
-            dst_type = BRW_REGISTER_TYPE_D;
-            break;
-         case VEC4_OPCODE_DOUBLE_TO_U32:
-            dst_type = BRW_REGISTER_TYPE_UD;
-            break;
-         default:
-            unreachable("Not supported conversion");
-         }
-         dst = retype(dst, dst_type);
-
-         brw_set_default_access_mode(p, BRW_ALIGN_1);
-
-         /* When converting from DF->F, we set destination's stride as 2 as an
-          * alignment requirement. But in IVB/BYT, each DF implicitly writes
-          * two floats, being the first one the converted value. So we don't
-          * need to explicitly set stride 2, but 1.
-          */
-         struct brw_reg spread_dst;
-         if (devinfo->verx10 == 70)
-            spread_dst = stride(dst, 8, 4, 1);
-         else
-            spread_dst = stride(dst, 8, 4, 2);
-
-         brw_MOV(p, spread_dst, src[0]);
-
-         brw_set_default_access_mode(p, BRW_ALIGN_16);
-         break;
-      }
-
-      case VEC4_OPCODE_TO_DOUBLE: {
-         assert(type_sz(src[0].type) == 4);
-         assert(type_sz(dst.type) == 8);
-
-         brw_set_default_access_mode(p, BRW_ALIGN_1);
-
-         brw_MOV(p, dst, src[0]);
-
-         brw_set_default_access_mode(p, BRW_ALIGN_16);
-         break;
-      }
-
-      case VEC4_OPCODE_PICK_LOW_32BIT:
-      case VEC4_OPCODE_PICK_HIGH_32BIT: {
-         /* Stores the low/high 32-bit of each 64-bit element in src[0] into
-          * dst using ALIGN1 mode and a <8,4,2>:UD region on the source.
-          */
-         assert(type_sz(src[0].type) == 8);
-         assert(type_sz(dst.type) == 4);
-
-         brw_set_default_access_mode(p, BRW_ALIGN_1);
-
-         dst = retype(dst, BRW_REGISTER_TYPE_UD);
-         dst.hstride = BRW_HORIZONTAL_STRIDE_1;
-
-         src[0] = retype(src[0], BRW_REGISTER_TYPE_UD);
-         if (inst->opcode == VEC4_OPCODE_PICK_HIGH_32BIT)
-            src[0] = suboffset(src[0], 1);
-         src[0] = spread(src[0], 2);
-         brw_MOV(p, dst, src[0]);
-
-         brw_set_default_access_mode(p, BRW_ALIGN_16);
-         break;
-      }
-
-      case VEC4_OPCODE_SET_LOW_32BIT:
-      case VEC4_OPCODE_SET_HIGH_32BIT: {
-         /* Reads consecutive 32-bit elements from src[0] and writes
-          * them to the low/high 32-bit of each 64-bit element in dst.
-          */
-         assert(type_sz(src[0].type) == 4);
-         assert(type_sz(dst.type) == 8);
-
-         brw_set_default_access_mode(p, BRW_ALIGN_1);
-
-         dst = retype(dst, BRW_REGISTER_TYPE_UD);
-         if (inst->opcode == VEC4_OPCODE_SET_HIGH_32BIT)
-            dst = suboffset(dst, 1);
-         dst.hstride = BRW_HORIZONTAL_STRIDE_2;
-
-         src[0] = retype(src[0], BRW_REGISTER_TYPE_UD);
-         brw_MOV(p, dst, src[0]);
-
-         brw_set_default_access_mode(p, BRW_ALIGN_16);
-         break;
-      }
-
-      case VEC4_OPCODE_PACK_BYTES: {
-         /* Is effectively:
-          *
-          *   mov(8) dst<16,4,1>:UB src<4,1,0>:UB
-          *
-          * but destinations' only regioning is horizontal stride, so instead we
-          * have to use two instructions:
-          *
-          *   mov(4) dst<1>:UB     src<4,1,0>:UB
-          *   mov(4) dst.16<1>:UB  src.16<4,1,0>:UB
-          *
-          * where they pack the four bytes from the low and high four DW.
-          */
-         assert(util_is_power_of_two_nonzero(dst.writemask));
-         unsigned offset = __builtin_ctz(dst.writemask);
-
-         dst.type = BRW_REGISTER_TYPE_UB;
-
-         brw_set_default_access_mode(p, BRW_ALIGN_1);
-
-         src[0].type = BRW_REGISTER_TYPE_UB;
-         src[0].vstride = BRW_VERTICAL_STRIDE_4;
-         src[0].width = BRW_WIDTH_1;
-         src[0].hstride = BRW_HORIZONTAL_STRIDE_0;
-         dst.subnr = offset * 4;
-         struct brw_inst *insn = brw_MOV(p, dst, src[0]);
-         brw_inst_set_exec_size(p->devinfo, insn, BRW_EXECUTE_4);
-         brw_inst_set_no_dd_clear(p->devinfo, insn, true);
-         brw_inst_set_no_dd_check(p->devinfo, insn, inst->no_dd_check);
-
-         src[0].subnr = 16;
-         dst.subnr = 16 + offset * 4;
-         insn = brw_MOV(p, dst, src[0]);
-         brw_inst_set_exec_size(p->devinfo, insn, BRW_EXECUTE_4);
-         brw_inst_set_no_dd_clear(p->devinfo, insn, inst->no_dd_clear);
-         brw_inst_set_no_dd_check(p->devinfo, insn, true);
-
-         brw_set_default_access_mode(p, BRW_ALIGN_16);
-         break;
-      }
-
-      case VEC4_OPCODE_ZERO_OOB_PUSH_REGS:
-         generate_zero_oob_push_regs(p, &prog_data->base, dst, src[0]);
-         break;
-
-      case VEC4_TCS_OPCODE_URB_WRITE:
-         generate_tcs_urb_write(p, inst, src[0]);
-         send_count++;
-         break;
-
-      case VEC4_OPCODE_URB_READ:
-         generate_vec4_urb_read(p, inst, dst, src[0]);
-         send_count++;
-         break;
-
-      case VEC4_TCS_OPCODE_SET_INPUT_URB_OFFSETS:
-         generate_tcs_input_urb_offsets(p, dst, src[0], src[1]);
-         break;
-
-      case VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
-         generate_tcs_output_urb_offsets(p, dst, src[0], src[1]);
-         break;
-
-      case TCS_OPCODE_GET_INSTANCE_ID:
-         generate_tcs_get_instance_id(p, dst);
-         break;
-
-      case TCS_OPCODE_GET_PRIMITIVE_ID:
-         generate_tcs_get_primitive_id(p, dst);
-         break;
-
-      case TCS_OPCODE_CREATE_BARRIER_HEADER:
-         generate_tcs_create_barrier_header(p, prog_data, dst);
-         break;
-
-      case TES_OPCODE_CREATE_INPUT_READ_HEADER:
-         generate_tes_create_input_read_header(p, dst);
-         break;
-
-      case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
-         generate_tes_add_indirect_urb_offset(p, dst, src[0], src[1]);
-         break;
-
-      case TES_OPCODE_GET_PRIMITIVE_ID:
-         generate_tes_get_primitive_id(p, dst);
-         break;
-
-      case TCS_OPCODE_SRC0_010_IS_ZERO:
-         /* If src_reg had stride like fs_reg, we wouldn't need this. */
-         brw_MOV(p, brw_null_reg(), stride(src[0], 0, 1, 0));
-         break;
-
-      case TCS_OPCODE_RELEASE_INPUT:
-         generate_tcs_release_input(p, dst, src[0], src[1]);
-         send_count++;
-         break;
-
-      case TCS_OPCODE_THREAD_END:
-         generate_tcs_thread_end(p, inst);
-         send_count++;
-         break;
-
-      case SHADER_OPCODE_BARRIER:
-         brw_barrier(p, src[0]);
-         brw_WAIT(p);
-         send_count++;
-         break;
-
-      case SHADER_OPCODE_MOV_INDIRECT:
-         generate_mov_indirect(p, inst, dst, src[0], src[1]);
-         break;
-
-      case BRW_OPCODE_DIM:
-         assert(devinfo->verx10 == 75);
-         assert(src[0].type == BRW_REGISTER_TYPE_DF);
-         assert(dst.type == BRW_REGISTER_TYPE_DF);
-         brw_DIM(p, dst, retype(src[0], BRW_REGISTER_TYPE_F));
-         break;
-
-      case SHADER_OPCODE_RND_MODE: {
-         assert(src[0].file == BRW_IMMEDIATE_VALUE);
-         /*
-          * Changes the floating point rounding mode updating the control
-          * register field defined at cr0.0[5-6] bits.
-          */
-         enum brw_rnd_mode mode =
-            (enum brw_rnd_mode) (src[0].d << BRW_CR0_RND_MODE_SHIFT);
-         brw_float_controls_mode(p, mode, BRW_CR0_RND_MODE_MASK);
-      }
-         break;
-
-      default:
-         unreachable("Unsupported opcode");
-      }
-
-      if (inst->opcode == VEC4_OPCODE_PACK_BYTES) {
-         /* Handled dependency hints in the generator. */
-
-         assert(!inst->conditional_mod);
-      } else if (inst->no_dd_clear || inst->no_dd_check || inst->conditional_mod) {
-         assert(p->nr_insn == pre_emit_nr_insn + 1 ||
-                !"conditional_mod, no_dd_check, or no_dd_clear set for IR "
-                 "emitting more than 1 instruction");
-
-         brw_inst *last = &p->store[pre_emit_nr_insn];
-
-         if (inst->conditional_mod)
-            brw_inst_set_cond_modifier(p->devinfo, last, inst->conditional_mod);
-         brw_inst_set_no_dd_clear(p->devinfo, last, inst->no_dd_clear);
-         brw_inst_set_no_dd_check(p->devinfo, last, inst->no_dd_check);
-      }
-   }
-
-   brw_set_uip_jip(p, 0);
-
-   /* end of program sentinel */
-   disasm_new_inst_group(disasm_info, p->next_insn_offset);
-
-#ifndef NDEBUG
-   bool validated =
-#else
-   if (unlikely(debug_enabled))
-#endif
-      brw_validate_instructions(&compiler->isa, p->store,
-                                0, p->next_insn_offset,
-                                disasm_info);
-
-   int before_size = p->next_insn_offset;
-   brw_compact_instructions(p, 0, disasm_info);
-   int after_size = p->next_insn_offset;
-
-   bool dump_shader_bin = brw_should_dump_shader_bin();
-   unsigned char sha1[21];
-   char sha1buf[41];
-
-   if (unlikely(debug_enabled || dump_shader_bin)) {
-      _mesa_sha1_compute(p->store, p->next_insn_offset, sha1);
-      _mesa_sha1_format(sha1buf, sha1);
-   }
-
-   if (unlikely(dump_shader_bin))
-      brw_dump_shader_bin(p->store, 0, p->next_insn_offset, sha1buf);
-
-   if (unlikely(debug_enabled)) {
-      fprintf(stderr, "Native code for %s %s shader %s (src_hash 0x%08x) (sha1 %s):\n",
-            nir->info.label ? nir->info.label : "unnamed",
-            _mesa_shader_stage_to_string(nir->info.stage), nir->info.name,
-            params->source_hash, sha1buf);
-
-      fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles. %d:%d "
-                     "spills:fills, %u sends. Compacted %d to %d bytes (%.0f%%)\n",
-            stage_abbrev, before_size / 16, loop_count, perf.latency,
-            spill_count, fill_count, send_count, before_size, after_size,
-            100.0f * (before_size - after_size) / before_size);
-
-      /* overriding the shader makes disasm_info invalid */
-      if (!brw_try_override_assembly(p, 0, sha1buf)) {
-         dump_assembly(p->store, 0, p->next_insn_offset,
-                       disasm_info, perf.block_latency);
-      } else {
-         fprintf(stderr, "Successfully overrode shader with sha1 %s\n\n", sha1buf);
-      }
-   }
-   ralloc_free(disasm_info);
-   assert(validated);
-
-   brw_shader_debug_log(compiler, params->log_data,
-                        "%s vec4 shader: %d inst, %d loops, %u cycles, "
-                        "%d:%d spills:fills, %u sends, "
-                        "compacted %d to %d bytes.\n",
-                        stage_abbrev, before_size / 16,
-                        loop_count, perf.latency, spill_count,
-                        fill_count, send_count, before_size, after_size);
-   if (stats) {
-      stats->dispatch_width = 0;
-      stats->max_dispatch_width = 0;
-      stats->instructions = before_size / 16;
-      stats->sends = send_count;
-      stats->loops = loop_count;
-      stats->cycles = perf.latency;
-      stats->spills = spill_count;
-      stats->fills = fill_count;
-   }
-}
-
-extern "C" const unsigned *
-brw_vec4_generate_assembly(const struct brw_compiler *compiler,
-                           const struct brw_compile_params *params,
-                           const nir_shader *nir,
-                           struct brw_vue_prog_data *prog_data,
-                           const struct cfg_t *cfg,
-                           const performance &perf,
-                           bool debug_enabled)
-{
-   struct brw_codegen *p = rzalloc(params->mem_ctx, struct brw_codegen);
-   brw_init_codegen(&compiler->isa, p, params->mem_ctx);
-   brw_set_default_access_mode(p, BRW_ALIGN_16);
-
-   generate_code(p, compiler, params,
-                 nir, prog_data, cfg, perf,
-                 params->stats, debug_enabled);
-
-   assert(prog_data->base.const_data_size == 0);
-   if (nir->constant_data_size > 0) {
-      prog_data->base.const_data_size = nir->constant_data_size;
-      prog_data->base.const_data_offset =
-         brw_append_data(p, nir->constant_data, nir->constant_data_size, 32);
-   }
-
-   return brw_get_program(p, &prog_data->base.program_size);
-}
diff --git a/src/intel/compiler/brw_vec4_gs_nir.cpp b/src/intel/compiler/brw_vec4_gs_nir.cpp
deleted file mode 100644
index 60b42da87b9..00000000000
--- a/src/intel/compiler/brw_vec4_gs_nir.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_vec4_gs_visitor.h"
-
-namespace brw {
-
-void
-vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
-{
-   dst_reg dest;
-   src_reg src;
-
-   switch (instr->intrinsic) {
-   case nir_intrinsic_load_per_vertex_input: {
-      assert(instr->def.bit_size == 32);
-      /* The EmitNoIndirectInput flag guarantees our vertex index will
-       * be constant.  We should handle indirects someday.
-       */
-      const unsigned vertex = nir_src_as_uint(instr->src[0]);
-      const unsigned offset_reg = nir_src_as_uint(instr->src[1]);
-
-      const unsigned input_array_stride = prog_data->urb_read_length * 2;
-
-      /* Make up a type...we have no way of knowing... */
-      const glsl_type *const type = glsl_ivec_type(instr->num_components);
-
-      src = src_reg(ATTR, input_array_stride * vertex +
-                    nir_intrinsic_base(instr) + offset_reg,
-                    type);
-      src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr));
-
-      dest = get_nir_def(instr->def, src.type);
-      dest.writemask = brw_writemask_for_size(instr->num_components);
-      emit(MOV(dest, src));
-      break;
-   }
-
-   case nir_intrinsic_load_input:
-      unreachable("nir_lower_io should have produced per_vertex intrinsics");
-
-   case nir_intrinsic_emit_vertex_with_counter:
-      this->vertex_count =
-         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
-      gs_emit_vertex(nir_intrinsic_stream_id(instr));
-      break;
-
-   case nir_intrinsic_end_primitive_with_counter:
-      this->vertex_count =
-         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
-      gs_end_primitive();
-      break;
-
-   case nir_intrinsic_set_vertex_and_primitive_count:
-      this->vertex_count =
-         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
-      break;
-
-   case nir_intrinsic_load_primitive_id:
-      assert(gs_prog_data->include_primitive_id);
-      dest = get_nir_def(instr->def, BRW_REGISTER_TYPE_D);
-      emit(MOV(dest, retype(brw_vec4_grf(1, 0), BRW_REGISTER_TYPE_D)));
-      break;
-
-   case nir_intrinsic_load_invocation_id: {
-      dest = get_nir_def(instr->def, BRW_REGISTER_TYPE_D);
-      if (gs_prog_data->invocations > 1)
-         emit(GS_OPCODE_GET_INSTANCE_ID, dest);
-      else
-         emit(MOV(dest, brw_imm_ud(0)));
-      break;
-   }
-
-   default:
-      vec4_visitor::nir_emit_intrinsic(instr);
-   }
-}
-}
diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp
deleted file mode 100644
index d611e50d544..00000000000
--- a/src/intel/compiler/brw_vec4_gs_visitor.cpp
+++ /dev/null
@@ -1,560 +0,0 @@
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file brw_vec4_gs_visitor.cpp
- *
- * Geometry-shader-specific code derived from the vec4_visitor class.
- */
-
-#include "brw_vec4_gs_visitor.h"
-#include "brw_cfg.h"
-#include "brw_fs.h"
-
-namespace brw {
-
-vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
-                                 const struct brw_compile_params *params,
-                                 struct brw_gs_compile *c,
-                                 struct brw_gs_prog_data *prog_data,
-                                 const nir_shader *shader,
-                                 bool no_spills,
-                                 bool debug_enabled)
-   : vec4_visitor(compiler, params, &c->key.base.tex,
-                  &prog_data->base, shader,
-                  no_spills, debug_enabled),
-     c(c),
-     gs_prog_data(prog_data)
-{
-}
-
-
-static inline struct brw_reg
-attribute_to_hw_reg(int attr, brw_reg_type type, bool interleaved)
-{
-   struct brw_reg reg;
-
-   unsigned width = REG_SIZE / 2 / MAX2(4, type_sz(type));
-   if (interleaved) {
-      reg = stride(brw_vecn_grf(width, attr / 2, (attr % 2) * 4), 0, width, 1);
-   } else {
-      reg = brw_vecn_grf(width, attr, 0);
-   }
-
-   reg.type = type;
-   return reg;
-}
-
-/**
- * Replace each register of type ATTR in this->instructions with a reference
- * to a fixed HW register.
- *
- * If interleaved is true, then each attribute takes up half a register, with
- * register N containing attribute 2*N in its first half and attribute 2*N+1
- * in its second half (this corresponds to the payload setup used by geometry
- * shaders in "single" or "dual instanced" dispatch mode).  If interleaved is
- * false, then each attribute takes up a whole register, with register N
- * containing attribute N (this corresponds to the payload setup used by
- * vertex shaders, and by geometry shaders in "dual object" dispatch mode).
- */
-int
-vec4_gs_visitor::setup_varying_inputs(int payload_reg,
-                                      int attributes_per_reg)
-{
-   /* For geometry shaders there are N copies of the input attributes, where N
-    * is the number of input vertices.  attribute_map[BRW_VARYING_SLOT_COUNT *
-    * i + j] represents attribute j for vertex i.
-    *
-    * Note that GS inputs are read from the VUE 256 bits (2 vec4's) at a time,
-    * so the total number of input slots that will be delivered to the GS (and
-    * thus the stride of the input arrays) is urb_read_length * 2.
-    */
-   const unsigned num_input_vertices = nir->info.gs.vertices_in;
-   assert(num_input_vertices <= MAX_GS_INPUT_VERTICES);
-   unsigned input_array_stride = prog_data->urb_read_length * 2;
-
-   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      for (int i = 0; i < 3; i++) {
-         if (inst->src[i].file != ATTR)
-            continue;
-
-         assert(inst->src[i].offset % REG_SIZE == 0);
-         int grf = payload_reg * attributes_per_reg +
-                   inst->src[i].nr + inst->src[i].offset / REG_SIZE;
-
-         struct brw_reg reg =
-            attribute_to_hw_reg(grf, inst->src[i].type, attributes_per_reg > 1);
-         reg.swizzle = inst->src[i].swizzle;
-         if (inst->src[i].abs)
-            reg = brw_abs(reg);
-         if (inst->src[i].negate)
-            reg = negate(reg);
-
-         inst->src[i] = reg;
-      }
-   }
-
-   int regs_used = ALIGN(input_array_stride * num_input_vertices,
-                         attributes_per_reg) / attributes_per_reg;
-   return payload_reg + regs_used;
-}
-
-void
-vec4_gs_visitor::setup_payload()
-{
-   /* If we are in dual instanced or single mode, then attributes are going
-    * to be interleaved, so one register contains two attribute slots.
-    */
-   int attributes_per_reg =
-      prog_data->dispatch_mode == INTEL_DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
-
-   int reg = 0;
-
-   /* The payload always contains important data in r0, which contains
-    * the URB handles that are passed on to the URB write at the end
-    * of the thread.
-    */
-   reg++;
-
-   /* If the shader uses gl_PrimitiveIDIn, that goes in r1. */
-   if (gs_prog_data->include_primitive_id)
-      reg++;
-
-   reg = setup_uniforms(reg);
-
-   reg = setup_varying_inputs(reg, attributes_per_reg);
-
-   this->first_non_payload_grf = reg;
-}
-
-
-void
-vec4_gs_visitor::emit_prolog()
-{
-   /* In vertex shaders, r0.2 is guaranteed to be initialized to zero.  In
-    * geometry shaders, it isn't (it contains a bunch of information we don't
-    * need, like the input primitive type).  We need r0.2 to be zero in order
-    * to build scratch read/write messages correctly (otherwise this value
-    * will be interpreted as a global offset, causing us to do our scratch
-    * reads/writes to garbage memory).  So just set it to zero at the top of
-    * the shader.
-    */
-   this->current_annotation = "clear r0.2";
-   dst_reg r0(retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD));
-   vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2, r0, brw_imm_ud(0u));
-   inst->force_writemask_all = true;
-
-   /* Create a virtual register to hold the vertex count */
-   this->vertex_count = src_reg(this, glsl_uint_type());
-
-   /* Initialize the vertex_count register to 0 */
-   this->current_annotation = "initialize vertex_count";
-   inst = emit(MOV(dst_reg(this->vertex_count), brw_imm_ud(0u)));
-   inst->force_writemask_all = true;
-
-   if (c->control_data_header_size_bits > 0) {
-      /* Create a virtual register to hold the current set of control data
-       * bits.
-       */
-      this->control_data_bits = src_reg(this, glsl_uint_type());
-
-      /* If we're outputting more than 32 control data bits, then EmitVertex()
-       * will set control_data_bits to 0 after emitting the first vertex.
-       * Otherwise, we need to initialize it to 0 here.
-       */
-      if (c->control_data_header_size_bits <= 32) {
-         this->current_annotation = "initialize control data bits";
-         inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u)));
-         inst->force_writemask_all = true;
-      }
-   }
-
-   this->current_annotation = NULL;
-}
-
-void
-vec4_gs_visitor::emit_thread_end()
-{
-   if (c->control_data_header_size_bits > 0) {
-      /* During shader execution, we only ever call emit_control_data_bits()
-       * just prior to outputting a vertex.  Therefore, the control data bits
-       * corresponding to the most recently output vertex still need to be
-       * emitted.
-       */
-      current_annotation = "thread end: emit control data bits";
-      emit_control_data_bits();
-   }
-
-   /* MRF 0 is reserved for the debugger, so start with message header
-    * in MRF 1.
-    */
-   int base_mrf = 1;
-
-   current_annotation = "thread end";
-   dst_reg mrf_reg(MRF, base_mrf);
-   src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
-   vec4_instruction *inst = emit(MOV(mrf_reg, r0));
-   inst->force_writemask_all = true;
-   emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
-   inst = emit(GS_OPCODE_THREAD_END);
-   inst->base_mrf = base_mrf;
-   inst->mlen = 1;
-}
-
-
-void
-vec4_gs_visitor::emit_urb_write_header(int mrf)
-{
-   /* The SEND instruction that writes the vertex data to the VUE will use
-    * per_slot_offset=true, which means that DWORDs 3 and 4 of the message
-    * header specify an offset (in multiples of 256 bits) into the URB entry
-    * at which the write should take place.
-    *
-    * So we have to prepare a message header with the appropriate offset
-    * values.
-    */
-   dst_reg mrf_reg(MRF, mrf);
-   src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
-   this->current_annotation = "URB write header";
-   vec4_instruction *inst = emit(MOV(mrf_reg, r0));
-   inst->force_writemask_all = true;
-   emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, this->vertex_count,
-        brw_imm_ud(gs_prog_data->output_vertex_size_hwords));
-}
-
-
-vec4_instruction *
-vec4_gs_visitor::emit_urb_write_opcode(bool complete)
-{
-   /* We don't care whether the vertex is complete, because in general
-    * geometry shaders output multiple vertices, and we don't terminate the
-    * thread until all vertices are complete.
-    */
-   (void) complete;
-
-   vec4_instruction *inst = emit(VEC4_GS_OPCODE_URB_WRITE);
-   inst->offset = gs_prog_data->control_data_header_size_hwords;
-
-   inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
-   return inst;
-}
-
-
-/**
- * Write out a batch of 32 control data bits from the control_data_bits
- * register to the URB.
- *
- * The current value of the vertex_count register determines which DWORD in
- * the URB receives the control data bits.  The control_data_bits register is
- * assumed to contain the correct data for the vertex that was most recently
- * output, and all previous vertices that share the same DWORD.
- *
- * This function takes care of ensuring that if no vertices have been output
- * yet, no control bits are emitted.
- */
-void
-vec4_gs_visitor::emit_control_data_bits()
-{
-   assert(c->control_data_bits_per_vertex != 0);
-
-   /* Since the URB_WRITE_OWORD message operates with 128-bit (vec4 sized)
-    * granularity, we need to use two tricks to ensure that the batch of 32
-    * control data bits is written to the appropriate DWORD in the URB.  To
-    * select which vec4 we are writing to, we use the "slot {0,1} offset"
-    * fields of the message header.  To select which DWORD in the vec4 we are
-    * writing to, we use the channel mask fields of the message header.  To
-    * avoid penalizing geometry shaders that emit a small number of vertices
-    * with extra bookkeeping, we only do each of these tricks when
-    * c->prog_data.control_data_header_size_bits is large enough to make it
-    * necessary.
-    *
-    * Note: this means that if we're outputting just a single DWORD of control
-    * data bits, we'll actually replicate it four times since we won't do any
-    * channel masking.  But that's not a problem since in this case the
-    * hardware only pays attention to the first DWORD.
-    */
-   enum brw_urb_write_flags urb_write_flags = BRW_URB_WRITE_OWORD;
-   if (c->control_data_header_size_bits > 32)
-      urb_write_flags = urb_write_flags | BRW_URB_WRITE_USE_CHANNEL_MASKS;
-   if (c->control_data_header_size_bits > 128)
-      urb_write_flags = urb_write_flags | BRW_URB_WRITE_PER_SLOT_OFFSET;
-
-   /* If we are using either channel masks or a per-slot offset, then we
-    * need to figure out which DWORD we are trying to write to, using the
-    * formula:
-    *
-    *     dword_index = (vertex_count - 1) * bits_per_vertex / 32
-    *
-    * Since bits_per_vertex is a power of two, and is known at compile
-    * time, this can be optimized to:
-    *
-    *     dword_index = (vertex_count - 1) >> (6 - log2(bits_per_vertex))
-    */
-   src_reg dword_index(this, glsl_uint_type());
-   if (urb_write_flags) {
-      src_reg prev_count(this, glsl_uint_type());
-      emit(ADD(dst_reg(prev_count), this->vertex_count,
-               brw_imm_ud(0xffffffffu)));
-      unsigned log2_bits_per_vertex =
-         util_last_bit(c->control_data_bits_per_vertex);
-      emit(SHR(dst_reg(dword_index), prev_count,
-               brw_imm_ud(6 - log2_bits_per_vertex)));
-   }
-
-   /* Start building the URB write message.  The first MRF gets a copy of
-    * R0.
-    */
-   int base_mrf = 1;
-   dst_reg mrf_reg(MRF, base_mrf);
-   src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
-   vec4_instruction *inst = emit(MOV(mrf_reg, r0));
-   inst->force_writemask_all = true;
-
-   if (urb_write_flags & BRW_URB_WRITE_PER_SLOT_OFFSET) {
-      /* Set the per-slot offset to dword_index / 4, to that we'll write to
-       * the appropriate OWORD within the control data header.
-       */
-      src_reg per_slot_offset(this, glsl_uint_type());
-      emit(SHR(dst_reg(per_slot_offset), dword_index, brw_imm_ud(2u)));
-      emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset,
-           brw_imm_ud(1u));
-   }
-
-   if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) {
-      /* Set the channel masks to 1 << (dword_index % 4), so that we'll
-       * write to the appropriate DWORD within the OWORD.  We need to do
-       * this computation with force_writemask_all, otherwise garbage data
-       * from invocation 0 might clobber the mask for invocation 1 when
-       * GS_OPCODE_PREPARE_CHANNEL_MASKS tries to OR the two masks
-       * together.
-       */
-      src_reg channel(this, glsl_uint_type());
-      inst = emit(AND(dst_reg(channel), dword_index, brw_imm_ud(3u)));
-      inst->force_writemask_all = true;
-      src_reg one(this, glsl_uint_type());
-      inst = emit(MOV(dst_reg(one), brw_imm_ud(1u)));
-      inst->force_writemask_all = true;
-      src_reg channel_mask(this, glsl_uint_type());
-      inst = emit(SHL(dst_reg(channel_mask), one, channel));
-      inst->force_writemask_all = true;
-      emit(GS_OPCODE_PREPARE_CHANNEL_MASKS, dst_reg(channel_mask),
-                                            channel_mask);
-      emit(GS_OPCODE_SET_CHANNEL_MASKS, mrf_reg, channel_mask);
-   }
-
-   /* Store the control data bits in the message payload and send it. */
-   dst_reg mrf_reg2(MRF, base_mrf + 1);
-   inst = emit(MOV(mrf_reg2, this->control_data_bits));
-   inst->force_writemask_all = true;
-   inst = emit(VEC4_GS_OPCODE_URB_WRITE);
-   inst->urb_write_flags = urb_write_flags;
-   inst->base_mrf = base_mrf;
-   inst->mlen = 2;
-}
-
-void
-vec4_gs_visitor::set_stream_control_data_bits(unsigned stream_id)
-{
-   /* control_data_bits |= stream_id << ((2 * (vertex_count - 1)) % 32) */
-
-   /* Note: we are calling this *before* increasing vertex_count, so
-    * this->vertex_count == vertex_count - 1 in the formula above.
-    */
-
-   /* Stream mode uses 2 bits per vertex */
-   assert(c->control_data_bits_per_vertex == 2);
-
-   /* Must be a valid stream */
-   assert(stream_id < 4); /* MAX_VERTEX_STREAMS */
-
-   /* Control data bits are initialized to 0 so we don't have to set any
-    * bits when sending vertices to stream 0.
-    */
-   if (stream_id == 0)
-      return;
-
-   /* reg::sid = stream_id */
-   src_reg sid(this, glsl_uint_type());
-   emit(MOV(dst_reg(sid), brw_imm_ud(stream_id)));
-
-   /* reg:shift_count = 2 * (vertex_count - 1) */
-   src_reg shift_count(this, glsl_uint_type());
-   emit(SHL(dst_reg(shift_count), this->vertex_count, brw_imm_ud(1u)));
-
-   /* Note: we're relying on the fact that the GEN SHL instruction only pays
-    * attention to the lower 5 bits of its second source argument, so on this
-    * architecture, stream_id << 2 * (vertex_count - 1) is equivalent to
-    * stream_id << ((2 * (vertex_count - 1)) % 32).
-    */
-   src_reg mask(this, glsl_uint_type());
-   emit(SHL(dst_reg(mask), sid, shift_count));
-   emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask));
-}
-
-void
-vec4_gs_visitor::gs_emit_vertex(int stream_id)
-{
-   this->current_annotation = "emit vertex: safety check";
-
-   /* Haswell and later hardware ignores the "Render Stream Select" bits
-    * from the 3DSTATE_STREAMOUT packet when the SOL stage is disabled,
-    * and instead sends all primitives down the pipeline for rasterization.
-    * If the SOL stage is enabled, "Render Stream Select" is honored and
-    * primitives bound to non-zero streams are discarded after stream output.
-    *
-    * Since the only purpose of primives sent to non-zero streams is to
-    * be recorded by transform feedback, we can simply discard all geometry
-    * bound to these streams when transform feedback is disabled.
-    */
-   if (stream_id > 0 && !nir->info.has_transform_feedback_varyings)
-      return;
-
-   /* If we're outputting 32 control data bits or less, then we can wait
-    * until the shader is over to output them all.  Otherwise we need to
-    * output them as we go.  Now is the time to do it, since we're about to
-    * output the vertex_count'th vertex, so it's guaranteed that the
-    * control data bits associated with the (vertex_count - 1)th vertex are
-    * correct.
-    */
-   if (c->control_data_header_size_bits > 32) {
-      this->current_annotation = "emit vertex: emit control data bits";
-      /* Only emit control data bits if we've finished accumulating a batch
-       * of 32 bits.  This is the case when:
-       *
-       *     (vertex_count * bits_per_vertex) % 32 == 0
-       *
-       * (in other words, when the last 5 bits of vertex_count *
-       * bits_per_vertex are 0).  Assuming bits_per_vertex == 2^n for some
-       * integer n (which is always the case, since bits_per_vertex is
-       * always 1 or 2), this is equivalent to requiring that the last 5-n
-       * bits of vertex_count are 0:
-       *
-       *     vertex_count & (2^(5-n) - 1) == 0
-       *
-       * 2^(5-n) == 2^5 / 2^n == 32 / bits_per_vertex, so this is
-       * equivalent to:
-       *
-       *     vertex_count & (32 / bits_per_vertex - 1) == 0
-       */
-      vec4_instruction *inst =
-         emit(AND(dst_null_ud(), this->vertex_count,
-                  brw_imm_ud(32 / c->control_data_bits_per_vertex - 1)));
-      inst->conditional_mod = BRW_CONDITIONAL_Z;
-
-      emit(IF(BRW_PREDICATE_NORMAL));
-      {
-         /* If vertex_count is 0, then no control data bits have been
-          * accumulated yet, so we skip emitting them.
-          */
-         emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u),
-                  BRW_CONDITIONAL_NEQ));
-         emit(IF(BRW_PREDICATE_NORMAL));
-         emit_control_data_bits();
-         emit(BRW_OPCODE_ENDIF);
-
-         /* Reset control_data_bits to 0 so we can start accumulating a new
-          * batch.
-          *
-          * Note: in the case where vertex_count == 0, this neutralizes the
-          * effect of any call to EndPrimitive() that the shader may have
-          * made before outputting its first vertex.
-          */
-         inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u)));
-         inst->force_writemask_all = true;
-      }
-      emit(BRW_OPCODE_ENDIF);
-   }
-
-   this->current_annotation = "emit vertex: vertex data";
-   emit_vertex();
-
-   /* In stream mode we have to set control data bits for all vertices
-    * unless we have disabled control data bits completely (which we do
-    * do for MESA_PRIM_POINTS outputs that don't use streams).
-    */
-   if (c->control_data_header_size_bits > 0 &&
-       gs_prog_data->control_data_format ==
-          GFX7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
-       this->current_annotation = "emit vertex: Stream control data bits";
-       set_stream_control_data_bits(stream_id);
-   }
-
-   this->current_annotation = NULL;
-}
-
-void
-vec4_gs_visitor::gs_end_primitive()
-{
-   /* We can only do EndPrimitive() functionality when the control data
-    * consists of cut bits.  Fortunately, the only time it isn't is when the
-    * output type is points, in which case EndPrimitive() is a no-op.
-    */
-   if (gs_prog_data->control_data_format !=
-       GFX7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT) {
-      return;
-   }
-
-   if (c->control_data_header_size_bits == 0)
-      return;
-
-   /* Cut bits use one bit per vertex. */
-   assert(c->control_data_bits_per_vertex == 1);
-
-   /* Cut bit n should be set to 1 if EndPrimitive() was called after emitting
-    * vertex n, 0 otherwise.  So all we need to do here is mark bit
-    * (vertex_count - 1) % 32 in the cut_bits register to indicate that
-    * EndPrimitive() was called after emitting vertex (vertex_count - 1);
-    * vec4_gs_visitor::emit_control_data_bits() will take care of the rest.
-    *
-    * Note that if EndPrimitve() is called before emitting any vertices, this
-    * will cause us to set bit 31 of the control_data_bits register to 1.
-    * That's fine because:
-    *
-    * - If max_vertices < 32, then vertex number 31 (zero-based) will never be
-    *   output, so the hardware will ignore cut bit 31.
-    *
-    * - If max_vertices == 32, then vertex number 31 is guaranteed to be the
-    *   last vertex, so setting cut bit 31 has no effect (since the primitive
-    *   is automatically ended when the GS terminates).
-    *
-    * - If max_vertices > 32, then the ir_emit_vertex visitor will reset the
-    *   control_data_bits register to 0 when the first vertex is emitted.
-    */
-
-   /* control_data_bits |= 1 << ((vertex_count - 1) % 32) */
-   src_reg one(this, glsl_uint_type());
-   emit(MOV(dst_reg(one), brw_imm_ud(1u)));
-   src_reg prev_count(this, glsl_uint_type());
-   emit(ADD(dst_reg(prev_count), this->vertex_count, brw_imm_ud(0xffffffffu)));
-   src_reg mask(this, glsl_uint_type());
-   /* Note: we're relying on the fact that the GEN SHL instruction only pays
-    * attention to the lower 5 bits of its second source argument, so on this
-    * architecture, 1 << (vertex_count - 1) is equivalent to 1 <<
-    * ((vertex_count - 1) % 32).
-    */
-   emit(SHL(dst_reg(mask), one, prev_count));
-   emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask));
-}
-
-} /* namespace brw */
-
diff --git a/src/intel/compiler/brw_vec4_gs_visitor.h b/src/intel/compiler/brw_vec4_gs_visitor.h
deleted file mode 100644
index ec8b6f7fa8b..00000000000
--- a/src/intel/compiler/brw_vec4_gs_visitor.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file brw_vec4_gs_visitor.h
- *
- * Geometry-shader-specific code derived from the vec4_visitor class.
- */
-
-#ifndef BRW_VEC4_GS_VISITOR_H
-#define BRW_VEC4_GS_VISITOR_H
-
-#include "brw_vec4.h"
-
-#define MAX_GS_INPUT_VERTICES 6
-
-#ifdef __cplusplus
-namespace brw {
-
-class vec4_gs_visitor : public vec4_visitor
-{
-public:
-   vec4_gs_visitor(const struct brw_compiler *compiler,
-                   const struct brw_compile_params *params,
-                   struct brw_gs_compile *c,
-                   struct brw_gs_prog_data *prog_data,
-                   const nir_shader *shader,
-                   bool no_spills,
-                   bool debug_enabled);
-
-protected:
-   virtual void setup_payload();
-   virtual void emit_prolog();
-   virtual void emit_thread_end();
-   virtual void emit_urb_write_header(int mrf);
-   virtual vec4_instruction *emit_urb_write_opcode(bool complete);
-   virtual void gs_emit_vertex(int stream_id);
-   virtual void gs_end_primitive();
-   virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
-
-protected:
-   int setup_varying_inputs(int payload_reg, int attributes_per_reg);
-   void emit_control_data_bits();
-   void set_stream_control_data_bits(unsigned stream_id);
-
-   src_reg vertex_count;
-   src_reg control_data_bits;
-   const struct brw_gs_compile * const c;
-   struct brw_gs_prog_data * const gs_prog_data;
-};
-
-} /* namespace brw */
-#endif /* __cplusplus */
-
-#endif /* BRW_VEC4_GS_VISITOR_H */
diff --git a/src/intel/compiler/brw_vec4_live_variables.cpp b/src/intel/compiler/brw_vec4_live_variables.cpp
deleted file mode 100644
index 88fa179d0f5..00000000000
--- a/src/intel/compiler/brw_vec4_live_variables.cpp
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
- *
- */
-
-#include "brw_vec4.h"
-#include "brw_vec4_live_variables.h"
-
-using namespace brw;
-
-#define MAX_INSTRUCTION (1 << 30)
-
-/** @file brw_vec4_live_variables.cpp
- *
- * Support for computing at the basic block level which variables
- * (virtual GRFs in our case) are live at entry and exit.
- *
- * See Muchnick's Advanced Compiler Design and Implementation, section
- * 14.1 (p444).
- */
-
-/**
- * Sets up the use/def arrays and block-local approximation of the live ranges.
- *
- * The basic-block-level live variable analysis needs to know which
- * variables get used before they're completely defined, and which
- * variables are completely defined before they're used.
- *
- * We independently track each channel of a vec4.  This is because we need to
- * be able to recognize a sequence like:
- *
- * ...
- * DP4 tmp.x a b;
- * DP4 tmp.y c d;
- * MUL result.xy tmp.xy e.xy
- * ...
- *
- * as having tmp live only across that sequence (assuming it's used nowhere
- * else), because it's a common pattern.  A more conservative approach that
- * doesn't get tmp marked a deffed in this block will tend to result in
- * spilling.
- */
-void
-vec4_live_variables::setup_def_use()
-{
-   int ip = 0;
-
-   foreach_block (block, cfg) {
-      assert(ip == block->start_ip);
-      if (block->num > 0)
-	 assert(cfg->blocks[block->num - 1]->end_ip == ip - 1);
-
-      foreach_inst_in_block(vec4_instruction, inst, block) {
-         struct block_data *bd = &block_data[block->num];
-
-         /* Set up the instruction uses. */
-	 for (unsigned int i = 0; i < 3; i++) {
-	    if (inst->src[i].file == VGRF) {
-               for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), 16); j++) {
-                  for (int c = 0; c < 4; c++) {
-                     const unsigned v = var_from_reg(alloc, inst->src[i], c, j);
-
-                     start[v] = MIN2(start[v], ip);
-                     end[v] = ip;
-
-                     if (!BITSET_TEST(bd->def, v))
-                        BITSET_SET(bd->use, v);
-                  }
-               }
-	    }
-	 }
-         for (unsigned c = 0; c < 4; c++) {
-            if (inst->reads_flag(c) &&
-                !BITSET_TEST(bd->flag_def, c)) {
-               BITSET_SET(bd->flag_use, c);
-            }
-         }
-
-         /* Set up the instruction defs. */
-         if (inst->dst.file == VGRF) {
-            for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) {
-               for (int c = 0; c < 4; c++) {
-                  if (inst->dst.writemask & (1 << c)) {
-                     const unsigned v = var_from_reg(alloc, inst->dst, c, i);
-
-                     start[v] = MIN2(start[v], ip);
-                     end[v] = ip;
-
-                     /* Check for unconditional register writes, these are the
-                      * things that screen off preceding definitions of a
-                      * variable, and thus qualify for being in def[].
-                      */
-                     if ((!inst->predicate || inst->opcode == BRW_OPCODE_SEL) &&
-                         !BITSET_TEST(bd->use, v))
-                        BITSET_SET(bd->def, v);
-                  }
-               }
-            }
-         }
-         if (inst->writes_flag(devinfo)) {
-            for (unsigned c = 0; c < 4; c++) {
-               if ((inst->dst.writemask & (1 << c)) &&
-                   !BITSET_TEST(bd->flag_use, c)) {
-                  BITSET_SET(bd->flag_def, c);
-               }
-            }
-         }
-
-	 ip++;
-      }
-   }
-}
-
-/**
- * The algorithm incrementally sets bits in liveout and livein,
- * propagating it through control flow.  It will eventually terminate
- * because it only ever adds bits, and stops when no bits are added in
- * a pass.
- */
-void
-vec4_live_variables::compute_live_variables()
-{
-   bool cont = true;
-
-   while (cont) {
-      cont = false;
-
-      foreach_block_reverse (block, cfg) {
-         struct block_data *bd = &block_data[block->num];
-
-	 /* Update liveout */
-	 foreach_list_typed(bblock_link, child_link, link, &block->children) {
-       struct block_data *child_bd = &block_data[child_link->block->num];
-
-	    for (int i = 0; i < bitset_words; i++) {
-               BITSET_WORD new_liveout = (child_bd->livein[i] &
-                                          ~bd->liveout[i]);
-               if (new_liveout) {
-                  bd->liveout[i] |= new_liveout;
-		  cont = true;
-	       }
-	    }
-            BITSET_WORD new_liveout = (child_bd->flag_livein[0] &
-                                       ~bd->flag_liveout[0]);
-            if (new_liveout) {
-               bd->flag_liveout[0] |= new_liveout;
-               cont = true;
-            }
-	 }
-
-         /* Update livein */
-         for (int i = 0; i < bitset_words; i++) {
-            BITSET_WORD new_livein = (bd->use[i] |
-                                      (bd->liveout[i] &
-                                       ~bd->def[i]));
-            if (new_livein & ~bd->livein[i]) {
-               bd->livein[i] |= new_livein;
-               cont = true;
-            }
-         }
-         BITSET_WORD new_livein = (bd->flag_use[0] |
-                                   (bd->flag_liveout[0] &
-                                    ~bd->flag_def[0]));
-         if (new_livein & ~bd->flag_livein[0]) {
-            bd->flag_livein[0] |= new_livein;
-            cont = true;
-         }
-      }
-   }
-}
-
-/**
- * Extend the start/end ranges for each variable to account for the
- * new information calculated from control flow.
- */
-void
-vec4_live_variables::compute_start_end()
-{
-   foreach_block (block, cfg) {
-      const struct block_data &bd = block_data[block->num];
-
-      for (int i = 0; i < num_vars; i++) {
-         if (BITSET_TEST(bd.livein, i)) {
-            start[i] = MIN2(start[i], block->start_ip);
-            end[i] = MAX2(end[i], block->start_ip);
-         }
-
-         if (BITSET_TEST(bd.liveout, i)) {
-            start[i] = MIN2(start[i], block->end_ip);
-            end[i] = MAX2(end[i], block->end_ip);
-         }
-      }
-   }
-}
-
-vec4_live_variables::vec4_live_variables(const backend_shader *s)
-   : alloc(s->alloc), cfg(s->cfg)
-{
-   mem_ctx = ralloc_context(NULL);
-
-   num_vars = alloc.total_size * 8;
-   start = ralloc_array(mem_ctx, int, num_vars);
-   end = ralloc_array(mem_ctx, int, num_vars);
-
-   for (int i = 0; i < num_vars; i++) {
-      start[i] = MAX_INSTRUCTION;
-      end[i] = -1;
-   }
-
-   devinfo = s->compiler->devinfo;
-
-   block_data = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
-
-   bitset_words = BITSET_WORDS(num_vars);
-   for (int i = 0; i < cfg->num_blocks; i++) {
-      block_data[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
-      block_data[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
-      block_data[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
-      block_data[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
-
-      block_data[i].flag_def[0] = 0;
-      block_data[i].flag_use[0] = 0;
-      block_data[i].flag_livein[0] = 0;
-      block_data[i].flag_liveout[0] = 0;
-   }
-
-   setup_def_use();
-   compute_live_variables();
-   compute_start_end();
-}
-
-vec4_live_variables::~vec4_live_variables()
-{
-   ralloc_free(mem_ctx);
-}
-
-static bool
-check_register_live_range(const vec4_live_variables *live, int ip,
-                          unsigned var, unsigned n)
-{
-   for (unsigned j = 0; j < n; j += 4) {
-      if (var + j >= unsigned(live->num_vars) ||
-          live->start[var + j] > ip || live->end[var + j] < ip)
-         return false;
-   }
-
-   return true;
-}
-
-bool
-vec4_live_variables::validate(const backend_shader *s) const
-{
-   unsigned ip = 0;
-
-   foreach_block_and_inst(block, vec4_instruction, inst, s->cfg) {
-      for (unsigned c = 0; c < 4; c++) {
-         if (inst->dst.writemask & (1 << c)) {
-            for (unsigned i = 0; i < 3; i++) {
-               if (inst->src[i].file == VGRF &&
-                   !check_register_live_range(this, ip,
-                                              var_from_reg(alloc, inst->src[i], c),
-                                              regs_read(inst, i)))
-                  return false;
-            }
-
-            if (inst->dst.file == VGRF &&
-                !check_register_live_range(this, ip,
-                                           var_from_reg(alloc, inst->dst, c),
-                                           regs_written(inst)))
-               return false;
-         }
-      }
-
-      ip++;
-   }
-
-   return true;
-}
-
-int
-vec4_live_variables::var_range_start(unsigned v, unsigned n) const
-{
-   int ip = INT_MAX;
-
-   for (unsigned i = 0; i < n; i++)
-      ip = MIN2(ip, start[v + i]);
-
-   return ip;
-}
-
-int
-vec4_live_variables::var_range_end(unsigned v, unsigned n) const
-{
-   int ip = INT_MIN;
-
-   for (unsigned i = 0; i < n; i++)
-      ip = MAX2(ip, end[v + i]);
-
-   return ip;
-}
-
-bool
-vec4_live_variables::vgrfs_interfere(int a, int b) const
-{
-   return !((var_range_end(8 * alloc.offsets[a], 8 * alloc.sizes[a]) <=
-             var_range_start(8 * alloc.offsets[b], 8 * alloc.sizes[b])) ||
-            (var_range_end(8 * alloc.offsets[b], 8 * alloc.sizes[b]) <=
-             var_range_start(8 * alloc.offsets[a], 8 * alloc.sizes[a])));
-}
diff --git a/src/intel/compiler/brw_vec4_live_variables.h b/src/intel/compiler/brw_vec4_live_variables.h
deleted file mode 100644
index 39d97c8a521..00000000000
--- a/src/intel/compiler/brw_vec4_live_variables.h
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
- *
- */
-
-#ifndef BRW_VEC4_LIVE_VARIABLES_H
-#define BRW_VEC4_LIVE_VARIABLES_H
-
-#include "brw_ir_vec4.h"
-#include "brw_ir_analysis.h"
-#include "util/bitset.h"
-
-struct backend_shader;
-
-namespace brw {
-
-class vec4_live_variables {
-public:
-   struct block_data {
-      /**
-       * Which variables are defined before being used in the block.
-       *
-       * Note that for our purposes, "defined" means unconditionally, completely
-       * defined.
-       */
-      BITSET_WORD *def;
-
-      /**
-       * Which variables are used before being defined in the block.
-       */
-      BITSET_WORD *use;
-
-      /** Which defs reach the entry point of the block. */
-      BITSET_WORD *livein;
-
-      /** Which defs reach the exit point of the block. */
-      BITSET_WORD *liveout;
-
-      BITSET_WORD flag_def[1];
-      BITSET_WORD flag_use[1];
-      BITSET_WORD flag_livein[1];
-      BITSET_WORD flag_liveout[1];
-   };
-
-   vec4_live_variables(const backend_shader *s);
-   ~vec4_live_variables();
-
-   bool
-   validate(const backend_shader *s) const;
-
-   analysis_dependency_class
-   dependency_class() const
-   {
-      return (DEPENDENCY_INSTRUCTION_IDENTITY |
-              DEPENDENCY_INSTRUCTION_DATA_FLOW |
-              DEPENDENCY_VARIABLES);
-   }
-
-   int num_vars;
-   int bitset_words;
-
-   const struct intel_device_info *devinfo;
-
-   /** Per-basic-block information on live variables */
-   struct block_data *block_data;
-
-   /** @{
-    * Final computed live ranges for each variable.
-    */
-   int *start;
-   int *end;
-   /** @} */
-
-   int var_range_start(unsigned v, unsigned n) const;
-   int var_range_end(unsigned v, unsigned n) const;
-   bool vgrfs_interfere(int a, int b) const;
-
-protected:
-   void setup_def_use();
-   void compute_live_variables();
-   void compute_start_end();
-
-   const simple_allocator &alloc;
-   cfg_t *cfg;
-   void *mem_ctx;
-};
-
-/* Returns the variable index for the k-th dword of the c-th component of
- * register reg.
- */
-inline unsigned
-var_from_reg(const simple_allocator &alloc, const src_reg &reg,
-             unsigned c = 0, unsigned k = 0)
-{
-   assert(reg.file == VGRF && reg.nr < alloc.count && c < 4);
-   const unsigned csize = DIV_ROUND_UP(type_sz(reg.type), 4);
-   unsigned result =
-      8 * alloc.offsets[reg.nr] + reg.offset / 4 +
-      (BRW_GET_SWZ(reg.swizzle, c) + k / csize * 4) * csize + k % csize;
-   /* Do not exceed the limit for this register */
-   assert(result < 8 * (alloc.offsets[reg.nr] + alloc.sizes[reg.nr]));
-   return result;
-}
-
-inline unsigned
-var_from_reg(const simple_allocator &alloc, const dst_reg &reg,
-             unsigned c = 0, unsigned k = 0)
-{
-   assert(reg.file == VGRF && reg.nr < alloc.count && c < 4);
-   const unsigned csize = DIV_ROUND_UP(type_sz(reg.type), 4);
-   unsigned result =
-      8 * alloc.offsets[reg.nr] + reg.offset / 4 +
-      (c + k / csize * 4) * csize + k % csize;
-   /* Do not exceed the limit for this register */
-   assert(result < 8 * (alloc.offsets[reg.nr] + alloc.sizes[reg.nr]));
-   return result;
-}
-
-} /* namespace brw */
-
-#endif /* BRW_VEC4_LIVE_VARIABLES_H */
diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp
deleted file mode 100644
index 9121f8e10f2..00000000000
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ /dev/null
@@ -1,2307 +0,0 @@
-/*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_nir.h"
-#include "brw_vec4.h"
-#include "brw_vec4_builder.h"
-#include "brw_vec4_surface_builder.h"
-#include "brw_eu.h"
-#include "nir.h"
-#include "nir_intrinsics.h"
-#include "nir_intrinsics_indices.h"
-
-using namespace brw;
-using namespace brw::surface_access;
-
-namespace brw {
-
-void
-vec4_visitor::emit_nir_code()
-{
-   /* Globally set the rounding mode based on the float controls.  gen7 doesn't
-    * support 16-bit floats, and gen8 switches to scalar VS.  So we don't need
-    * to do any per-instruction mode switching the way the scalar FS handles.
-    */
-   emit_shader_float_controls_execution_mode();
-   if (nir->num_uniforms > 0)
-      nir_setup_uniforms();
-
-   nir_emit_impl(nir_shader_get_entrypoint((nir_shader *)nir));
-}
-
-void
-vec4_visitor::nir_setup_uniforms()
-{
-   uniforms = nir->num_uniforms / 16;
-}
-
-void
-vec4_visitor::nir_emit_impl(nir_function_impl *impl)
-{
-   nir_ssa_values = ralloc_array(mem_ctx, dst_reg, impl->ssa_alloc);
-
-   nir_emit_cf_list(&impl->body);
-}
-
-void
-vec4_visitor::nir_emit_cf_list(exec_list *list)
-{
-   exec_list_validate(list);
-   foreach_list_typed(nir_cf_node, node, node, list) {
-      switch (node->type) {
-      case nir_cf_node_if:
-         nir_emit_if(nir_cf_node_as_if(node));
-         break;
-
-      case nir_cf_node_loop:
-         nir_emit_loop(nir_cf_node_as_loop(node));
-         break;
-
-      case nir_cf_node_block:
-         nir_emit_block(nir_cf_node_as_block(node));
-         break;
-
-      default:
-         unreachable("Invalid CFG node block");
-      }
-   }
-}
-
-void
-vec4_visitor::nir_emit_if(nir_if *if_stmt)
-{
-   /* First, put the condition in f0 */
-   src_reg condition = get_nir_src(if_stmt->condition, BRW_REGISTER_TYPE_D, 1);
-   vec4_instruction *inst = emit(MOV(dst_null_d(), condition));
-   inst->conditional_mod = BRW_CONDITIONAL_NZ;
-
-   /* We can just predicate based on the X channel, as the condition only
-    * goes on its own line */
-   emit(IF(BRW_PREDICATE_ALIGN16_REPLICATE_X));
-
-   nir_emit_cf_list(&if_stmt->then_list);
-
-   if (!nir_cf_list_is_empty_block(&if_stmt->else_list)) {
-      emit(BRW_OPCODE_ELSE);
-      nir_emit_cf_list(&if_stmt->else_list);
-   }
-
-   emit(BRW_OPCODE_ENDIF);
-}
-
-void
-vec4_visitor::nir_emit_loop(nir_loop *loop)
-{
-   assert(!nir_loop_has_continue_construct(loop));
-   emit(BRW_OPCODE_DO);
-
-   nir_emit_cf_list(&loop->body);
-
-   emit(BRW_OPCODE_WHILE);
-}
-
-void
-vec4_visitor::nir_emit_block(nir_block *block)
-{
-   nir_foreach_instr(instr, block) {
-      nir_emit_instr(instr);
-   }
-}
-
-void
-vec4_visitor::nir_emit_instr(nir_instr *instr)
-{
-   base_ir = instr;
-
-   switch (instr->type) {
-   case nir_instr_type_load_const:
-      nir_emit_load_const(nir_instr_as_load_const(instr));
-      break;
-
-   case nir_instr_type_intrinsic:
-      nir_emit_intrinsic(nir_instr_as_intrinsic(instr));
-      break;
-
-   case nir_instr_type_alu:
-      nir_emit_alu(nir_instr_as_alu(instr));
-      break;
-
-   case nir_instr_type_jump:
-      nir_emit_jump(nir_instr_as_jump(instr));
-      break;
-
-   case nir_instr_type_tex:
-      nir_emit_texture(nir_instr_as_tex(instr));
-      break;
-
-   case nir_instr_type_undef:
-      nir_emit_undef(nir_instr_as_undef(instr));
-      break;
-
-   default:
-      unreachable("VS instruction not yet implemented by NIR->vec4");
-   }
-}
-
-static dst_reg
-dst_reg_for_nir_reg(vec4_visitor *v, nir_def *handle,
-                    unsigned base_offset, nir_src *indirect)
-{
-   nir_intrinsic_instr *decl = nir_reg_get_decl(handle);
-   dst_reg reg = v->nir_ssa_values[handle->index];
-   if (nir_intrinsic_bit_size(decl) == 64)
-      reg.type = BRW_REGISTER_TYPE_DF;
-
-   reg = offset(reg, 8, base_offset);
-   if (indirect) {
-      reg.reladdr =
-         new(v->mem_ctx) src_reg(v->get_nir_src(*indirect,
-                                                BRW_REGISTER_TYPE_D,
-                                                1));
-   }
-   return reg;
-}
-
-dst_reg
-vec4_visitor::get_nir_def(const nir_def &def)
-{
-   nir_intrinsic_instr *store_reg = nir_store_reg_for_def(&def);
-   if (!store_reg) {
-      dst_reg dst =
-         dst_reg(VGRF, alloc.allocate(DIV_ROUND_UP(def.bit_size, 32)));
-      if (def.bit_size == 64)
-         dst.type = BRW_REGISTER_TYPE_DF;
-      nir_ssa_values[def.index] = dst;
-      return dst;
-   } else {
-      nir_src *indirect =
-         (store_reg->intrinsic == nir_intrinsic_store_reg_indirect) ?
-         &store_reg->src[2] : NULL;
-
-      dst_reg dst = dst_reg_for_nir_reg(this, store_reg->src[1].ssa,
-                                        nir_intrinsic_base(store_reg),
-                                        indirect);
-      dst.writemask = nir_intrinsic_write_mask(store_reg);
-      return dst;
-   }
-}
-
-dst_reg
-vec4_visitor::get_nir_def(const nir_def &def, enum brw_reg_type type)
-{
-   return retype(get_nir_def(def), type);
-}
-
-dst_reg
-vec4_visitor::get_nir_def(const nir_def &def, nir_alu_type type)
-{
-   return get_nir_def(def, brw_type_for_nir_type(devinfo, type));
-}
-
-src_reg
-vec4_visitor::get_nir_src(const nir_src &src, enum brw_reg_type type,
-                          unsigned num_components)
-{
-   nir_intrinsic_instr *load_reg = nir_load_reg_for_def(src.ssa);
-
-   dst_reg reg;
-   if (load_reg) {
-      nir_src *indirect =
-         (load_reg->intrinsic == nir_intrinsic_load_reg_indirect) ?
-         &load_reg->src[1] : NULL;
-
-      reg = dst_reg_for_nir_reg(this, load_reg->src[0].ssa,
-                                      nir_intrinsic_base(load_reg),
-                                      indirect);
-   } else {
-      reg = nir_ssa_values[src.ssa->index];
-   }
-
-   reg = retype(reg, type);
-
-   src_reg reg_as_src = src_reg(reg);
-   reg_as_src.swizzle = brw_swizzle_for_size(num_components);
-   return reg_as_src;
-}
-
-src_reg
-vec4_visitor::get_nir_src(const nir_src &src, nir_alu_type type,
-                          unsigned num_components)
-{
-   return get_nir_src(src, brw_type_for_nir_type(devinfo, type),
-                      num_components);
-}
-
-src_reg
-vec4_visitor::get_nir_src(const nir_src &src, unsigned num_components)
-{
-   /* if type is not specified, default to signed int */
-   return get_nir_src(src, nir_type_int32, num_components);
-}
-
-src_reg
-vec4_visitor::get_nir_src_imm(const nir_src &src)
-{
-   assert(nir_src_num_components(src) == 1);
-   assert(nir_src_bit_size(src) == 32);
-   return nir_src_is_const(src) ? src_reg(brw_imm_d(nir_src_as_int(src))) :
-                                  get_nir_src(src, 1);
-}
-
-src_reg
-vec4_visitor::get_indirect_offset(nir_intrinsic_instr *instr)
-{
-   nir_src *offset_src = nir_get_io_offset_src(instr);
-
-   if (nir_src_is_const(*offset_src)) {
-      /* The only constant offset we should find is 0.  brw_nir.c's
-       * add_const_offset_to_base() will fold other constant offsets
-       * into the base index.
-       */
-      assert(nir_src_as_uint(*offset_src) == 0);
-      return src_reg();
-   }
-
-   return get_nir_src(*offset_src, BRW_REGISTER_TYPE_UD, 1);
-}
-
-static src_reg
-setup_imm_df(const vec4_builder &bld, double v)
-{
-   const intel_device_info *devinfo = bld.shader->devinfo;
-   assert(devinfo->ver == 7);
-
-   /* gfx7.5 does not support DF immediates straightforward but the DIM
-    * instruction allows to set the 64-bit immediate value.
-    */
-   if (devinfo->verx10 == 75) {
-      const vec4_builder ubld = bld.exec_all();
-      const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_DF);
-      ubld.DIM(dst, brw_imm_df(v));
-      return swizzle(src_reg(dst), BRW_SWIZZLE_XXXX);
-   }
-
-   /* gfx7 does not support DF immediates */
-   union {
-      double d;
-      struct {
-         uint32_t i1;
-         uint32_t i2;
-      };
-   } di;
-
-   di.d = v;
-
-   /* Write the low 32-bit of the constant to the X:UD channel and the
-    * high 32-bit to the Y:UD channel to build the constant in a VGRF.
-    * We have to do this twice (offset 0 and offset 1), since a DF VGRF takes
-    * two SIMD8 registers in SIMD4x2 execution. Finally, return a swizzle
-    * XXXX so any access to the VGRF only reads the constant data in these
-    * channels.
-    */
-   const dst_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
-   for (unsigned n = 0; n < 2; n++) {
-      const vec4_builder ubld = bld.exec_all().group(4, n);
-      ubld.MOV(writemask(offset(tmp, 8, n), WRITEMASK_X), brw_imm_ud(di.i1));
-      ubld.MOV(writemask(offset(tmp, 8, n), WRITEMASK_Y), brw_imm_ud(di.i2));
-   }
-
-   return swizzle(src_reg(retype(tmp, BRW_REGISTER_TYPE_DF)), BRW_SWIZZLE_XXXX);
-}
-
-void
-vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
-{
-   dst_reg reg;
-
-   if (instr->def.bit_size == 64) {
-      reg = dst_reg(VGRF, alloc.allocate(2));
-      reg.type = BRW_REGISTER_TYPE_DF;
-   } else {
-      reg = dst_reg(VGRF, alloc.allocate(1));
-      reg.type = BRW_REGISTER_TYPE_D;
-   }
-
-   const vec4_builder ibld = vec4_builder(this).at_end();
-   unsigned remaining = brw_writemask_for_size(instr->def.num_components);
-
-   /* @FIXME: consider emitting vector operations to save some MOVs in
-    * cases where the components are representable in 8 bits.
-    * For now, we emit a MOV for each distinct value.
-    */
-   for (unsigned i = 0; i < instr->def.num_components; i++) {
-      unsigned writemask = 1 << i;
-
-      if ((remaining & writemask) == 0)
-         continue;
-
-      for (unsigned j = i; j < instr->def.num_components; j++) {
-         if ((instr->def.bit_size == 32 &&
-              instr->value[i].u32 == instr->value[j].u32) ||
-             (instr->def.bit_size == 64 &&
-              instr->value[i].f64 == instr->value[j].f64)) {
-            writemask |= 1 << j;
-         }
-      }
-
-      reg.writemask = writemask;
-      if (instr->def.bit_size == 64) {
-         emit(MOV(reg, setup_imm_df(ibld, instr->value[i].f64)));
-      } else {
-         emit(MOV(reg, brw_imm_d(instr->value[i].i32)));
-      }
-
-      remaining &= ~writemask;
-   }
-
-   /* Set final writemask */
-   reg.writemask = brw_writemask_for_size(instr->def.num_components);
-
-   nir_ssa_values[instr->def.index] = reg;
-}
-
-src_reg
-vec4_visitor::get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr)
-{
-   /* SSBO stores are weird in that their index is in src[1] */
-   const unsigned src = instr->intrinsic == nir_intrinsic_store_ssbo ? 1 : 0;
-
-   if (nir_src_is_const(instr->src[src])) {
-      return brw_imm_ud(nir_src_as_uint(instr->src[src]));
-   } else {
-      return emit_uniformize(get_nir_src(instr->src[src]));
-   }
-}
-
-void
-vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
-{
-   dst_reg dest;
-   src_reg src;
-
-   switch (instr->intrinsic) {
-   case nir_intrinsic_decl_reg: {
-      unsigned bit_size = nir_intrinsic_bit_size(instr);
-      unsigned array_elems = nir_intrinsic_num_array_elems(instr);
-      if (array_elems == 0)
-         array_elems = 1;
-
-      const unsigned num_regs = array_elems * DIV_ROUND_UP(bit_size, 32);
-      dst_reg reg(VGRF, alloc.allocate(num_regs));
-      if (bit_size == 64)
-         reg.type = BRW_REGISTER_TYPE_DF;
-
-      nir_ssa_values[instr->def.index] = reg;
-      break;
-   }
-
-   case nir_intrinsic_load_reg:
-   case nir_intrinsic_load_reg_indirect:
-   case nir_intrinsic_store_reg:
-   case nir_intrinsic_store_reg_indirect:
-      /* Nothing to do with these. */
-      break;
-
-   case nir_intrinsic_load_input: {
-      assert(instr->def.bit_size == 32);
-      /* We set EmitNoIndirectInput for VS */
-      unsigned load_offset = nir_src_as_uint(instr->src[0]);
-
-      dest = get_nir_def(instr->def);
-
-      src = src_reg(ATTR, nir_intrinsic_base(instr) + load_offset,
-                    glsl_uvec4_type());
-      src = retype(src, dest.type);
-
-      /* Swizzle source based on component layout qualifier */
-      src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr));
-      emit(MOV(dest, src));
-      break;
-   }
-
-   case nir_intrinsic_store_output: {
-      assert(nir_src_bit_size(instr->src[0]) == 32);
-      unsigned store_offset = nir_src_as_uint(instr->src[1]);
-      int varying = nir_intrinsic_base(instr) + store_offset;
-      src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F,
-                        instr->num_components);
-
-      unsigned c = nir_intrinsic_component(instr);
-      output_reg[varying][c] = dst_reg(src);
-      output_num_components[varying][c] = instr->num_components;
-      break;
-   }
-
-   case nir_intrinsic_get_ssbo_size: {
-      assert(nir_src_num_components(instr->src[0]) == 1);
-      unsigned ssbo_index = nir_src_is_const(instr->src[0]) ?
-                            nir_src_as_uint(instr->src[0]) : 0;
-
-      dst_reg result_dst = get_nir_def(instr->def);
-      vec4_instruction *inst = new(mem_ctx)
-         vec4_instruction(SHADER_OPCODE_GET_BUFFER_SIZE, result_dst);
-
-      inst->base_mrf = 2;
-      inst->mlen = 1; /* always at least one */
-      inst->src[1] = brw_imm_ud(ssbo_index);
-
-      /* MRF for the first parameter */
-      src_reg lod = brw_imm_d(0);
-      int param_base = inst->base_mrf;
-      int writemask = WRITEMASK_X;
-      emit(MOV(dst_reg(MRF, param_base, glsl_int_type(), writemask), lod));
-
-      emit(inst);
-      break;
-   }
-
-   case nir_intrinsic_store_ssbo: {
-      assert(devinfo->ver == 7);
-
-      /* brw_nir_lower_mem_access_bit_sizes takes care of this */
-      assert(nir_src_bit_size(instr->src[0]) == 32);
-      assert(nir_intrinsic_write_mask(instr) ==
-             (1u << instr->num_components) - 1);
-
-      src_reg surf_index = get_nir_ssbo_intrinsic_index(instr);
-      src_reg offset_reg = retype(get_nir_src_imm(instr->src[2]),
-                                  BRW_REGISTER_TYPE_UD);
-
-      /* Value */
-      src_reg val_reg = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F, 4);
-
-      /* IvyBridge does not have a native SIMD4x2 untyped write message so untyped
-       * writes will use SIMD8 mode. In order to hide this and keep symmetry across
-       * typed and untyped messages and across hardware platforms, the
-       * current implementation of the untyped messages will transparently convert
-       * the SIMD4x2 payload into an equivalent SIMD8 payload by transposing it
-       * and enabling only channel X on the SEND instruction.
-       *
-       * The above, works well for full vector writes, but not for partial writes
-       * where we want to write some channels and not others, like when we have
-       * code such as v.xyw = vec3(1,2,4). Because the untyped write messages are
-       * quite restrictive with regards to the channel enables we can configure in
-       * the message descriptor (not all combinations are allowed) we cannot simply
-       * implement these scenarios with a single message while keeping the
-       * aforementioned symmetry in the implementation. For now we de decided that
-       * it is better to keep the symmetry to reduce complexity, so in situations
-       * such as the one described we end up emitting two untyped write messages
-       * (one for xy and another for w).
-       *
-       * The code below packs consecutive channels into a single write message,
-       * detects gaps in the vector write and if needed, sends a second message
-       * with the remaining channels. If in the future we decide that we want to
-       * emit a single message at the expense of losing the symmetry in the
-       * implementation we can:
-       *
-       * 1) For IvyBridge: Only use the red channel of the untyped write SIMD8
-       *    message payload. In this mode we can write up to 8 offsets and dwords
-       *    to the red channel only (for the two vec4s in the SIMD4x2 execution)
-       *    and select which of the 8 channels carry data to write by setting the
-       *    appropriate writemask in the dst register of the SEND instruction.
-       *    It would require to write a new generator opcode specifically for
-       *    IvyBridge since we would need to prepare a SIMD8 payload that could
-       *    use any channel, not just X.
-       *
-       * 2) For Haswell+: Simply send a single write message but set the writemask
-       *    on the dst of the SEND instruction to select the channels we want to
-       *    write. It would require to modify the current messages to receive
-       *    and honor the writemask provided.
-       */
-      const vec4_builder bld = vec4_builder(this).at_end()
-                               .annotate(current_annotation, base_ir);
-
-      emit_untyped_write(bld, surf_index, offset_reg, val_reg,
-                         1 /* dims */, instr->num_components /* size */,
-                         BRW_PREDICATE_NONE);
-      break;
-   }
-
-   case nir_intrinsic_load_ssbo: {
-      assert(devinfo->ver == 7);
-
-      /* brw_nir_lower_mem_access_bit_sizes takes care of this */
-      assert(instr->def.bit_size == 32);
-
-      src_reg surf_index = get_nir_ssbo_intrinsic_index(instr);
-      src_reg offset_reg = retype(get_nir_src_imm(instr->src[1]),
-                                  BRW_REGISTER_TYPE_UD);
-
-      /* Read the vector */
-      const vec4_builder bld = vec4_builder(this).at_end()
-         .annotate(current_annotation, base_ir);
-
-      src_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
-                                              1 /* dims */, 4 /* size*/,
-                                              BRW_PREDICATE_NONE);
-      dst_reg dest = get_nir_def(instr->def);
-      read_result.type = dest.type;
-      read_result.swizzle = brw_swizzle_for_size(instr->num_components);
-      emit(MOV(dest, read_result));
-      break;
-   }
-
-   case nir_intrinsic_ssbo_atomic:
-   case nir_intrinsic_ssbo_atomic_swap:
-      nir_emit_ssbo_atomic(lsc_op_to_legacy_atomic(lsc_aop_for_nir_intrinsic(instr)), instr);
-      break;
-
-   case nir_intrinsic_load_vertex_id:
-      unreachable("should be lowered by vertex_id_zero_based");
-
-   case nir_intrinsic_load_vertex_id_zero_base:
-   case nir_intrinsic_load_base_vertex:
-   case nir_intrinsic_load_instance_id:
-   case nir_intrinsic_load_base_instance:
-   case nir_intrinsic_load_draw_id:
-   case nir_intrinsic_load_invocation_id:
-      unreachable("should be lowered by brw_nir_lower_vs_inputs()");
-
-   case nir_intrinsic_load_uniform: {
-      /* Offsets are in bytes but they should always be multiples of 4 */
-      assert(nir_intrinsic_base(instr) % 4 == 0);
-
-      dest = get_nir_def(instr->def);
-
-      src = src_reg(dst_reg(UNIFORM, nir_intrinsic_base(instr) / 16));
-      src.type = dest.type;
-
-      /* Uniforms don't actually have to be vec4 aligned.  In the case that
-       * it isn't, we have to use a swizzle to shift things around.  They
-       * do still have the std140 alignment requirement that vec2's have to
-       * be vec2-aligned and vec3's and vec4's have to be vec4-aligned.
-       *
-       * The swizzle also works in the indirect case as the generator adds
-       * the swizzle to the offset for us.
-       */
-      const int type_size = type_sz(src.type);
-      unsigned shift = (nir_intrinsic_base(instr) % 16) / type_size;
-      assert(shift + instr->num_components <= 4);
-
-      if (nir_src_is_const(instr->src[0])) {
-         const unsigned load_offset = nir_src_as_uint(instr->src[0]);
-         /* Offsets are in bytes but they should always be multiples of 4 */
-         assert(load_offset % 4 == 0);
-
-         src.swizzle = brw_swizzle_for_size(instr->num_components);
-         dest.writemask = brw_writemask_for_size(instr->num_components);
-         unsigned offset = load_offset + shift * type_size;
-         src.offset = ROUND_DOWN_TO(offset, 16);
-         shift = (offset % 16) / type_size;
-         assert(shift + instr->num_components <= 4);
-         src.swizzle += BRW_SWIZZLE4(shift, shift, shift, shift);
-
-         emit(MOV(dest, src));
-      } else {
-         /* Uniform arrays are vec4 aligned, because of std140 alignment
-          * rules.
-          */
-         assert(shift == 0);
-
-         src_reg indirect = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);
-
-         /* MOV_INDIRECT is going to stomp the whole thing anyway */
-         dest.writemask = WRITEMASK_XYZW;
-
-         emit(SHADER_OPCODE_MOV_INDIRECT, dest, src,
-              indirect, brw_imm_ud(nir_intrinsic_range(instr)));
-      }
-      break;
-   }
-
-   case nir_intrinsic_load_ubo: {
-      src_reg surf_index;
-
-      dest = get_nir_def(instr->def);
-
-      if (nir_src_is_const(instr->src[0])) {
-         /* The block index is a constant, so just emit the binding table entry
-          * as an immediate.
-          */
-         const unsigned index = nir_src_as_uint(instr->src[0]);
-         surf_index = brw_imm_ud(index);
-      } else {
-         /* The block index is not a constant. Evaluate the index expression
-          * per-channel and add the base UBO index; we have to select a value
-          * from any live channel.
-          */
-         surf_index = src_reg(this, glsl_uint_type());
-         emit(MOV(dst_reg(surf_index), get_nir_src(instr->src[0], nir_type_int32,
-                                                   instr->num_components)));
-         surf_index = emit_uniformize(surf_index);
-      }
-
-      src_reg push_reg;
-      src_reg offset_reg;
-      if (nir_src_is_const(instr->src[1])) {
-         unsigned load_offset = nir_src_as_uint(instr->src[1]);
-         unsigned aligned_offset = load_offset & ~15;
-         offset_reg = brw_imm_ud(aligned_offset);
-
-         /* See if we've selected this as a push constant candidate */
-         if (nir_src_is_const(instr->src[0])) {
-            const unsigned ubo_block = nir_src_as_uint(instr->src[0]);
-            const unsigned offset_256b = aligned_offset / 32;
-
-            for (int i = 0; i < 4; i++) {
-               const struct brw_ubo_range *range = &prog_data->base.ubo_ranges[i];
-               if (range->block == ubo_block &&
-                   offset_256b >= range->start &&
-                   offset_256b < range->start + range->length) {
-
-                  push_reg = src_reg(dst_reg(UNIFORM, UBO_START + i));
-                  push_reg.type = dest.type;
-                  push_reg.offset = aligned_offset - 32 * range->start;
-                  break;
-               }
-            }
-         }
-      } else {
-         offset_reg = src_reg(this, glsl_uint_type());
-         emit(MOV(dst_reg(offset_reg),
-                  get_nir_src(instr->src[1], nir_type_uint32, 1)));
-      }
-
-      src_reg packed_consts;
-      if (push_reg.file != BAD_FILE) {
-         packed_consts = push_reg;
-      } else if (instr->def.bit_size == 32) {
-         packed_consts = src_reg(this, glsl_vec4_type());
-         emit_pull_constant_load_reg(dst_reg(packed_consts),
-                                     surf_index,
-                                     offset_reg,
-                                     NULL, NULL /* before_block/inst */);
-         prog_data->base.has_ubo_pull = true;
-      } else {
-         src_reg temp = src_reg(this, glsl_dvec4_type());
-         src_reg temp_float = retype(temp, BRW_REGISTER_TYPE_F);
-
-         emit_pull_constant_load_reg(dst_reg(temp_float),
-                                     surf_index, offset_reg, NULL, NULL);
-         if (offset_reg.file == IMM)
-            offset_reg.ud += 16;
-         else
-            emit(ADD(dst_reg(offset_reg), offset_reg, brw_imm_ud(16u)));
-         emit_pull_constant_load_reg(dst_reg(byte_offset(temp_float, REG_SIZE)),
-                                     surf_index, offset_reg, NULL, NULL);
-         prog_data->base.has_ubo_pull = true;
-
-         packed_consts = src_reg(this, glsl_dvec4_type());
-         shuffle_64bit_data(dst_reg(packed_consts), temp, false);
-      }
-
-      packed_consts.swizzle = brw_swizzle_for_size(instr->num_components);
-      if (nir_src_is_const(instr->src[1])) {
-         unsigned load_offset = nir_src_as_uint(instr->src[1]);
-         unsigned type_size = type_sz(dest.type);
-         packed_consts.swizzle +=
-            BRW_SWIZZLE4(load_offset % 16 / type_size,
-                         load_offset % 16 / type_size,
-                         load_offset % 16 / type_size,
-                         load_offset % 16 / type_size);
-      }
-
-      emit(MOV(dest, retype(packed_consts, dest.type)));
-
-      break;
-   }
-
-   case nir_intrinsic_barrier: {
-      if (nir_intrinsic_memory_scope(instr) == SCOPE_NONE)
-         break;
-      const vec4_builder bld =
-         vec4_builder(this).at_end().annotate(current_annotation, base_ir);
-      const dst_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD);
-      vec4_instruction *fence =
-         bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp, brw_vec8_grf(0, 0));
-      fence->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
-      break;
-   }
-
-   case nir_intrinsic_shader_clock: {
-      /* We cannot do anything if there is an event, so ignore it for now */
-      const src_reg shader_clock = get_timestamp();
-      const enum brw_reg_type type = brw_type_for_base_type(glsl_uvec2_type());
-
-      dest = get_nir_def(instr->def, type);
-      emit(MOV(dest, shader_clock));
-      break;
-   }
-
-   default:
-      unreachable("Unknown intrinsic");
-   }
-}
-
-/**
- * Emit an untyped atomic for an SSBO intrinsic.
- *
- * \param op     BRW_AOP_* code selecting the atomic operation.
- * \param instr  Intrinsic providing the surface index, the offset (src[1])
- *               and, depending on \p op, the data operands (src[2], src[3]).
- */
-void
-vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr)
-{
-   dst_reg dest;
-   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
-      dest = get_nir_def(instr->def);
-
-   src_reg surface = get_nir_ssbo_intrinsic_index(instr);
-   src_reg offset = get_nir_src(instr->src[1], 1);
-
-   /* INC, DEC and PREDEC carry no data operand; everything else reads
-    * src[2], and CMPWR additionally reads src[3].
-    */
-   const bool has_first_operand =
-      !(op == BRW_AOP_INC || op == BRW_AOP_DEC || op == BRW_AOP_PREDEC);
-
-   src_reg first_operand;
-   if (has_first_operand)
-      first_operand = get_nir_src(instr->src[2], 1);
-
-   src_reg second_operand;
-   if (op == BRW_AOP_CMPWR)
-      second_operand = get_nir_src(instr->src[3], 1);
-
-   /* Emit the actual atomic operation */
-   const vec4_builder bld =
-      vec4_builder(this).at_end().annotate(current_annotation, base_ir);
-
-   src_reg atomic_result =
-      emit_untyped_atomic(bld, surface, offset,
-                          first_operand, second_operand,
-                          1 /* dims */, 1 /* rsize */,
-                          op, BRW_PREDICATE_NONE);
-
-   /* Copy the result out using the type the atomic produced. */
-   dest.type = atomic_result.type;
-   bld.MOV(dest, atomic_result);
-}
-
-/* Pack the first four entries of a NIR per-component swizzle array into a
- * single swizzle value using BRW_SWIZZLE4.
- */
-static unsigned
-brw_swizzle_for_nir_swizzle(uint8_t swizzle[4])
-{
-   const uint8_t x = swizzle[0];
-   const uint8_t y = swizzle[1];
-   const uint8_t z = swizzle[2];
-   const uint8_t w = swizzle[3];
-
-   return BRW_SWIZZLE4(x, y, z, w);
-}
-
-/**
- * Try to turn the comparison feeding \p instr's first source directly into a
- * predicate.
- *
- * When that source is produced by one of the b32any_* / b32all_* ALU
- * comparisons, a CMP writing the null register is emitted and \p predicate
- * is set to the matching ALIGN16 ANY4H / ALL4H predicate.
- *
- * \return true if the CMP was emitted and \p predicate was written, false
- *         if the producer is not a supported comparison (nothing emitted).
- */
-bool
-vec4_visitor::optimize_predicate(nir_alu_instr *instr,
-                                 enum brw_predicate *predicate)
-{
-   nir_instr *producer = instr->src[0].src.ssa->parent_instr;
-   if (producer->type != nir_instr_type_alu)
-      return false;
-
-   nir_alu_instr *cmp = nir_instr_as_alu(producer);
-
-   switch (cmp->op) {
-   case nir_op_b32any_fnequal2:
-   case nir_op_b32any_fnequal3:
-   case nir_op_b32any_fnequal4:
-   case nir_op_b32any_inequal2:
-   case nir_op_b32any_inequal3:
-   case nir_op_b32any_inequal4:
-      *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
-      break;
-   case nir_op_b32all_fequal2:
-   case nir_op_b32all_fequal3:
-   case nir_op_b32all_fequal4:
-   case nir_op_b32all_iequal2:
-   case nir_op_b32all_iequal3:
-   case nir_op_b32all_iequal4:
-      *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
-      break;
-   default:
-      return false;
-   }
-
-   const auto &cmp_info = nir_op_infos[cmp->op];
-   assert(cmp_info.num_inputs == 2);
-
-   /* Swizzle covering only the components the comparison actually reads. */
-   const unsigned size_swizzle = brw_swizzle_for_size(cmp_info.input_sizes[0]);
-
-   src_reg operands[2];
-   for (unsigned s = 0; s < 2; s++) {
-      /* Combine the opcode's base input type with the source's bit size. */
-      const unsigned bit_size = nir_src_bit_size(cmp->src[s].src);
-      const nir_alu_type sized_type =
-         (nir_alu_type) (((unsigned) cmp_info.input_types[s]) | bit_size);
-
-      operands[s] = get_nir_src(cmp->src[s].src, sized_type, 4);
-
-      const unsigned nir_swizzle =
-         brw_swizzle_for_nir_swizzle(cmp->src[s].swizzle);
-      operands[s].swizzle = brw_compose_swizzle(size_swizzle, nir_swizzle);
-   }
-
-   const enum brw_conditional_mod cmod = brw_cmod_for_nir_comparison(cmp->op);
-   emit(CMP(dst_null_d(), operands[0], operands[1], cmod));
-
-   return true;
-}
-
-/**
- * Emit the instruction sequence converting a double-precision \p src into
- * the 32-bit type of \p dst (D, UD or F).
- *
- * The source is first copied into a dvec4 temporary, converted into a second
- * dvec4 temporary, and the low 32 bits of each element are then picked and
- * moved to \p dst.
- */
-void
-vec4_visitor::emit_conversion_from_double(dst_reg dst, src_reg src)
-{
-   enum opcode convert_op;
-   switch (dst.type) {
-   case BRW_REGISTER_TYPE_F:
-      convert_op = VEC4_OPCODE_DOUBLE_TO_F32;
-      break;
-   case BRW_REGISTER_TYPE_D:
-      convert_op = VEC4_OPCODE_DOUBLE_TO_D32;
-      break;
-   case BRW_REGISTER_TYPE_UD:
-      convert_op = VEC4_OPCODE_DOUBLE_TO_U32;
-      break;
-   default:
-      unreachable("Unknown conversion");
-   }
-
-   /* Stage the source in a temporary before converting. */
-   dst_reg staged = dst_reg(this, glsl_dvec4_type());
-   emit(MOV(staged, src));
-
-   dst_reg converted = dst_reg(this, glsl_dvec4_type());
-   emit(convert_op, converted, src_reg(staged));
-
-   /* Pick the low 32 bits in place, then copy them to the destination. */
-   dst_reg narrowed = retype(converted, dst.type);
-   emit(VEC4_OPCODE_PICK_LOW_32BIT, narrowed, src_reg(converted));
-   emit(MOV(dst, src_reg(narrowed)));
-}
-
-/**
- * Emit the instruction sequence converting \p src to double precision and
- * storing the result in \p dst.
- *
- * The source is copied into a vec4 temporary (retyped to the source type),
- * converted with VEC4_OPCODE_TO_DOUBLE into a dvec4 temporary, and finally
- * moved to \p dst.
- */
-void
-vec4_visitor::emit_conversion_to_double(dst_reg dst, src_reg src)
-{
-   /* Allocate the wide result first, then the staging register. */
-   src_reg wide(this, glsl_dvec4_type());
-   dst_reg wide_dst(wide);
-
-   src_reg staged = retype(src_reg(this, glsl_vec4_type()), src.type);
-
-   emit(MOV(dst_reg(staged), src));
-   emit(VEC4_OPCODE_TO_DOUBLE, wide_dst, staged);
-   emit(MOV(dst, src_reg(wide_dst)));
-}
-
-/**
- * Attempt to replace one operand of an ALU instruction with an immediate
- *
- * Constant propagation cannot always prove, across flow control, that a
- * register holds a constant.  As a workaround this helper looks at the NIR
- * sources directly and, when a 32-bit source is constant, rewrites the
- * corresponding entry of \c op as a literal.  Source 1 is tried first;
- * source 0 is only considered when \c try_src0_also is set.
- *
- * If source 0 was the one converted (and the instruction is not a MOV), the
- * two operands are exchanged so the immediate ends up in \c op[1].
- *
- * \return Index (0 or 1) of the NIR source that was converted on success,
- * -1 if no operand was changed.
- *
- * \param instr - ALU instruction whose sources are inspected
- * \param op - Operands to the instruction; modified in place on success
- * \param try_src0_also - True if \c op[0] may also become the immediate.
- *                        Should only be set for commutative operations.
- */
-static int
-try_immediate_source(const nir_alu_instr *instr, src_reg *op,
-                     bool try_src0_also)
-{
-   const bool is_mov = instr->op == nir_op_mov;
-
-   /* The only unary instruction expected here is MOV; any other unary
-    * instruction with a constant source should already have been
-    * constant-folded.
-    */
-   assert(is_mov || nir_op_infos[instr->op].num_inputs > 1);
-
-   unsigned idx;
-   if (!is_mov &&
-       nir_src_bit_size(instr->src[1].src) == 32 &&
-       nir_src_is_const(instr->src[1].src)) {
-      idx = 1;
-   } else if (try_src0_also &&
-              nir_src_bit_size(instr->src[0].src) == 32 &&
-              nir_src_is_const(instr->src[0].src)) {
-      idx = 0;
-   } else {
-      return -1;
-   }
-
-   const nir_alu_src &nir_operand = instr->src[idx];
-   src_reg &target = op[idx];
-   const enum brw_reg_type old_type = target.type;
-
-   switch (old_type) {
-   case BRW_REGISTER_TYPE_D:
-   case BRW_REGISTER_TYPE_UD: {
-      /* Every channel that is read must hold the same integer value. */
-      int first_comp = -1;
-      int d = 0;
-
-      for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
-         if (!nir_alu_instr_channel_used(instr, idx, c))
-            continue;
-
-         const int64_t value =
-            nir_src_comp_as_int(nir_operand.src, nir_operand.swizzle[c]);
-
-         if (first_comp < 0) {
-            first_comp = c;
-            d = value;
-         } else if (d != value) {
-            return -1;
-         }
-      }
-
-      assert(first_comp >= 0);
-
-      /* Fold the source modifiers into the literal. */
-      if (target.abs)
-         d = MAX2(-d, d);
-
-      if (target.negate)
-         d = -d;
-
-      target = retype(src_reg(brw_imm_d(d)), old_type);
-      break;
-   }
-
-   case BRW_REGISTER_TYPE_F: {
-      float f[NIR_MAX_VEC_COMPONENTS] = { 0.0f };
-      int first_comp = -1;
-      bool all_channels_equal = true;
-
-      for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
-         if (!nir_alu_instr_channel_used(instr, idx, c))
-            continue;
-
-         f[c] = nir_src_comp_as_float(nir_operand.src,
-                                      nir_operand.swizzle[c]);
-
-         if (first_comp < 0)
-            first_comp = c;
-         else if (f[first_comp] != f[c])
-            all_channels_equal = false;
-      }
-
-      if (all_channels_equal) {
-         /* One value for all read channels: use a plain float immediate. */
-         if (target.abs)
-            f[first_comp] = fabs(f[first_comp]);
-
-         if (target.negate)
-            f[first_comp] = -f[first_comp];
-
-         target = src_reg(brw_imm_f(f[first_comp]));
-         assert(target.type == old_type);
-      } else {
-         /* Differing values: try a packed VF immediate, bailing out if any
-          * of the four values cannot be encoded.
-          */
-         uint8_t vf_values[4] = { 0, 0, 0, 0 };
-
-         for (unsigned c = 0; c < ARRAY_SIZE(vf_values); c++) {
-            if (target.abs)
-               f[c] = fabs(f[c]);
-
-            if (target.negate)
-               f[c] = -f[c];
-
-            const int encoded = brw_float_to_vf(f[c]);
-            if (encoded == -1)
-               return -1;
-
-            vf_values[c] = encoded;
-         }
-
-         target = src_reg(brw_imm_vf4(vf_values[0], vf_values[1],
-                                      vf_values[2], vf_values[3]));
-      }
-      break;
-   }
-
-   default:
-      unreachable("Non-32bit type.");
-   }
-
-   /* For instructions with more than one source, only source 1 may be an
-    * immediate.  If source 0 was converted, exchange the two operands.
-    */
-   if (idx == 0 && !is_mov) {
-      src_reg saved = op[0];
-      op[0] = op[1];
-      op[1] = saved;
-   }
-
-   return idx;
-}
-
-/**
- * Run every operand of a three-source instruction through
- * fix_3src_operand(), sharing the result between constant sources that are
- * equal or negative-equal.
- *
- * For each pair of constant NIR sources (i, j) with i < j that compare equal,
- * or equal up to negation, op[i] is fixed once and op[j] becomes a copy of it
- * (with the negate flag flipped in the negative-equal case).  Operands not
- * covered by such a pair are fixed individually afterwards.
- */
-void
-vec4_visitor::fix_float_operands(src_reg op[3], nir_alu_instr *instr)
-{
-   bool fixed[3] = { false, false, false };
-
-   for (unsigned i = 0; i < 2; i++) {
-      if (!nir_src_is_const(instr->src[i].src))
-         continue;
-
-      for (unsigned j = i + 1; j < 3; j++) {
-         if (fixed[j] || !nir_src_is_const(instr->src[j].src))
-            continue;
-
-         /* Only query negative-equality when the sources are not equal. */
-         const bool same = nir_alu_srcs_equal(instr, instr, i, j);
-         const bool negated =
-            !same && nir_alu_srcs_negative_equal(instr, instr, i, j);
-
-         if (!same && !negated)
-            continue;
-
-         if (!fixed[i])
-            op[i] = fix_3src_operand(op[i]);
-
-         op[j] = op[i];
-         if (negated)
-            op[j].negate = !op[j].negate;
-
-         fixed[i] = true;
-         fixed[j] = true;
-      }
-   }
-
-   /* Whatever was not handled as part of a pair gets fixed on its own. */
-   for (unsigned i = 0; i < 3; i++) {
-      if (fixed[i])
-         continue;
-
-      op[i] = fix_3src_operand(op[i]);
-   }
-}
-
-/* Report whether component 0 of the constant source \p src is representable
- * in 16 bits, interpreting it as unsigned or signed according to \p type.
- * Only component 0 is examined.
- */
-static bool
-const_src_fits_in_16_bits(const nir_src &src, brw_reg_type type)
-{
-   assert(nir_src_is_const(src));
-
-   if (brw_reg_type_is_unsigned_integer(type))
-      return nir_src_comp_as_uint(src, 0) <= UINT16_MAX;
-
-   const int64_t value = nir_src_comp_as_int(src, 0);
-   return value >= INT16_MIN && value <= INT16_MAX;
-}
-
-void
-vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
-{
-   vec4_instruction *inst;
-
-   nir_alu_type dst_type = (nir_alu_type) (nir_op_infos[instr->op].output_type |
-                                           instr->def.bit_size);
-   dst_reg dst = get_nir_def(instr->def, dst_type);
-   dst.writemask &= nir_component_mask(instr->def.num_components);
-
-   src_reg op[4];
-   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
-      nir_alu_type src_type = (nir_alu_type)
-         (nir_op_infos[instr->op].input_types[i] |
-          nir_src_bit_size(instr->src[i].src));
-      op[i] = get_nir_src(instr->src[i].src, src_type, 4);
-      op[i].swizzle = brw_swizzle_for_nir_swizzle(instr->src[i].swizzle);
-   }
-
-#ifndef NDEBUG
-   /* On Gen7 and earlier, no functionality is exposed that should allow 8-bit
-    * integer types to ever exist.
-    */
-   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
-      assert(type_sz(op[i].type) > 1);
-#endif
-
-   switch (instr->op) {
-   case nir_op_mov:
-      try_immediate_source(instr, &op[0], true);
-      inst = emit(MOV(dst, op[0]));
-      break;
-
-   case nir_op_vec2:
-   case nir_op_vec3:
-   case nir_op_vec4:
-      unreachable("not reached: should be handled by lower_vec_to_movs()");
-
-   case nir_op_i2f32:
-   case nir_op_u2f32:
-      inst = emit(MOV(dst, op[0]));
-      break;
-
-   case nir_op_f2f32:
-   case nir_op_f2i32:
-   case nir_op_f2u32:
-      if (nir_src_bit_size(instr->src[0].src) == 64)
-         emit_conversion_from_double(dst, op[0]);
-      else
-         inst = emit(MOV(dst, op[0]));
-      break;
-
-   case nir_op_f2f64:
-   case nir_op_i2f64:
-   case nir_op_u2f64:
-      emit_conversion_to_double(dst, op[0]);
-      break;
-
-   case nir_op_fsat:
-      inst = emit(MOV(dst, op[0]));
-      inst->saturate = true;
-      break;
-
-   case nir_op_fneg:
-   case nir_op_ineg:
-      op[0].negate = true;
-      inst = emit(MOV(dst, op[0]));
-      break;
-
-   case nir_op_fabs:
-   case nir_op_iabs:
-      op[0].negate = false;
-      op[0].abs = true;
-      inst = emit(MOV(dst, op[0]));
-      break;
-
-   case nir_op_iadd:
-      assert(instr->def.bit_size < 64);
-      FALLTHROUGH;
-   case nir_op_fadd:
-      try_immediate_source(instr, op, true);
-      inst = emit(ADD(dst, op[0], op[1]));
-      break;
-
-   case nir_op_uadd_sat:
-      assert(instr->def.bit_size < 64);
-      inst = emit(ADD(dst, op[0], op[1]));
-      inst->saturate = true;
-      break;
-
-   case nir_op_fmul:
-      try_immediate_source(instr, op, true);
-      inst = emit(MUL(dst, op[0], op[1]));
-      break;
-
-   case nir_op_imul: {
-      assert(instr->def.bit_size < 64);
-
-      /* For integer multiplication, the MUL uses the low 16 bits of one of
-       * the operands (src0 through SNB, src1 on IVB and later). The MACH
-       * accumulates in the contribution of the upper 16 bits of that
-       * operand. If we can determine that one of the args is in the low
-       * 16 bits, though, we can just emit a single MUL.
-       */
-      if (nir_src_is_const(instr->src[0].src) &&
-          nir_alu_instr_src_read_mask(instr, 0) == 1 &&
-          const_src_fits_in_16_bits(instr->src[0].src, op[0].type)) {
-         if (devinfo->ver < 7)
-            emit(MUL(dst, op[0], op[1]));
-         else
-            emit(MUL(dst, op[1], op[0]));
-      } else if (nir_src_is_const(instr->src[1].src) &&
-                 nir_alu_instr_src_read_mask(instr, 1) == 1 &&
-                 const_src_fits_in_16_bits(instr->src[1].src, op[1].type)) {
-         if (devinfo->ver < 7)
-            emit(MUL(dst, op[1], op[0]));
-         else
-            emit(MUL(dst, op[0], op[1]));
-      } else {
-         struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
-
-         emit(MUL(acc, op[0], op[1]));
-         emit(MACH(dst_null_d(), op[0], op[1]));
-         emit(MOV(dst, src_reg(acc)));
-      }
-      break;
-   }
-
-   case nir_op_imul_high:
-   case nir_op_umul_high: {
-      assert(instr->def.bit_size < 64);
-      struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
-
-      emit(MUL(acc, op[0], op[1]));
-      emit(MACH(dst, op[0], op[1]));
-      break;
-   }
-
-   case nir_op_frcp:
-      inst = emit_math(SHADER_OPCODE_RCP, dst, op[0]);
-      break;
-
-   case nir_op_fexp2:
-      inst = emit_math(SHADER_OPCODE_EXP2, dst, op[0]);
-      break;
-
-   case nir_op_flog2:
-      inst = emit_math(SHADER_OPCODE_LOG2, dst, op[0]);
-      break;
-
-   case nir_op_fsin:
-      inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);
-      break;
-
-   case nir_op_fcos:
-      inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);
-      break;
-
-   case nir_op_idiv:
-   case nir_op_udiv:
-      assert(instr->def.bit_size < 64);
-      emit_math(SHADER_OPCODE_INT_QUOTIENT, dst, op[0], op[1]);
-      break;
-
-   case nir_op_umod:
-   case nir_op_irem:
-      /* According to the sign table for INT DIV in the Ivy Bridge PRM, it
-       * appears that our hardware just does the right thing for signed
-       * remainder.
-       */
-      assert(instr->def.bit_size < 64);
-      emit_math(SHADER_OPCODE_INT_REMAINDER, dst, op[0], op[1]);
-      break;
-
-   case nir_op_imod: {
-      /* Get a regular C-style remainder.  If a % b == 0, set the predicate. */
-      inst = emit_math(SHADER_OPCODE_INT_REMAINDER, dst, op[0], op[1]);
-
-      /* Math instructions don't support conditional mod */
-      inst = emit(MOV(dst_null_d(), src_reg(dst)));
-      inst->conditional_mod = BRW_CONDITIONAL_NZ;
-
-      /* Now, we need to determine if signs of the sources are different.
-       * When we XOR the sources, the top bit is 0 if they are the same and 1
-       * if they are different.  We can then use a conditional modifier to
-       * turn that into a predicate.  This leads us to an XOR.l instruction.
-       *
-       * Technically, according to the PRM, you're not allowed to use .l on a
-       * XOR instruction.  However, empirical experiments and Curro's reading
-       * of the simulator source both indicate that it's safe.
-       */
-      src_reg tmp = src_reg(this, glsl_ivec4_type());
-      inst = emit(XOR(dst_reg(tmp), op[0], op[1]));
-      inst->predicate = BRW_PREDICATE_NORMAL;
-      inst->conditional_mod = BRW_CONDITIONAL_L;
-
-      /* If the result of the initial remainder operation is non-zero and the
-       * two sources have different signs, add in a copy of op[1] to get the
-       * final integer modulus value.
-       */
-      inst = emit(ADD(dst, src_reg(dst), op[1]));
-      inst->predicate = BRW_PREDICATE_NORMAL;
-      break;
-   }
-
-   case nir_op_ldexp:
-      unreachable("not reached: should be handled by ldexp_to_arith()");
-
-   case nir_op_fsqrt:
-      inst = emit_math(SHADER_OPCODE_SQRT, dst, op[0]);
-      break;
-
-   case nir_op_frsq:
-      inst = emit_math(SHADER_OPCODE_RSQ, dst, op[0]);
-      break;
-
-   case nir_op_fpow:
-      inst = emit_math(SHADER_OPCODE_POW, dst, op[0], op[1]);
-      break;
-
-   case nir_op_uadd_carry: {
-      assert(instr->def.bit_size < 64);
-      struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD);
-
-      emit(ADDC(dst_null_ud(), op[0], op[1]));
-      emit(MOV(dst, src_reg(acc)));
-      break;
-   }
-
-   case nir_op_usub_borrow: {
-      assert(instr->def.bit_size < 64);
-      struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD);
-
-      emit(SUBB(dst_null_ud(), op[0], op[1]));
-      emit(MOV(dst, src_reg(acc)));
-      break;
-   }
-
-   case nir_op_ftrunc:
-      inst = emit(RNDZ(dst, op[0]));
-      if (devinfo->ver < 6) {
-         inst->conditional_mod = BRW_CONDITIONAL_R;
-         inst = emit(ADD(dst, src_reg(dst), brw_imm_f(1.0f)));
-         inst->predicate = BRW_PREDICATE_NORMAL;
-         inst = emit(MOV(dst, src_reg(dst))); /* for potential saturation */
-      }
-      break;
-
-   case nir_op_fceil: {
-      src_reg tmp = src_reg(this, glsl_float_type());
-      tmp.swizzle = brw_swizzle_for_size(nir_src_num_components(instr->src[0].src));
-
-      op[0].negate = !op[0].negate;
-      emit(RNDD(dst_reg(tmp), op[0]));
-      tmp.negate = true;
-      inst = emit(MOV(dst, tmp));
-      break;
-   }
-
-   case nir_op_ffloor:
-      inst = emit(RNDD(dst, op[0]));
-      break;
-
-   case nir_op_ffract:
-      inst = emit(FRC(dst, op[0]));
-      break;
-
-   case nir_op_fround_even:
-      inst = emit(RNDE(dst, op[0]));
-      if (devinfo->ver < 6) {
-         inst->conditional_mod = BRW_CONDITIONAL_R;
-         inst = emit(ADD(dst, src_reg(dst), brw_imm_f(1.0f)));
-         inst->predicate = BRW_PREDICATE_NORMAL;
-         inst = emit(MOV(dst, src_reg(dst))); /* for potential saturation */
-      }
-      break;
-
-   case nir_op_fquantize2f16: {
-      /* See also vec4_visitor::emit_pack_half_2x16() */
-      src_reg tmp16 = src_reg(this, glsl_uvec4_type());
-      src_reg tmp32 = src_reg(this, glsl_vec4_type());
-      src_reg zero = src_reg(this, glsl_vec4_type());
-
-      /* Check for denormal */
-      src_reg abs_src0 = op[0];
-      abs_src0.abs = true;
-      emit(CMP(dst_null_f(), abs_src0, brw_imm_f(ldexpf(1.0, -14)),
-               BRW_CONDITIONAL_L));
-      /* Get the appropriately signed zero */
-      emit(AND(retype(dst_reg(zero), BRW_REGISTER_TYPE_UD),
-               retype(op[0], BRW_REGISTER_TYPE_UD),
-               brw_imm_ud(0x80000000)));
-      /* Do the actual F32 -> F16 -> F32 conversion */
-      emit(F32TO16(dst_reg(tmp16), op[0]));
-      emit(F16TO32(dst_reg(tmp32), tmp16));
-      /* Select that or zero based on normal status */
-      inst = emit(BRW_OPCODE_SEL, dst, zero, tmp32);
-      inst->predicate = BRW_PREDICATE_NORMAL;
-      break;
-   }
-
-   case nir_op_imin:
-   case nir_op_umin:
-      assert(instr->def.bit_size < 64);
-      FALLTHROUGH;
-   case nir_op_fmin:
-      try_immediate_source(instr, op, true);
-      inst = emit_minmax(BRW_CONDITIONAL_L, dst, op[0], op[1]);
-      break;
-
-   case nir_op_imax:
-   case nir_op_umax:
-      assert(instr->def.bit_size < 64);
-      FALLTHROUGH;
-   case nir_op_fmax:
-      try_immediate_source(instr, op, true);
-      inst = emit_minmax(BRW_CONDITIONAL_GE, dst, op[0], op[1]);
-      break;
-
-   case nir_op_fddx:
-   case nir_op_fddx_coarse:
-   case nir_op_fddx_fine:
-   case nir_op_fddy:
-   case nir_op_fddy_coarse:
-   case nir_op_fddy_fine:
-      unreachable("derivatives are not valid in vertex shaders");
-
-   case nir_op_ilt32:
-   case nir_op_ult32:
-   case nir_op_ige32:
-   case nir_op_uge32:
-   case nir_op_ieq32:
-   case nir_op_ine32:
-      assert(instr->def.bit_size < 64);
-      FALLTHROUGH;
-   case nir_op_flt32:
-   case nir_op_fge32:
-   case nir_op_feq32:
-   case nir_op_fneu32: {
-      enum brw_conditional_mod conditional_mod =
-         brw_cmod_for_nir_comparison(instr->op);
-
-      if (nir_src_bit_size(instr->src[0].src) < 64) {
-         /* If the order of the sources is changed due to an immediate value,
-          * then the condition must also be changed.
-          */
-         if (try_immediate_source(instr, op, true) == 0)
-            conditional_mod = brw_swap_cmod(conditional_mod);
-
-         emit(CMP(dst, op[0], op[1], conditional_mod));
-      } else {
-         /* Produce a 32-bit boolean result from the DF comparison by selecting
-          * only the low 32-bit in each DF produced. Do this in a temporary
-          * so we can then move from there to the result using align16 again
-          * to honor the original writemask.
-          */
-         dst_reg temp = dst_reg(this, glsl_dvec4_type());
-         emit(CMP(temp, op[0], op[1], conditional_mod));
-         dst_reg result = dst_reg(this, glsl_bvec4_type());
-         emit(VEC4_OPCODE_PICK_LOW_32BIT, result, src_reg(temp));
-         emit(MOV(dst, src_reg(result)));
-      }
-      break;
-   }
-
-   case nir_op_b32all_iequal2:
-   case nir_op_b32all_iequal3:
-   case nir_op_b32all_iequal4:
-      assert(instr->def.bit_size < 64);
-      FALLTHROUGH;
-   case nir_op_b32all_fequal2:
-   case nir_op_b32all_fequal3:
-   case nir_op_b32all_fequal4: {
-      unsigned swiz =
-         brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]);
-
-      emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz),
-               brw_cmod_for_nir_comparison(instr->op)));
-      emit(MOV(dst, brw_imm_d(0)));
-      inst = emit(MOV(dst, brw_imm_d(~0)));
-      inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
-      break;
-   }
-
-   case nir_op_b32any_inequal2:
-   case nir_op_b32any_inequal3:
-   case nir_op_b32any_inequal4:
-      assert(instr->def.bit_size < 64);
-      FALLTHROUGH;
-   case nir_op_b32any_fnequal2:
-   case nir_op_b32any_fnequal3:
-   case nir_op_b32any_fnequal4: {
-      unsigned swiz =
-         brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]);
-
-      emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz),
-               brw_cmod_for_nir_comparison(instr->op)));
-
-      emit(MOV(dst, brw_imm_d(0)));
-      inst = emit(MOV(dst, brw_imm_d(~0)));
-      inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
-      break;
-   }
-
-   case nir_op_inot:
-      assert(instr->def.bit_size < 64);
-      emit(NOT(dst, op[0]));
-      break;
-
-   case nir_op_ixor:
-      assert(instr->def.bit_size < 64);
-      try_immediate_source(instr, op, true);
-      emit(XOR(dst, op[0], op[1]));
-      break;
-
-   case nir_op_ior:
-      assert(instr->def.bit_size < 64);
-      try_immediate_source(instr, op, true);
-      emit(OR(dst, op[0], op[1]));
-      break;
-
-   case nir_op_iand:
-      assert(instr->def.bit_size < 64);
-      try_immediate_source(instr, op, true);
-      emit(AND(dst, op[0], op[1]));
-      break;
-
-   case nir_op_b2i32:
-   case nir_op_b2f32:
-   case nir_op_b2f64:
-      if (instr->def.bit_size > 32) {
-         assert(dst.type == BRW_REGISTER_TYPE_DF);
-         emit_conversion_to_double(dst, negate(op[0]));
-      } else {
-         emit(MOV(dst, negate(op[0])));
-      }
-      break;
-
-   case nir_op_unpack_half_2x16_split_x:
-   case nir_op_unpack_half_2x16_split_y:
-   case nir_op_pack_half_2x16_split:
-      unreachable("not reached: should not occur in vertex shader");
-
-   case nir_op_unpack_snorm_2x16:
-   case nir_op_unpack_unorm_2x16:
-   case nir_op_pack_snorm_2x16:
-   case nir_op_pack_unorm_2x16:
-      unreachable("not reached: should be handled by lower_packing_builtins");
-
-   case nir_op_pack_uvec4_to_uint:
-      unreachable("not reached");
-
-   case nir_op_pack_uvec2_to_uint: {
-      dst_reg tmp1 = dst_reg(this, glsl_uint_type());
-      tmp1.writemask = WRITEMASK_X;
-      op[0].swizzle = BRW_SWIZZLE_YYYY;
-      emit(SHL(tmp1, op[0], src_reg(brw_imm_ud(16u))));
-
-      dst_reg tmp2 = dst_reg(this, glsl_uint_type());
-      tmp2.writemask = WRITEMASK_X;
-      op[0].swizzle = BRW_SWIZZLE_XXXX;
-      emit(AND(tmp2, op[0], src_reg(brw_imm_ud(0xffffu))));
-
-      emit(OR(dst, src_reg(tmp1), src_reg(tmp2)));
-      break;
-   }
-
-   case nir_op_pack_64_2x32_split: {
-      dst_reg result = dst_reg(this, glsl_dvec4_type());
-      dst_reg tmp = dst_reg(this, glsl_uvec4_type());
-      emit(MOV(tmp, retype(op[0], BRW_REGISTER_TYPE_UD)));
-      emit(VEC4_OPCODE_SET_LOW_32BIT, result, src_reg(tmp));
-      emit(MOV(tmp, retype(op[1], BRW_REGISTER_TYPE_UD)));
-      emit(VEC4_OPCODE_SET_HIGH_32BIT, result, src_reg(tmp));
-      emit(MOV(dst, src_reg(result)));
-      break;
-   }
-
-   case nir_op_unpack_64_2x32_split_x:
-   case nir_op_unpack_64_2x32_split_y: {
-      enum opcode oper = (instr->op == nir_op_unpack_64_2x32_split_x) ?
-         VEC4_OPCODE_PICK_LOW_32BIT : VEC4_OPCODE_PICK_HIGH_32BIT;
-      dst_reg tmp = dst_reg(this, glsl_dvec4_type());
-      emit(MOV(tmp, op[0]));
-      dst_reg tmp2 = dst_reg(this, glsl_uvec4_type());
-      emit(oper, tmp2, src_reg(tmp));
-      emit(MOV(dst, src_reg(tmp2)));
-      break;
-   }
-
-   case nir_op_unpack_half_2x16:
-      /* As NIR does not guarantee that we have a correct swizzle outside the
-       * boundaries of a vector, and the implementation of emit_unpack_half_2x16
-       * uses the source operand in an operation with WRITEMASK_Y while our
-       * source operand has only size 1, it accessed incorrect data producing
-       * regressions in Piglit. We repeat the swizzle of the first component on the
-       * rest of components to avoid regressions. In the vec4_visitor IR code path
-       * this is not needed because the operand has already the correct swizzle.
-       */
-      op[0].swizzle = brw_compose_swizzle(BRW_SWIZZLE_XXXX, op[0].swizzle);
-      emit_unpack_half_2x16(dst, op[0]);
-      break;
-
-   case nir_op_pack_half_2x16:
-      emit_pack_half_2x16(dst, op[0]);
-      break;
-
-   case nir_op_unpack_unorm_4x8:
-      assert(instr->def.bit_size < 64);
-      emit_unpack_unorm_4x8(dst, op[0]);
-      break;
-
-   case nir_op_pack_unorm_4x8:
-      assert(instr->def.bit_size < 64);
-      emit_pack_unorm_4x8(dst, op[0]);
-      break;
-
-   case nir_op_unpack_snorm_4x8:
-      assert(instr->def.bit_size < 64);
-      emit_unpack_snorm_4x8(dst, op[0]);
-      break;
-
-   case nir_op_pack_snorm_4x8:
-      assert(instr->def.bit_size < 64);
-      emit_pack_snorm_4x8(dst, op[0]);
-      break;
-
-   case nir_op_bitfield_reverse:
-      assert(instr->def.bit_size == 32);
-      assert(nir_src_bit_size(instr->src[0].src) == 32);
-      emit(BFREV(dst, op[0]));
-      break;
-
-   case nir_op_bit_count:
-      assert(instr->def.bit_size == 32);
-      assert(nir_src_bit_size(instr->src[0].src) < 64);
-      emit(CBIT(dst, op[0]));
-      break;
-
-   case nir_op_ifind_msb: {
-      assert(instr->def.bit_size == 32);
-      assert(nir_src_bit_size(instr->src[0].src) == 32);
-      assert(devinfo->ver >= 7);
-
-      vec4_builder bld = vec4_builder(this).at_end();
-      src_reg src(dst);
-
-      emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0]));
-
-      /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
-       * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
-       * subtract the result from 31 to convert the MSB count into an LSB
-       * count.
-       */
-      bld.CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
-
-      inst = bld.ADD(dst, src, brw_imm_d(31));
-      inst->predicate = BRW_PREDICATE_NORMAL;
-      inst->src[0].negate = true;
-      break;
-   }
-
-   case nir_op_uclz:
-      assert(instr->def.bit_size == 32);
-      assert(nir_src_bit_size(instr->src[0].src) == 32);
-      emit(LZD(dst, op[0]));
-      break;
-
-   case nir_op_find_lsb:
-      assert(instr->def.bit_size == 32);
-      assert(nir_src_bit_size(instr->src[0].src) == 32);
-      assert(devinfo->ver >= 7);
-      emit(FBL(dst, op[0]));
-      break;
-
-   case nir_op_ubitfield_extract:
-   case nir_op_ibitfield_extract:
-      unreachable("should have been lowered");
-   case nir_op_ubfe:
-   case nir_op_ibfe:
-      assert(instr->def.bit_size < 64);
-      op[0] = fix_3src_operand(op[0]);
-      op[1] = fix_3src_operand(op[1]);
-      op[2] = fix_3src_operand(op[2]);
-
-      emit(BFE(dst, op[2], op[1], op[0]));
-      break;
-
-   case nir_op_bfm:
-      assert(instr->def.bit_size < 64);
-      emit(BFI1(dst, op[0], op[1]));
-      break;
-
-   case nir_op_bfi:
-      assert(instr->def.bit_size < 64);
-      op[0] = fix_3src_operand(op[0]);
-      op[1] = fix_3src_operand(op[1]);
-      op[2] = fix_3src_operand(op[2]);
-
-      emit(BFI2(dst, op[0], op[1], op[2]));
-      break;
-
-   case nir_op_bitfield_insert:
-      unreachable("not reached: should have been lowered");
-
-   case nir_op_fsign:
-       if (type_sz(op[0].type) < 8) {
-         /* AND(val, 0x80000000) gives the sign bit.
-          *
-          * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
-          * zero.
-          */
-         emit(CMP(dst_null_f(), op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ));
-
-         op[0].type = BRW_REGISTER_TYPE_UD;
-         dst.type = BRW_REGISTER_TYPE_UD;
-         emit(AND(dst, op[0], brw_imm_ud(0x80000000u)));
-
-         inst = emit(OR(dst, src_reg(dst), brw_imm_ud(0x3f800000u)));
-         inst->predicate = BRW_PREDICATE_NORMAL;
-         dst.type = BRW_REGISTER_TYPE_F;
-      } else {
-         /* For doubles we do the same but we need to consider:
-          *
-          * - We use a MOV with conditional_mod instead of a CMP so that we can
-          *   skip loading a 0.0 immediate. We use a source modifier on the
-          *   source of the MOV so that we flush denormalized values to 0.
-          *   Since we want to compare against 0, this won't alter the result.
-          * - We need to extract the high 32-bit of each DF where the sign
-          *   is stored.
-          * - We need to produce a DF result.
-          */
-
-         /* Check for zero */
-         src_reg value = op[0];
-         value.abs = true;
-         inst = emit(MOV(dst_null_df(), value));
-         inst->conditional_mod = BRW_CONDITIONAL_NZ;
-
-         /* AND each high 32-bit channel with 0x80000000u */
-         dst_reg tmp = dst_reg(this, glsl_uvec4_type());
-         emit(VEC4_OPCODE_PICK_HIGH_32BIT, tmp, op[0]);
-         emit(AND(tmp, src_reg(tmp), brw_imm_ud(0x80000000u)));
-
-         /* Add 1.0 to each channel, predicated to skip the cases where the
-          * channel's value was 0
-          */
-         inst = emit(OR(tmp, src_reg(tmp), brw_imm_ud(0x3f800000u)));
-         inst->predicate = BRW_PREDICATE_NORMAL;
-
-         /* Now convert the result from float to double */
-         emit_conversion_to_double(dst, retype(src_reg(tmp),
-                                               BRW_REGISTER_TYPE_F));
-      }
-      break;
-
-   case nir_op_ishl:
-      assert(instr->def.bit_size < 64);
-      try_immediate_source(instr, op, false);
-      emit(SHL(dst, op[0], op[1]));
-      break;
-
-   case nir_op_ishr:
-      assert(instr->def.bit_size < 64);
-      try_immediate_source(instr, op, false);
-      emit(ASR(dst, op[0], op[1]));
-      break;
-
-   case nir_op_ushr:
-      assert(instr->def.bit_size < 64);
-      try_immediate_source(instr, op, false);
-      emit(SHR(dst, op[0], op[1]));
-      break;
-
-   case nir_op_ffma:
-      if (type_sz(dst.type) == 8) {
-         dst_reg mul_dst = dst_reg(this, glsl_dvec4_type());
-         emit(MUL(mul_dst, op[1], op[0]));
-         inst = emit(ADD(dst, src_reg(mul_dst), op[2]));
-      } else {
-         fix_float_operands(op, instr);
-         inst = emit(MAD(dst, op[2], op[1], op[0]));
-      }
-      break;
-
-   case nir_op_flrp:
-      fix_float_operands(op, instr);
-      inst = emit(LRP(dst, op[2], op[1], op[0]));
-      break;
-
-   case nir_op_b32csel:
-      enum brw_predicate predicate;
-      if (!optimize_predicate(instr, &predicate)) {
-         emit(CMP(dst_null_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ));
-         switch (dst.writemask) {
-         case WRITEMASK_X:
-            predicate = BRW_PREDICATE_ALIGN16_REPLICATE_X;
-            break;
-         case WRITEMASK_Y:
-            predicate = BRW_PREDICATE_ALIGN16_REPLICATE_Y;
-            break;
-         case WRITEMASK_Z:
-            predicate = BRW_PREDICATE_ALIGN16_REPLICATE_Z;
-            break;
-         case WRITEMASK_W:
-            predicate = BRW_PREDICATE_ALIGN16_REPLICATE_W;
-            break;
-         default:
-            predicate = BRW_PREDICATE_NORMAL;
-            break;
-         }
-      }
-      inst = emit(BRW_OPCODE_SEL, dst, op[1], op[2]);
-      inst->predicate = predicate;
-      break;
-
-   case nir_op_fdot2_replicated:
-      try_immediate_source(instr, op, true);
-      inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]);
-      break;
-
-   case nir_op_fdot3_replicated:
-      try_immediate_source(instr, op, true);
-      inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]);
-      break;
-
-   case nir_op_fdot4_replicated:
-      try_immediate_source(instr, op, true);
-      inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]);
-      break;
-
-   case nir_op_fdph_replicated:
-      try_immediate_source(instr, op, false);
-      inst = emit(BRW_OPCODE_DPH, dst, op[0], op[1]);
-      break;
-
-   case nir_op_fdiv:
-      unreachable("not reached: should be lowered by lower_fdiv in the compiler");
-
-   case nir_op_fmod:
-      unreachable("not reached: should be lowered by lower_fmod in the compiler");
-
-   case nir_op_fsub:
-   case nir_op_isub:
-      unreachable("not reached: should be handled by ir_sub_to_add_neg");
-
-   default:
-      unreachable("Unimplemented ALU operation");
-   }
-
-   /* If we need to do a boolean resolve, replace the result with -(x & 1)
-    * to sign extend the low bit to 0/~0
-    */
-   if (devinfo->ver <= 5 &&
-       (instr->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) ==
-       BRW_NIR_BOOLEAN_NEEDS_RESOLVE) {
-      dst_reg masked = dst_reg(this, glsl_int_type());
-      masked.writemask = dst.writemask;
-      emit(AND(masked, src_reg(dst), brw_imm_d(1)));
-      src_reg masked_neg = src_reg(masked);
-      masked_neg.negate = true;
-      emit(MOV(retype(dst, BRW_REGISTER_TYPE_D), masked_neg));
-   }
-}
-
-void
-vec4_visitor::nir_emit_jump(nir_jump_instr *instr)
-{
-   switch (instr->type) {
-   case nir_jump_break:
-      emit(BRW_OPCODE_BREAK);
-      break;
-
-   case nir_jump_continue:
-      emit(BRW_OPCODE_CONTINUE);
-      break;
-
-   case nir_jump_return:
-      FALLTHROUGH;
-   default:
-      unreachable("unknown jump");
-   }
-}
-
-static bool
-is_high_sampler(const struct intel_device_info *devinfo, src_reg sampler)
-{
-   if (devinfo->verx10 != 75)
-      return false;
-
-   return sampler.file != IMM || sampler.ud >= 16;
-}
-
-void
-vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
-{
-   unsigned texture = instr->texture_index;
-   unsigned sampler = instr->sampler_index;
-   src_reg texture_reg = brw_imm_ud(texture);
-   src_reg sampler_reg = brw_imm_ud(sampler);
-   src_reg coordinate;
-   const glsl_type *coord_type = NULL;
-   src_reg shadow_comparator;
-   src_reg offset_value;
-   src_reg lod, lod2;
-   src_reg sample_index;
-   src_reg mcs;
-
-   dst_reg dest = get_nir_def(instr->def, instr->dest_type);
-
-   /* The hardware requires a LOD for buffer textures */
-   if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
-      lod = brw_imm_d(0);
-
-   /* Load the texture operation sources */
-   uint32_t constant_offset = 0;
-   for (unsigned i = 0; i < instr->num_srcs; i++) {
-      switch (instr->src[i].src_type) {
-      case nir_tex_src_comparator:
-         shadow_comparator = get_nir_src(instr->src[i].src,
-                                         BRW_REGISTER_TYPE_F, 1);
-         break;
-
-      case nir_tex_src_coord: {
-         unsigned src_size = nir_tex_instr_src_size(instr, i);
-
-         switch (instr->op) {
-         case nir_texop_txf:
-         case nir_texop_txf_ms:
-         case nir_texop_samples_identical:
-            coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D,
-                                     src_size);
-            coord_type = glsl_ivec_type(src_size);
-            break;
-
-         default:
-            coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F,
-                                     src_size);
-            coord_type = glsl_vec_type(src_size);
-            break;
-         }
-         break;
-      }
-
-      case nir_tex_src_ddx:
-         lod = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F,
-                           nir_tex_instr_src_size(instr, i));
-         break;
-
-      case nir_tex_src_ddy:
-         lod2 = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F,
-                           nir_tex_instr_src_size(instr, i));
-         break;
-
-      case nir_tex_src_lod:
-         switch (instr->op) {
-         case nir_texop_txs:
-         case nir_texop_txf:
-            lod = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1);
-            break;
-
-         default:
-            lod = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F, 1);
-            break;
-         }
-         break;
-
-      case nir_tex_src_ms_index: {
-         sample_index = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1);
-         break;
-      }
-
-      case nir_tex_src_offset:
-         if (!brw_texture_offset(instr, i, &constant_offset)) {
-            offset_value =
-               get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 2);
-         }
-         break;
-
-      case nir_tex_src_texture_offset: {
-         assert(texture_reg.is_zero());
-         texture_reg = emit_uniformize(get_nir_src(instr->src[i].src,
-                                                   BRW_REGISTER_TYPE_UD, 1));
-         break;
-      }
-
-      case nir_tex_src_sampler_offset: {
-         assert(sampler_reg.is_zero());
-         sampler_reg = emit_uniformize(get_nir_src(instr->src[i].src,
-                                                   BRW_REGISTER_TYPE_UD, 1));
-         break;
-      }
-
-      case nir_tex_src_projector:
-         unreachable("Should be lowered by nir_lower_tex");
-
-      case nir_tex_src_bias:
-         unreachable("LOD bias is not valid for vertex shaders.\n");
-
-      default:
-         unreachable("unknown texture source");
-      }
-   }
-
-   if (instr->op == nir_texop_txf_ms ||
-       instr->op == nir_texop_samples_identical) {
-      assert(coord_type != NULL);
-      if (devinfo->ver >= 7) {
-         mcs = emit_mcs_fetch(coord_type, coordinate, texture_reg);
-      } else {
-         mcs = brw_imm_ud(0u);
-      }
-   }
-
-   /* Stuff the channel select bits in the top of the texture offset */
-   if (instr->op == nir_texop_tg4) {
-      if (instr->component == 1 &&
-          (key_tex->gather_channel_quirk_mask & (1 << texture))) {
-         /* gather4 sampler is broken for green channel on RG32F --
-          * we must ask for blue instead.
-          */
-         constant_offset |= 2 << 16;
-      } else {
-         constant_offset |= instr->component << 16;
-      }
-   }
-
-   enum opcode opcode;
-   switch (instr->op) {
-   case nir_texop_tex:             opcode = SHADER_OPCODE_TXL;        break;
-   case nir_texop_txl:             opcode = SHADER_OPCODE_TXL;        break;
-   case nir_texop_txd:             opcode = SHADER_OPCODE_TXD;        break;
-   case nir_texop_txf:             opcode = SHADER_OPCODE_TXF;        break;
-   case nir_texop_txf_ms:          opcode = SHADER_OPCODE_TXF_CMS;    break;
-   case nir_texop_txs:             opcode = SHADER_OPCODE_TXS;        break;
-   case nir_texop_query_levels:    opcode = SHADER_OPCODE_TXS;        break;
-   case nir_texop_texture_samples: opcode = SHADER_OPCODE_SAMPLEINFO; break;
-   case nir_texop_tg4:
-      opcode = offset_value.file != BAD_FILE ? SHADER_OPCODE_TG4_OFFSET
-                                             : SHADER_OPCODE_TG4;
-      break;
-   case nir_texop_samples_identical: {
-      /* There are some challenges implementing this for vec4, and it seems
-       * unlikely to be used anyway.  For now, just return false ways.
-       */
-      emit(MOV(dest, brw_imm_ud(0u)));
-      return;
-   }
-   case nir_texop_txb:
-   case nir_texop_lod:
-      unreachable("Implicit LOD is only valid inside fragment shaders.");
-   default:
-      unreachable("Unrecognized tex op");
-   }
-
-   vec4_instruction *inst = new(mem_ctx) vec4_instruction(opcode, dest);
-
-   inst->offset = constant_offset;
-
-   /* The message header is necessary for:
-    * - Gfx4 (always)
-    * - Texel offsets
-    * - Gather channel selection
-    * - Sampler indices too large to fit in a 4-bit value.
-    * - Sampleinfo message - takes no parameters, but mlen = 0 is illegal
-    */
-   inst->header_size =
-      (devinfo->ver < 5 ||
-       inst->offset != 0 ||
-       opcode == SHADER_OPCODE_TG4 ||
-       opcode == SHADER_OPCODE_TG4_OFFSET ||
-       opcode == SHADER_OPCODE_SAMPLEINFO ||
-       is_high_sampler(devinfo, sampler_reg)) ? 1 : 0;
-   inst->base_mrf = 2;
-   inst->mlen = inst->header_size;
-   inst->dst.writemask = WRITEMASK_XYZW;
-   inst->shadow_compare = shadow_comparator.file != BAD_FILE;
-
-   inst->src[1] = texture_reg;
-   inst->src[2] = sampler_reg;
-
-   /* MRF for the first parameter */
-   int param_base = inst->base_mrf + inst->header_size;
-
-   if (opcode == SHADER_OPCODE_TXS) {
-      int writemask = devinfo->ver == 4 ? WRITEMASK_W : WRITEMASK_X;
-      emit(MOV(dst_reg(MRF, param_base, lod.type, writemask), lod));
-      inst->mlen++;
-   } else if (opcode == SHADER_OPCODE_SAMPLEINFO) {
-      inst->dst.writemask = WRITEMASK_X;
-   } else {
-      /* Load the coordinate */
-      /* FINISHME: gl_clamp_mask and saturate */
-      int coord_mask = (1 << instr->coord_components) - 1;
-      int zero_mask = 0xf & ~coord_mask;
-
-      emit(MOV(dst_reg(MRF, param_base, coordinate.type, coord_mask),
-               coordinate));
-      inst->mlen++;
-
-      if (zero_mask != 0) {
-         emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask),
-                  brw_imm_d(0)));
-      }
-      /* Load the shadow comparator */
-      if (shadow_comparator.file != BAD_FILE &&
-          opcode != SHADER_OPCODE_TXD &&
-          opcode != SHADER_OPCODE_TG4_OFFSET) {
-	 emit(MOV(dst_reg(MRF, param_base + 1, shadow_comparator.type,
-			  WRITEMASK_X),
-		  shadow_comparator));
-	 inst->mlen++;
-      }
-
-      /* Load the LOD info */
-      switch (opcode) {
-      case SHADER_OPCODE_TXL: {
-	 int mrf, writemask;
-	 if (devinfo->ver >= 5) {
-	    mrf = param_base + 1;
-	    if (shadow_comparator.file != BAD_FILE) {
-	       writemask = WRITEMASK_Y;
-	       /* mlen already incremented */
-	    } else {
-	       writemask = WRITEMASK_X;
-	       inst->mlen++;
-	    }
-	 } else /* devinfo->ver == 4 */ {
-	    mrf = param_base;
-	    writemask = WRITEMASK_W;
-	 }
-	 emit(MOV(dst_reg(MRF, mrf, lod.type, writemask), lod));
-         break;
-      }
-
-      case SHADER_OPCODE_TXF:
-         emit(MOV(dst_reg(MRF, param_base, lod.type, WRITEMASK_W), lod));
-         break;
-
-      case SHADER_OPCODE_TXF_CMS:
-         emit(MOV(dst_reg(MRF, param_base + 1, sample_index.type, WRITEMASK_X),
-                  sample_index));
-         if (devinfo->ver >= 7) {
-            /* MCS data is in the first channel of `mcs`, but we need to get it into
-             * the .y channel of the second vec4 of params, so replicate .x across
-             * the whole vec4 and then mask off everything except .y
-             */
-            mcs.swizzle = BRW_SWIZZLE_XXXX;
-            emit(MOV(dst_reg(MRF, param_base + 1, glsl_uint_type(), WRITEMASK_Y),
-                     mcs));
-         }
-         inst->mlen++;
-         break;
-
-      case SHADER_OPCODE_TXD: {
-         const brw_reg_type type = lod.type;
-
-	 if (devinfo->ver >= 5) {
-	    lod.swizzle = BRW_SWIZZLE4(BRW_SWIZZLE_X,BRW_SWIZZLE_X,BRW_SWIZZLE_Y,BRW_SWIZZLE_Y);
-	    lod2.swizzle = BRW_SWIZZLE4(BRW_SWIZZLE_X,BRW_SWIZZLE_X,BRW_SWIZZLE_Y,BRW_SWIZZLE_Y);
-	    emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), lod));
-	    emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), lod2));
-	    inst->mlen++;
-
-	    if (nir_tex_instr_dest_size(instr) == 3 ||
-                shadow_comparator.file != BAD_FILE) {
-	       lod.swizzle = BRW_SWIZZLE_ZZZZ;
-	       lod2.swizzle = BRW_SWIZZLE_ZZZZ;
-	       emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), lod));
-	       emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), lod2));
-	       inst->mlen++;
-
-               if (shadow_comparator.file != BAD_FILE) {
-                  emit(MOV(dst_reg(MRF, param_base + 2,
-                                   shadow_comparator.type, WRITEMASK_Z),
-                           shadow_comparator));
-               }
-	    }
-	 } else /* devinfo->ver == 4 */ {
-	    emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), lod));
-	    emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), lod2));
-	    inst->mlen += 2;
-	 }
-         break;
-      }
-
-      case SHADER_OPCODE_TG4_OFFSET:
-         if (shadow_comparator.file != BAD_FILE) {
-            emit(MOV(dst_reg(MRF, param_base, shadow_comparator.type, WRITEMASK_W),
-                     shadow_comparator));
-         }
-
-         emit(MOV(dst_reg(MRF, param_base + 1, glsl_ivec2_type(), WRITEMASK_XY),
-                  offset_value));
-         inst->mlen++;
-         break;
-
-      default:
-         break;
-      }
-   }
-
-   emit(inst);
-
-   /* fixup num layers (z) for cube arrays: hardware returns faces * layers;
-    * spec requires layers.
-    */
-   if (instr->op == nir_texop_txs && devinfo->ver < 7) {
-      /* Gfx4-6 return 0 instead of 1 for single layer surfaces. */
-      emit_minmax(BRW_CONDITIONAL_GE, writemask(inst->dst, WRITEMASK_Z),
-                  src_reg(inst->dst), brw_imm_d(1));
-   }
-
-   if (instr->op == nir_texop_query_levels) {
-      /* # levels is in .w */
-      src_reg swizzled(dest);
-      swizzled.swizzle = BRW_SWIZZLE4(BRW_SWIZZLE_W, BRW_SWIZZLE_W,
-                                      BRW_SWIZZLE_W, BRW_SWIZZLE_W);
-      emit(MOV(dest, swizzled));
-   }
-}
-
-src_reg
-vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
-                             src_reg coordinate, src_reg surface)
-{
-   vec4_instruction *inst =
-      new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS,
-                                    dst_reg(this, glsl_uvec4_type()));
-   inst->base_mrf = 2;
-   inst->src[1] = surface;
-   inst->src[2] = brw_imm_ud(0); /* sampler */
-   inst->mlen = 1;
-
-   const int param_base = inst->base_mrf;
-
-   /* parameters are: u, v, r, lod; lod will always be zero due to api restrictions */
-   int coord_mask = (1 << coordinate_type->vector_elements) - 1;
-   int zero_mask = 0xf & ~coord_mask;
-
-   emit(MOV(dst_reg(MRF, param_base, coordinate_type, coord_mask),
-            coordinate));
-
-   emit(MOV(dst_reg(MRF, param_base, coordinate_type, zero_mask),
-            brw_imm_d(0)));
-
-   emit(inst);
-   return src_reg(inst->dst);
-}
-
-void
-vec4_visitor::nir_emit_undef(nir_undef_instr *instr)
-{
-   nir_ssa_values[instr->def.index] =
-      dst_reg(VGRF, alloc.allocate(DIV_ROUND_UP(instr->def.bit_size, 32)));
-}
-
-/* SIMD4x2 64bit data is stored in register space like this:
- *
- * r0.0:DF  x0 y0 z0 w0
- * r1.0:DF  x1 y1 z1 w1
- *
- * When we need to write data such as this to memory using 32-bit write
- * messages we need to shuffle it in this fashion:
- *
- * r0.0:DF  x0 y0 x1 y1 (to be written at base offset)
- * r0.0:DF  z0 w0 z1 w1 (to be written at base offset + 16)
- *
- * We need to do the inverse operation when we read using 32-bit messages,
- * which we can do by applying the same exact shuffling on the 64-bit data
- * read, only that because the data for each vertex is positioned differently
- * we need to apply different channel enables.
- *
- * This function takes 64bit data and shuffles it as explained above.
- *
- * The @for_write parameter is used to specify if the shuffling is being done
- * for proper SIMD4x2 64-bit data that needs to be shuffled prior to a 32-bit
- * write message (for_write = true), or instead we are doing the inverse
- * operation and we have just read 64-bit data using a 32-bit messages that we
- * need to shuffle to create valid SIMD4x2 64-bit data (for_write = false).
- *
- * If @block and @ref are non-NULL, then the shuffling is done after @ref,
- * otherwise the instructions are emitted normally at the end. The function
- * returns the last instruction inserted.
- *
- * Notice that @src and @dst cannot be the same register.
- */
-vec4_instruction *
-vec4_visitor::shuffle_64bit_data(dst_reg dst, src_reg src, bool for_write,
-                                 bool for_scratch,
-                                 bblock_t *block, vec4_instruction *ref)
-{
-   assert(type_sz(src.type) == 8);
-   assert(type_sz(dst.type) == 8);
-   assert(!regions_overlap(dst, 2 * REG_SIZE, src, 2 * REG_SIZE));
-   assert(!ref == !block);
-
-   opcode mov_op = for_scratch ? VEC4_OPCODE_MOV_FOR_SCRATCH : BRW_OPCODE_MOV;
-
-   const vec4_builder bld = !ref ? vec4_builder(this).at_end() :
-                                   vec4_builder(this).at(block, ref->next);
-
-   /* Resolve swizzle in src */
-   if (src.swizzle != BRW_SWIZZLE_XYZW) {
-      dst_reg data = dst_reg(this, glsl_dvec4_type());
-      bld.emit(mov_op, data, src);
-      src = src_reg(data);
-   }
-
-   /* dst+0.XY = src+0.XY */
-   bld.group(4, 0).emit(mov_op, writemask(dst, WRITEMASK_XY), src);
-
-   /* dst+0.ZW = src+1.XY */
-   bld.group(4, for_write ? 1 : 0)
-            .emit(mov_op, writemask(dst, WRITEMASK_ZW),
-                  swizzle(byte_offset(src, REG_SIZE), BRW_SWIZZLE_XYXY));
-
-   /* dst+1.XY = src+0.ZW */
-   bld.group(4, for_write ? 0 : 1)
-            .emit(mov_op, writemask(byte_offset(dst, REG_SIZE), WRITEMASK_XY),
-                  swizzle(src, BRW_SWIZZLE_ZWZW));
-
-   /* dst+1.ZW = src+1.ZW */
-   return bld.group(4, 1)
-            .emit(mov_op, writemask(byte_offset(dst, REG_SIZE), WRITEMASK_ZW),
-                  byte_offset(src, REG_SIZE));
-}
-
-}
diff --git a/src/intel/compiler/brw_vec4_reg_allocate.cpp b/src/intel/compiler/brw_vec4_reg_allocate.cpp
deleted file mode 100644
index 8ba1e80b9a5..00000000000
--- a/src/intel/compiler/brw_vec4_reg_allocate.cpp
+++ /dev/null
@@ -1,512 +0,0 @@
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "util/register_allocate.h"
-#include "brw_vec4.h"
-#include "brw_cfg.h"
-
-using namespace brw;
-
-#define REG_CLASS_COUNT 20
-
-namespace brw {
-
-static void
-assign(unsigned int *reg_hw_locations, backend_reg *reg)
-{
-   if (reg->file == VGRF) {
-      reg->nr = reg_hw_locations[reg->nr] + reg->offset / REG_SIZE;
-      reg->offset %= REG_SIZE;
-   }
-}
-
-bool
-vec4_visitor::reg_allocate_trivial()
-{
-   unsigned int hw_reg_mapping[this->alloc.count];
-   bool virtual_grf_used[this->alloc.count];
-   int next;
-
-   /* Calculate which virtual GRFs are actually in use after whatever
-    * optimization passes have occurred.
-    */
-   for (unsigned i = 0; i < this->alloc.count; i++) {
-      virtual_grf_used[i] = false;
-   }
-
-   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      if (inst->dst.file == VGRF)
-         virtual_grf_used[inst->dst.nr] = true;
-
-      for (unsigned i = 0; i < 3; i++) {
-	 if (inst->src[i].file == VGRF)
-            virtual_grf_used[inst->src[i].nr] = true;
-      }
-   }
-
-   hw_reg_mapping[0] = this->first_non_payload_grf;
-   next = hw_reg_mapping[0] + this->alloc.sizes[0];
-   for (unsigned i = 1; i < this->alloc.count; i++) {
-      if (virtual_grf_used[i]) {
-	 hw_reg_mapping[i] = next;
-	 next += this->alloc.sizes[i];
-      }
-   }
-   prog_data->total_grf = next;
-
-   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      assign(hw_reg_mapping, &inst->dst);
-      assign(hw_reg_mapping, &inst->src[0]);
-      assign(hw_reg_mapping, &inst->src[1]);
-      assign(hw_reg_mapping, &inst->src[2]);
-   }
-
-   if (prog_data->total_grf > max_grf) {
-      fail("Ran out of regs on trivial allocator (%d/%d)\n",
-	   prog_data->total_grf, max_grf);
-      return false;
-   }
-
-   return true;
-}
-
-extern "C" void
-brw_vec4_alloc_reg_set(struct brw_compiler *compiler)
-{
-   int base_reg_count =
-      compiler->devinfo->ver >= 7 ? GFX7_MRF_HACK_START : BRW_MAX_GRF;
-
-   assert(compiler->devinfo->ver < 8);
-
-   /* After running split_virtual_grfs(), almost all VGRFs will be of size 1.
-    * SEND-from-GRF sources cannot be split, so we also need classes for each
-    * potential message length.
-    */
-   assert(REG_CLASS_COUNT == MAX_VGRF_SIZE(compiler->devinfo));
-   int class_sizes[REG_CLASS_COUNT];
-
-   for (int i = 0; i < REG_CLASS_COUNT; i++)
-      class_sizes[i] = i + 1;
-
-
-   ralloc_free(compiler->vec4_reg_set.regs);
-   compiler->vec4_reg_set.regs = ra_alloc_reg_set(compiler, base_reg_count, false);
-   if (compiler->devinfo->ver >= 6)
-      ra_set_allocate_round_robin(compiler->vec4_reg_set.regs);
-   ralloc_free(compiler->vec4_reg_set.classes);
-   compiler->vec4_reg_set.classes = ralloc_array(compiler, struct ra_class *, REG_CLASS_COUNT);
-
-   /* Now, add the registers to their classes, and add the conflicts
-    * between them and the base GRF registers (and also each other).
-    */
-   for (int i = 0; i < REG_CLASS_COUNT; i++) {
-      int class_reg_count = base_reg_count - (class_sizes[i] - 1);
-      compiler->vec4_reg_set.classes[i] =
-         ra_alloc_contig_reg_class(compiler->vec4_reg_set.regs, class_sizes[i]);
-
-      for (int j = 0; j < class_reg_count; j++)
-         ra_class_add_reg(compiler->vec4_reg_set.classes[i], j);
-   }
-
-   ra_set_finalize(compiler->vec4_reg_set.regs, NULL);
-}
-
-void
-vec4_visitor::setup_payload_interference(struct ra_graph *g,
-                                         int first_payload_node,
-                                         int reg_node_count)
-{
-   int payload_node_count = this->first_non_payload_grf;
-
-   for (int i = 0; i < payload_node_count; i++) {
-      /* Mark each payload reg node as being allocated to its physical register.
-       *
-       * The alternative would be to have per-physical register classes, which
-       * would just be silly.
-       */
-      ra_set_node_reg(g, first_payload_node + i, i);
-
-      /* For now, just mark each payload node as interfering with every other
-       * node to be allocated.
-       */
-      for (int j = 0; j < reg_node_count; j++) {
-         ra_add_node_interference(g, first_payload_node + i, j);
-      }
-   }
-}
-
-bool
-vec4_visitor::reg_allocate()
-{
-   unsigned int hw_reg_mapping[alloc.count];
-   int payload_reg_count = this->first_non_payload_grf;
-
-   /* Using the trivial allocator can be useful in debugging undefined
-    * register access as a result of broken optimization passes.
-    */
-   if (0)
-      return reg_allocate_trivial();
-
-   assert(devinfo->ver < 8);
-
-   const vec4_live_variables &live = live_analysis.require();
-   int node_count = alloc.count;
-   int first_payload_node = node_count;
-   node_count += payload_reg_count;
-   struct ra_graph *g =
-      ra_alloc_interference_graph(compiler->vec4_reg_set.regs, node_count);
-
-   for (unsigned i = 0; i < alloc.count; i++) {
-      int size = this->alloc.sizes[i];
-      assert(size >= 1 && size <= MAX_VGRF_SIZE(devinfo));
-      ra_set_node_class(g, i, compiler->vec4_reg_set.classes[size - 1]);
-
-      for (unsigned j = 0; j < i; j++) {
-	 if (live.vgrfs_interfere(i, j)) {
-	    ra_add_node_interference(g, i, j);
-	 }
-      }
-   }
-
-   /* Certain instructions can't safely use the same register for their
-    * sources and destination.  Add interference.
-    */
-   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      if (inst->dst.file == VGRF && inst->has_source_and_destination_hazard()) {
-         for (unsigned i = 0; i < 3; i++) {
-            if (inst->src[i].file == VGRF) {
-               ra_add_node_interference(g, inst->dst.nr, inst->src[i].nr);
-            }
-         }
-      }
-   }
-
-   setup_payload_interference(g, first_payload_node, node_count);
-
-   if (!ra_allocate(g)) {
-      /* Failed to allocate registers.  Spill a reg, and the caller will
-       * loop back into here to try again.
-       */
-      int reg = choose_spill_reg(g);
-      if (this->no_spills) {
-         fail("Failure to register allocate.  Reduce number of live "
-              "values to avoid this.");
-      } else if (reg == -1) {
-         fail("no register to spill\n");
-      } else {
-         spill_reg(reg);
-      }
-      ralloc_free(g);
-      return false;
-   }
-
-   /* Get the chosen virtual registers for each node, and map virtual
-    * regs in the register classes back down to real hardware reg
-    * numbers.
-    */
-   prog_data->total_grf = payload_reg_count;
-   for (unsigned i = 0; i < alloc.count; i++) {
-      hw_reg_mapping[i] = ra_get_node_reg(g, i);
-      prog_data->total_grf = MAX2(prog_data->total_grf,
-				  hw_reg_mapping[i] + alloc.sizes[i]);
-   }
-
-   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      assign(hw_reg_mapping, &inst->dst);
-      assign(hw_reg_mapping, &inst->src[0]);
-      assign(hw_reg_mapping, &inst->src[1]);
-      assign(hw_reg_mapping, &inst->src[2]);
-   }
-
-   ralloc_free(g);
-
-   return true;
-}
-
-/**
- * When we decide to spill a register, instead of blindly spilling every use,
- * save unspills when the spill register is used (read) in consecutive
- * instructions. This can potentially save a bunch of unspills that would
- * have very little impact in register allocation anyway.
- *
- * Notice that we need to account for this behavior when spilling a register
- * and when evaluating spilling costs. This function is designed so it can
- * be called from both places and avoid repeating the logic.
- *
- *  - When we call this function from spill_reg(), we pass in scratch_reg the
- *    actual unspill/spill register that we want to reuse in the current
- *    instruction.
- *
- *  - When we call this from evaluate_spill_costs(), we pass the register for
- *    which we are evaluating spilling costs.
- *
- * In either case, we check if the previous instructions read scratch_reg until
- * we find one that writes to it with a compatible mask or does not read/write
- * scratch_reg at all.
- */
-static bool
-can_use_scratch_for_source(const vec4_instruction *inst, unsigned i,
-                           unsigned scratch_reg)
-{
-   assert(inst->src[i].file == VGRF);
-   bool prev_inst_read_scratch_reg = false;
-
-   /* See if any previous source in the same instructions reads scratch_reg */
-   for (unsigned n = 0; n < i; n++) {
-      if (inst->src[n].file == VGRF && inst->src[n].nr == scratch_reg)
-         prev_inst_read_scratch_reg = true;
-   }
-
-   /* Now check if previous instructions read/write scratch_reg */
-   for (vec4_instruction *prev_inst = (vec4_instruction *) inst->prev;
-        !prev_inst->is_head_sentinel();
-        prev_inst = (vec4_instruction *) prev_inst->prev) {
-
-      /* If the previous instruction writes to scratch_reg then we can reuse
-       * it if the write is not conditional and the channels we write are
-       * compatible with our read mask
-       */
-      if (prev_inst->dst.file == VGRF && prev_inst->dst.nr == scratch_reg) {
-         return (!prev_inst->predicate || prev_inst->opcode == BRW_OPCODE_SEL) &&
-                (brw_mask_for_swizzle(inst->src[i].swizzle) &
-                 ~prev_inst->dst.writemask) == 0;
-      }
-
-      /* Skip scratch read/writes so that instructions generated by spilling
-       * other registers (that won't read/write scratch_reg) do not stop us from
-       * reusing scratch_reg for this instruction.
-       */
-      if (prev_inst->opcode == SHADER_OPCODE_GFX4_SCRATCH_WRITE ||
-          prev_inst->opcode == SHADER_OPCODE_GFX4_SCRATCH_READ)
-         continue;
-
-      /* If the previous instruction does not write to scratch_reg, then check
-       * if it reads it
-       */
-      int n;
-      for (n = 0; n < 3; n++) {
-         if (prev_inst->src[n].file == VGRF &&
-             prev_inst->src[n].nr == scratch_reg) {
-            prev_inst_read_scratch_reg = true;
-            break;
-         }
-      }
-      if (n == 3) {
-         /* The previous instruction does not read scratch_reg. At this point,
-          * if no previous instruction has read scratch_reg it means that we
-          * will need to unspill it here and we can't reuse it (so we return
-          * false). Otherwise, if we found at least one consecutive instruction
-          * that read scratch_reg, then we know that we got here from
-          * evaluate_spill_costs (since for the spill_reg path any block of
-          * consecutive instructions using scratch_reg must start with a write
-          * to that register, so we would've exited the loop in the check for
-          * the write that we have at the start of this loop), and in that case
-          * it means that we found the point at which the scratch_reg would be
-          * unspilled. Since we always unspill a full vec4, it means that we
-          * have all the channels available and we can just return true to
-          * signal that we can reuse the register in the current instruction
-          * too.
-          */
-         return prev_inst_read_scratch_reg;
-      }
-   }
-
-   return prev_inst_read_scratch_reg;
-}
-
-static inline float
-spill_cost_for_type(enum brw_reg_type type)
-{
-   /* Spilling of a 64-bit register involves emitting 2 32-bit scratch
-    * messages plus the 64b/32b shuffling code.
-    */
-   return type_sz(type) == 8 ? 2.25f : 1.0f;
-}
-
-void
-vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
-{
-   float loop_scale = 1.0;
-
-   unsigned *reg_type_size = (unsigned *)
-      ralloc_size(NULL, this->alloc.count * sizeof(unsigned));
-
-   for (unsigned i = 0; i < this->alloc.count; i++) {
-      spill_costs[i] = 0.0;
-      no_spill[i] = alloc.sizes[i] != 1 && alloc.sizes[i] != 2;
-      reg_type_size[i] = 0;
-   }
-
-   /* Calculate costs for spilling nodes.  Call it a cost of 1 per
-    * spill/unspill we'll have to do, and guess that the insides of
-    * loops run 10 times.
-    */
-   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      for (unsigned int i = 0; i < 3; i++) {
-         if (inst->src[i].file == VGRF && !no_spill[inst->src[i].nr]) {
-            /* We will only unspill src[i] it it wasn't unspilled for the
-             * previous instruction, in which case we'll just reuse the scratch
-             * reg for this instruction.
-             */
-            if (!can_use_scratch_for_source(inst, i, inst->src[i].nr)) {
-               spill_costs[inst->src[i].nr] +=
-                  loop_scale * spill_cost_for_type(inst->src[i].type);
-               if (inst->src[i].reladdr ||
-                   inst->src[i].offset >= REG_SIZE)
-                  no_spill[inst->src[i].nr] = true;
-
-               /* We don't support unspills of partial DF reads.
-                *
-                * Our 64-bit unspills are implemented with two 32-bit scratch
-                * messages, each one reading that for both SIMD4x2 threads that
-                * we need to shuffle into correct 64-bit data. Ensure that we
-                * are reading data for both threads.
-                */
-               if (type_sz(inst->src[i].type) == 8 && inst->exec_size != 8)
-                  no_spill[inst->src[i].nr] = true;
-            }
-
-            /* We can't spill registers that mix 32-bit and 64-bit access (that
-             * contain 64-bit data that is operated on via 32-bit instructions)
-             */
-            unsigned type_size = type_sz(inst->src[i].type);
-            if (reg_type_size[inst->src[i].nr] == 0)
-               reg_type_size[inst->src[i].nr] = type_size;
-            else if (reg_type_size[inst->src[i].nr] != type_size)
-               no_spill[inst->src[i].nr] = true;
-         }
-      }
-
-      if (inst->dst.file == VGRF && !no_spill[inst->dst.nr]) {
-         spill_costs[inst->dst.nr] +=
-            loop_scale * spill_cost_for_type(inst->dst.type);
-         if (inst->dst.reladdr || inst->dst.offset >= REG_SIZE)
-            no_spill[inst->dst.nr] = true;
-
-         /* We don't support spills of partial DF writes.
-          *
-          * Our 64-bit spills are implemented with two 32-bit scratch messages,
-          * each one writing that for both SIMD4x2 threads. Ensure that we
-          * are writing data for both threads.
-          */
-         if (type_sz(inst->dst.type) == 8 && inst->exec_size != 8)
-            no_spill[inst->dst.nr] = true;
-
-         /* We can't spill registers that mix 32-bit and 64-bit access (that
-          * contain 64-bit data that is operated on via 32-bit instructions)
-          */
-         unsigned type_size = type_sz(inst->dst.type);
-         if (reg_type_size[inst->dst.nr] == 0)
-            reg_type_size[inst->dst.nr] = type_size;
-         else if (reg_type_size[inst->dst.nr] != type_size)
-            no_spill[inst->dst.nr] = true;
-      }
-
-      switch (inst->opcode) {
-
-      case BRW_OPCODE_DO:
-         loop_scale *= 10;
-         break;
-
-      case BRW_OPCODE_WHILE:
-         loop_scale /= 10;
-         break;
-
-      case SHADER_OPCODE_GFX4_SCRATCH_READ:
-      case SHADER_OPCODE_GFX4_SCRATCH_WRITE:
-      case VEC4_OPCODE_MOV_FOR_SCRATCH:
-         for (int i = 0; i < 3; i++) {
-            if (inst->src[i].file == VGRF)
-               no_spill[inst->src[i].nr] = true;
-         }
-         if (inst->dst.file == VGRF)
-            no_spill[inst->dst.nr] = true;
-         break;
-
-      default:
-         break;
-      }
-   }
-
-   ralloc_free(reg_type_size);
-}
-
-int
-vec4_visitor::choose_spill_reg(struct ra_graph *g)
-{
-   float spill_costs[this->alloc.count];
-   bool no_spill[this->alloc.count];
-
-   evaluate_spill_costs(spill_costs, no_spill);
-
-   for (unsigned i = 0; i < this->alloc.count; i++) {
-      if (!no_spill[i])
-         ra_set_node_spill_cost(g, i, spill_costs[i]);
-   }
-
-   return ra_get_best_spill_node(g);
-}
-
-void
-vec4_visitor::spill_reg(unsigned spill_reg_nr)
-{
-   assert(alloc.sizes[spill_reg_nr] == 1 || alloc.sizes[spill_reg_nr] == 2);
-   unsigned spill_offset = last_scratch;
-   last_scratch += alloc.sizes[spill_reg_nr];
-
-   /* Generate spill/unspill instructions for the objects being spilled. */
-   unsigned scratch_reg = ~0u;
-   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      for (unsigned i = 0; i < 3; i++) {
-         if (inst->src[i].file == VGRF && inst->src[i].nr == spill_reg_nr) {
-            if (scratch_reg == ~0u ||
-                !can_use_scratch_for_source(inst, i, scratch_reg)) {
-               /* We need to unspill anyway so make sure we read the full vec4
-                * in any case. This way, the cached register can be reused
-                * for consecutive instructions that read different channels of
-                * the same vec4.
-                */
-               scratch_reg = alloc.allocate(alloc.sizes[spill_reg_nr]);
-               src_reg temp = inst->src[i];
-               temp.nr = scratch_reg;
-               temp.offset = 0;
-               temp.swizzle = BRW_SWIZZLE_XYZW;
-               emit_scratch_read(block, inst,
-                                 dst_reg(temp), inst->src[i], spill_offset);
-               temp.offset = inst->src[i].offset;
-            }
-            assert(scratch_reg != ~0u);
-            inst->src[i].nr = scratch_reg;
-         }
-      }
-
-      if (inst->dst.file == VGRF && inst->dst.nr == spill_reg_nr) {
-         emit_scratch_write(block, inst, spill_offset);
-         scratch_reg = inst->dst.nr;
-      }
-   }
-
-   invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
-}
-
-} /* namespace brw */
diff --git a/src/intel/compiler/brw_vec4_surface_builder.cpp b/src/intel/compiler/brw_vec4_surface_builder.cpp
deleted file mode 100644
index fce3133bef8..00000000000
--- a/src/intel/compiler/brw_vec4_surface_builder.cpp
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- * Copyright © 2013-2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_vec4_surface_builder.h"
-
-using namespace brw;
-
-namespace {
-   namespace array_utils {
-      /**
-       * Copy one every \p src_stride logical components of the argument into
-       * one every \p dst_stride logical components of the result.
-       */
-      static src_reg
-      emit_stride(const vec4_builder &bld, const src_reg &src, unsigned size,
-                  unsigned dst_stride, unsigned src_stride)
-      {
-         if (src_stride == 1 && dst_stride == 1) {
-            return src;
-         } else {
-            const dst_reg dst = bld.vgrf(src.type,
-                                         DIV_ROUND_UP(size * dst_stride, 4));
-
-            for (unsigned i = 0; i < size; ++i)
-               bld.MOV(writemask(offset(dst, 8, i * dst_stride / 4),
-                                 1 << (i * dst_stride % 4)),
-                       swizzle(offset(src, 8, i * src_stride / 4),
-                               brw_swizzle_for_mask(1 << (i * src_stride % 4))));
-
-            return src_reg(dst);
-         }
-      }
-
-      /**
-       * Convert a VEC4 into an array of registers with the layout expected by
-       * the recipient shared unit.  If \p has_simd4x2 is true the argument is
-       * left unmodified in SIMD4x2 form, otherwise it will be rearranged into
-       * a SIMD8 vector.
-       */
-      static src_reg
-      emit_insert(const vec4_builder &bld, const src_reg &src,
-                  unsigned n, bool has_simd4x2)
-      {
-         if (src.file == BAD_FILE || n == 0) {
-            return src_reg();
-
-         } else {
-            /* Pad unused components with zeroes. */
-            const unsigned mask = (1 << n) - 1;
-            const dst_reg tmp = bld.vgrf(src.type);
-
-            bld.MOV(writemask(tmp, mask), src);
-            if (n < 4)
-               bld.MOV(writemask(tmp, ~mask), brw_imm_d(0));
-
-            return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1);
-         }
-      }
-   }
-}
-
-namespace brw {
-   namespace surface_access {
-      namespace {
-         using namespace array_utils;
-
-         /**
-          * Generate a send opcode for a surface message and return the
-          * result.
-          */
-         src_reg
-         emit_send(const vec4_builder &bld, enum opcode op,
-                   const src_reg &header,
-                   const src_reg &addr, unsigned addr_sz,
-                   const src_reg &src, unsigned src_sz,
-                   const src_reg &surface,
-                   unsigned arg, unsigned ret_sz,
-                   brw_predicate pred = BRW_PREDICATE_NONE)
-         {
-            /* Calculate the total number of components of the payload. */
-            const unsigned header_sz = (header.file == BAD_FILE ? 0 : 1);
-            const unsigned sz = header_sz + addr_sz + src_sz;
-
-            /* Construct the payload. */
-            const dst_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
-            unsigned n = 0;
-
-            if (header_sz)
-               bld.exec_all().MOV(offset(payload, 8, n++),
-                                  retype(header, BRW_REGISTER_TYPE_UD));
-
-            for (unsigned i = 0; i < addr_sz; i++)
-               bld.MOV(offset(payload, 8, n++),
-                       offset(retype(addr, BRW_REGISTER_TYPE_UD), 8, i));
-
-            for (unsigned i = 0; i < src_sz; i++)
-               bld.MOV(offset(payload, 8, n++),
-                       offset(retype(src, BRW_REGISTER_TYPE_UD), 8, i));
-
-            /* Reduce the dynamically uniform surface index to a single
-             * scalar.
-             */
-            const src_reg usurface = bld.emit_uniformize(surface);
-
-            /* Emit the message send instruction. */
-            const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz);
-            vec4_instruction *inst =
-               bld.emit(op, dst, src_reg(payload), usurface, brw_imm_ud(arg));
-            inst->mlen = sz;
-            inst->size_written = ret_sz * REG_SIZE;
-            inst->header_size = header_sz;
-            inst->predicate = pred;
-
-            return src_reg(dst);
-         }
-      }
-
-      /**
-       * Emit an untyped surface read opcode.  \p dims determines the number
-       * of components of the address and \p size the number of components of
-       * the returned value.
-       */
-      src_reg
-      emit_untyped_read(const vec4_builder &bld,
-                        const src_reg &surface, const src_reg &addr,
-                        unsigned dims, unsigned size,
-                        brw_predicate pred)
-      {
-         return emit_send(bld, VEC4_OPCODE_UNTYPED_SURFACE_READ, src_reg(),
-                          emit_insert(bld, addr, dims, true), 1,
-                          src_reg(), 0,
-                          surface, size, 1, pred);
-      }
-
-      /**
-       * Emit an untyped surface write opcode.  \p dims determines the number
-       * of components of the address and \p size the number of components of
-       * the argument.
-       */
-      void
-      emit_untyped_write(const vec4_builder &bld, const src_reg &surface,
-                         const src_reg &addr, const src_reg &src,
-                         unsigned dims, unsigned size,
-                         brw_predicate pred)
-      {
-         const bool has_simd4x2 = bld.shader->devinfo->verx10 == 75;
-         emit_send(bld, VEC4_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(),
-                   emit_insert(bld, addr, dims, has_simd4x2),
-                   has_simd4x2 ? 1 : dims,
-                   emit_insert(bld, src, size, has_simd4x2),
-                   has_simd4x2 ? 1 : size,
-                   surface, size, 0, pred);
-      }
-
-      /**
-       * Emit an untyped surface atomic opcode.  \p dims determines the number
-       * of components of the address and \p rsize the number of components of
-       * the returned value (either zero or one).
-       */
-      src_reg
-      emit_untyped_atomic(const vec4_builder &bld,
-                          const src_reg &surface, const src_reg &addr,
-                          const src_reg &src0, const src_reg &src1,
-                          unsigned dims, unsigned rsize, unsigned op,
-                          brw_predicate pred)
-      {
-         const bool has_simd4x2 = bld.shader->devinfo->verx10 == 75;
-
-         /* Zip the components of both sources, they are represented as the X
-          * and Y components of the same vector.
-          */
-         const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
-         const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);
-
-         if (size >= 1) {
-            bld.MOV(writemask(srcs, WRITEMASK_X),
-                    swizzle(src0, BRW_SWIZZLE_XXXX));
-         }
-
-         if (size >= 2) {
-            bld.MOV(writemask(srcs, WRITEMASK_Y),
-                    swizzle(src1, BRW_SWIZZLE_XXXX));
-         }
-
-         return emit_send(bld, VEC4_OPCODE_UNTYPED_ATOMIC, src_reg(),
-                          emit_insert(bld, addr, dims, has_simd4x2),
-                          has_simd4x2 ? 1 : dims,
-                          emit_insert(bld, src_reg(srcs), size, has_simd4x2),
-                          has_simd4x2 && size ? 1 : size,
-                          surface, op, rsize, pred);
-      }
-   }
-}
diff --git a/src/intel/compiler/brw_vec4_surface_builder.h b/src/intel/compiler/brw_vec4_surface_builder.h
deleted file mode 100644
index 2821685a361..00000000000
--- a/src/intel/compiler/brw_vec4_surface_builder.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright © 2013-2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef BRW_VEC4_SURFACE_BUILDER_H
-#define BRW_VEC4_SURFACE_BUILDER_H
-
-#include "brw_vec4_builder.h"
-
-namespace brw {
-   namespace surface_access {
-      src_reg
-      emit_untyped_read(const vec4_builder &bld,
-                        const src_reg &surface, const src_reg &addr,
-                        unsigned dims, unsigned size,
-                        brw_predicate pred = BRW_PREDICATE_NONE);
-
-      void
-      emit_untyped_write(const vec4_builder &bld, const src_reg &surface,
-                         const src_reg &addr, const src_reg &src,
-                         unsigned dims, unsigned size,
-                         brw_predicate pred = BRW_PREDICATE_NONE);
-
-      src_reg
-      emit_untyped_atomic(const vec4_builder &bld,
-                          const src_reg &surface, const src_reg &addr,
-                          const src_reg &src0, const src_reg &src1,
-                          unsigned dims, unsigned rsize, unsigned op,
-                          brw_predicate pred = BRW_PREDICATE_NONE);
-   }
-}
-
-#endif
diff --git a/src/intel/compiler/brw_vec4_tcs.cpp b/src/intel/compiler/brw_vec4_tcs.cpp
deleted file mode 100644
index d3dceb38922..00000000000
--- a/src/intel/compiler/brw_vec4_tcs.cpp
+++ /dev/null
@@ -1,320 +0,0 @@
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file brw_vec4_tcs.cpp
- *
- * Tessellaton control shader specific code derived from the vec4_visitor class.
- */
-
-#include "intel_nir.h"
-#include "brw_vec4_tcs.h"
-
-namespace brw {
-
-vec4_tcs_visitor::vec4_tcs_visitor(const struct brw_compiler *compiler,
-                                   const struct brw_compile_params *params,
-                                   const struct brw_tcs_prog_key *key,
-                                   struct brw_tcs_prog_data *prog_data,
-                                   const nir_shader *nir,
-                                   bool debug_enabled)
-   : vec4_visitor(compiler, params, &key->base.tex, &prog_data->base,
-                  nir, false, debug_enabled),
-     key(key)
-{
-}
-
-
-void
-vec4_tcs_visitor::setup_payload()
-{
-   int reg = 0;
-
-   /* The payload always contains important data in r0, which contains
-    * the URB handles that are passed on to the URB write at the end
-    * of the thread.
-    */
-   reg++;
-
-   /* r1.0 - r4.7 may contain the input control point URB handles,
-    * which we use to pull vertex data.
-    */
-   reg += 4;
-
-   /* Push constants may start at r5.0 */
-   reg = setup_uniforms(reg);
-
-   this->first_non_payload_grf = reg;
-}
-
-
-void
-vec4_tcs_visitor::emit_prolog()
-{
-   invocation_id = src_reg(this, glsl_uint_type());
-   emit(TCS_OPCODE_GET_INSTANCE_ID, dst_reg(invocation_id));
-
-   /* HS threads are dispatched with the dispatch mask set to 0xFF.
-    * If there are an odd number of output vertices, then the final
-    * HS instance dispatched will only have its bottom half doing real
-    * work, and so we need to disable the upper half:
-    */
-   if (nir->info.tess.tcs_vertices_out % 2) {
-      emit(CMP(dst_null_d(), invocation_id,
-               brw_imm_ud(nir->info.tess.tcs_vertices_out),
-               BRW_CONDITIONAL_L));
-
-      /* Matching ENDIF is in emit_thread_end() */
-      emit(IF(BRW_PREDICATE_NORMAL));
-   }
-}
-
-
-void
-vec4_tcs_visitor::emit_thread_end()
-{
-   vec4_instruction *inst;
-   current_annotation = "thread end";
-
-   if (nir->info.tess.tcs_vertices_out % 2) {
-      emit(BRW_OPCODE_ENDIF);
-   }
-
-   if (devinfo->ver == 7) {
-      struct brw_tcs_prog_data *tcs_prog_data =
-         (struct brw_tcs_prog_data *) prog_data;
-
-      current_annotation = "release input vertices";
-
-      /* Synchronize all threads, so we know that no one is still
-       * using the input URB handles.
-       */
-      if (tcs_prog_data->instances > 1) {
-         dst_reg header = dst_reg(this, glsl_uvec4_type());
-         emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header);
-         emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header));
-      }
-
-      /* Make thread 0 (invocations <1, 0>) release pairs of ICP handles.
-       * We want to compare the bottom half of invocation_id with 0, but
-       * use that truth value for the top half as well.  Unfortunately,
-       * we don't have stride in the vec4 world, nor UV immediates in
-       * align16, so we need an opcode to get invocation_id<0,4,0>.
-       */
-      set_condmod(BRW_CONDITIONAL_Z,
-                  emit(TCS_OPCODE_SRC0_010_IS_ZERO, dst_null_d(),
-                       invocation_id));
-      emit(IF(BRW_PREDICATE_NORMAL));
-      for (unsigned i = 0; i < key->input_vertices; i += 2) {
-         /* If we have an odd number of input vertices, the last will be
-          * unpaired.  We don't want to use an interleaved URB write in
-          * that case.
-          */
-         const bool is_unpaired = i == key->input_vertices - 1;
-
-         dst_reg header(this, glsl_uvec4_type());
-         emit(TCS_OPCODE_RELEASE_INPUT, header, brw_imm_ud(i),
-              brw_imm_ud(is_unpaired));
-      }
-      emit(BRW_OPCODE_ENDIF);
-   }
-
-   inst = emit(TCS_OPCODE_THREAD_END);
-   inst->base_mrf = 14;
-   inst->mlen = 2;
-}
-
-
-void
-vec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst,
-                                      const src_reg &vertex_index,
-                                      unsigned base_offset,
-                                      unsigned first_component,
-                                      const src_reg &indirect_offset)
-{
-   vec4_instruction *inst;
-   dst_reg temp(this, glsl_ivec4_type());
-   temp.type = dst.type;
-
-   /* Set up the message header to reference the proper parts of the URB */
-   dst_reg header = dst_reg(this, glsl_uvec4_type());
-   inst = emit(VEC4_TCS_OPCODE_SET_INPUT_URB_OFFSETS, header, vertex_index,
-               indirect_offset);
-   inst->force_writemask_all = true;
-
-   /* Read into a temporary, ignoring writemasking. */
-   inst = emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
-   inst->offset = base_offset;
-   inst->mlen = 1;
-   inst->base_mrf = -1;
-
-   /* Copy the temporary to the destination to deal with writemasking.
-    *
-    * Also attempt to deal with gl_PointSize being in the .w component.
-    */
-   if (inst->offset == 0 && indirect_offset.file == BAD_FILE) {
-      emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WWWW)));
-   } else {
-      src_reg src = src_reg(temp);
-      src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
-      emit(MOV(dst, src));
-   }
-}
-
-void
-vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst,
-                                       unsigned base_offset,
-                                       unsigned first_component,
-                                       const src_reg &indirect_offset)
-{
-   vec4_instruction *inst;
-
-   /* Set up the message header to reference the proper parts of the URB */
-   dst_reg header = dst_reg(this, glsl_uvec4_type());
-   inst = emit(VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, header,
-               brw_imm_ud(dst.writemask << first_component), indirect_offset);
-   inst->force_writemask_all = true;
-
-   vec4_instruction *read = emit(VEC4_OPCODE_URB_READ, dst, src_reg(header));
-   read->offset = base_offset;
-   read->mlen = 1;
-   read->base_mrf = -1;
-
-   if (first_component) {
-      /* Read into a temporary and copy with a swizzle and writemask. */
-      read->dst = retype(dst_reg(this, glsl_ivec4_type()), dst.type);
-      emit(MOV(dst, swizzle(src_reg(read->dst),
-                            BRW_SWZ_COMP_INPUT(first_component))));
-   }
-}
-
-void
-vec4_tcs_visitor::emit_urb_write(const src_reg &value,
-                                 unsigned writemask,
-                                 unsigned base_offset,
-                                 const src_reg &indirect_offset)
-{
-   if (writemask == 0)
-      return;
-
-   src_reg message(this, glsl_uvec4_type(), 2);
-   vec4_instruction *inst;
-
-   inst = emit(VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, dst_reg(message),
-               brw_imm_ud(writemask), indirect_offset);
-   inst->force_writemask_all = true;
-   inst = emit(MOV(byte_offset(dst_reg(retype(message, value.type)), REG_SIZE),
-                   value));
-   inst->force_writemask_all = true;
-
-   inst = emit(VEC4_TCS_OPCODE_URB_WRITE, dst_null_f(), message);
-   inst->offset = base_offset;
-   inst->mlen = 2;
-   inst->base_mrf = -1;
-}
-
-void
-vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
-{
-   switch (instr->intrinsic) {
-   case nir_intrinsic_load_invocation_id:
-      emit(MOV(get_nir_def(instr->def, BRW_REGISTER_TYPE_UD),
-               invocation_id));
-      break;
-   case nir_intrinsic_load_primitive_id:
-      emit(TCS_OPCODE_GET_PRIMITIVE_ID,
-           get_nir_def(instr->def, BRW_REGISTER_TYPE_UD));
-      break;
-   case nir_intrinsic_load_patch_vertices_in:
-      emit(MOV(get_nir_def(instr->def, BRW_REGISTER_TYPE_D),
-               brw_imm_d(key->input_vertices)));
-      break;
-   case nir_intrinsic_load_per_vertex_input: {
-      assert(instr->def.bit_size == 32);
-      src_reg indirect_offset = get_indirect_offset(instr);
-      unsigned imm_offset = nir_intrinsic_base(instr);
-
-      src_reg vertex_index = retype(get_nir_src_imm(instr->src[0]),
-                                    BRW_REGISTER_TYPE_UD);
-
-      unsigned first_component = nir_intrinsic_component(instr);
-      dst_reg dst = get_nir_def(instr->def, BRW_REGISTER_TYPE_D);
-      dst.writemask = brw_writemask_for_size(instr->num_components);
-      emit_input_urb_read(dst, vertex_index, imm_offset,
-                          first_component, indirect_offset);
-      break;
-   }
-   case nir_intrinsic_load_input:
-      unreachable("nir_lower_io should use load_per_vertex_input intrinsics");
-      break;
-   case nir_intrinsic_load_output:
-   case nir_intrinsic_load_per_vertex_output: {
-      src_reg indirect_offset = get_indirect_offset(instr);
-      unsigned imm_offset = nir_intrinsic_base(instr);
-
-      dst_reg dst = get_nir_def(instr->def, BRW_REGISTER_TYPE_D);
-      dst.writemask = brw_writemask_for_size(instr->num_components);
-
-      emit_output_urb_read(dst, imm_offset, nir_intrinsic_component(instr),
-                           indirect_offset);
-      break;
-   }
-   case nir_intrinsic_store_output:
-   case nir_intrinsic_store_per_vertex_output: {
-      assert(nir_src_bit_size(instr->src[0]) == 32);
-      src_reg value = get_nir_src(instr->src[0]);
-      unsigned mask = nir_intrinsic_write_mask(instr);
-      unsigned swiz = BRW_SWIZZLE_XYZW;
-
-      src_reg indirect_offset = get_indirect_offset(instr);
-      unsigned imm_offset = nir_intrinsic_base(instr);
-
-      unsigned first_component = nir_intrinsic_component(instr);
-      if (first_component) {
-         assert(swiz == BRW_SWIZZLE_XYZW);
-         swiz = BRW_SWZ_COMP_OUTPUT(first_component);
-         mask = mask << first_component;
-      }
-
-      emit_urb_write(swizzle(value, swiz), mask,
-                     imm_offset, indirect_offset);
-      break;
-   }
-
-   case nir_intrinsic_barrier:
-      if (nir_intrinsic_memory_scope(instr) != SCOPE_NONE)
-         vec4_visitor::nir_emit_intrinsic(instr);
-      if (nir_intrinsic_execution_scope(instr) == SCOPE_WORKGROUP) {
-         dst_reg header = dst_reg(this, glsl_uvec4_type());
-         emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header);
-         emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header));
-      }
-      break;
-
-   default:
-      vec4_visitor::nir_emit_intrinsic(instr);
-   }
-}
-
-} /* namespace brw */
-
diff --git a/src/intel/compiler/brw_vec4_tcs.h b/src/intel/compiler/brw_vec4_tcs.h
deleted file mode 100644
index e5de6c4945b..00000000000
--- a/src/intel/compiler/brw_vec4_tcs.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file brw_vec4_tcs.h
- *
- * The vec4-mode tessellation control shader compiler backend.
- */
-
-#ifndef BRW_VEC4_TCS_H
-#define BRW_VEC4_TCS_H
-
-#include "brw_compiler.h"
-#include "brw_eu.h"
-#include "brw_vec4.h"
-
-#ifdef __cplusplus
-namespace brw {
-
-class vec4_tcs_visitor : public vec4_visitor
-{
-public:
-   vec4_tcs_visitor(const struct brw_compiler *compiler,
-                    const struct brw_compile_params *params,
-                    const struct brw_tcs_prog_key *key,
-                    struct brw_tcs_prog_data *prog_data,
-                    const nir_shader *nir,
-                    bool debug_enabled);
-
-protected:
-   virtual void setup_payload();
-   virtual void emit_prolog();
-   virtual void emit_thread_end();
-
-   virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
-
-   void emit_input_urb_read(const dst_reg &dst,
-                            const src_reg &vertex_index,
-                            unsigned base_offset,
-                            unsigned first_component,
-                            const src_reg &indirect_offset);
-   void emit_output_urb_read(const dst_reg &dst,
-                             unsigned base_offset,
-                             unsigned first_component,
-                             const src_reg &indirect_offset);
-
-   void emit_urb_write(const src_reg &value, unsigned writemask,
-                       unsigned base_offset, const src_reg &indirect_offset);
-
-   /* we do not use the normal end-of-shader URB write mechanism -- but every
-    * vec4 stage must provide implementations of these:
-    */
-   virtual void emit_urb_write_header(int /* mrf */) {}
-   virtual vec4_instruction *emit_urb_write_opcode(bool /* complete */) { return NULL; }
-
-   const struct brw_tcs_prog_key *key;
-   src_reg invocation_id;
-};
-
-} /* namespace brw */
-#endif /* __cplusplus */
-
-#endif /* BRW_VEC4_TCS_H */
diff --git a/src/intel/compiler/brw_vec4_tes.cpp b/src/intel/compiler/brw_vec4_tes.cpp
deleted file mode 100644
index 7af5220be75..00000000000
--- a/src/intel/compiler/brw_vec4_tes.cpp
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file brw_vec4_tes.cpp
- *
- * Tessellaton evaluation shader specific code derived from the vec4_visitor class.
- */
-
-#include "brw_vec4_tes.h"
-#include "brw_cfg.h"
-#include "dev/intel_debug.h"
-
-namespace brw {
-
-vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler,
-                                   const struct brw_compile_params *params,
-                                  const struct brw_tes_prog_key *key,
-                                  struct brw_tes_prog_data *prog_data,
-                                  const nir_shader *shader,
-                                  bool debug_enabled)
-   : vec4_visitor(compiler, params, &key->base.tex, &prog_data->base,
-                  shader, false, debug_enabled)
-{
-}
-
-void
-vec4_tes_visitor::setup_payload()
-{
-   int reg = 0;
-
-   /* The payload always contains important data in r0 and r1, which contains
-    * the URB handles that are passed on to the URB write at the end
-    * of the thread.
-    */
-   reg += 2;
-
-   reg = setup_uniforms(reg);
-
-   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      for (int i = 0; i < 3; i++) {
-         if (inst->src[i].file != ATTR)
-            continue;
-
-         unsigned slot = inst->src[i].nr + inst->src[i].offset / 16;
-         struct brw_reg grf = brw_vec4_grf(reg + slot / 2, 4 * (slot % 2));
-         grf = stride(grf, 0, 4, 1);
-         grf.swizzle = inst->src[i].swizzle;
-         grf.type = inst->src[i].type;
-         grf.abs = inst->src[i].abs;
-         grf.negate = inst->src[i].negate;
-         inst->src[i] = grf;
-      }
-   }
-
-   reg += 8 * prog_data->urb_read_length;
-
-   this->first_non_payload_grf = reg;
-}
-
-
-void
-vec4_tes_visitor::emit_prolog()
-{
-   input_read_header = src_reg(this, glsl_uvec4_type());
-   emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header));
-
-   this->current_annotation = NULL;
-}
-
-
-void
-vec4_tes_visitor::emit_urb_write_header(int mrf)
-{
-   /* No need to do anything for DS; an implied write to this MRF will be
-    * performed by VEC4_VS_OPCODE_URB_WRITE.
-    */
-   (void) mrf;
-}
-
-
-vec4_instruction *
-vec4_tes_visitor::emit_urb_write_opcode(bool complete)
-{
-   vec4_instruction *inst = emit(VEC4_VS_OPCODE_URB_WRITE);
-   inst->urb_write_flags = complete ?
-      BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;
-
-   return inst;
-}
-
-void
-vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
-{
-   const struct brw_tes_prog_data *tes_prog_data =
-      (const struct brw_tes_prog_data *) prog_data;
-
-   switch (instr->intrinsic) {
-   case nir_intrinsic_load_tess_coord:
-      /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
-      emit(MOV(get_nir_def(instr->def, BRW_REGISTER_TYPE_F),
-               src_reg(brw_vec8_grf(1, 0))));
-      break;
-   case nir_intrinsic_load_tess_level_outer:
-      if (tes_prog_data->domain == INTEL_TESS_DOMAIN_ISOLINE) {
-         emit(MOV(get_nir_def(instr->def, BRW_REGISTER_TYPE_F),
-                  swizzle(src_reg(ATTR, 1, glsl_vec4_type()),
-                          BRW_SWIZZLE_ZWZW)));
-      } else {
-         emit(MOV(get_nir_def(instr->def, BRW_REGISTER_TYPE_F),
-                  swizzle(src_reg(ATTR, 1, glsl_vec4_type()),
-                          BRW_SWIZZLE_WZYX)));
-      }
-      break;
-   case nir_intrinsic_load_tess_level_inner:
-      if (tes_prog_data->domain == INTEL_TESS_DOMAIN_QUAD) {
-         emit(MOV(get_nir_def(instr->def, BRW_REGISTER_TYPE_F),
-                  swizzle(src_reg(ATTR, 0, glsl_vec4_type()),
-                          BRW_SWIZZLE_WZYX)));
-      } else {
-         emit(MOV(get_nir_def(instr->def, BRW_REGISTER_TYPE_F),
-                  src_reg(ATTR, 1, glsl_float_type())));
-      }
-      break;
-   case nir_intrinsic_load_primitive_id:
-      emit(TES_OPCODE_GET_PRIMITIVE_ID,
-           get_nir_def(instr->def, BRW_REGISTER_TYPE_UD));
-      break;
-
-   case nir_intrinsic_load_input:
-   case nir_intrinsic_load_per_vertex_input: {
-      assert(instr->def.bit_size == 32);
-      src_reg indirect_offset = get_indirect_offset(instr);
-      unsigned imm_offset = instr->const_index[0];
-      src_reg header = input_read_header;
-      unsigned first_component = nir_intrinsic_component(instr);
-
-      if (indirect_offset.file != BAD_FILE) {
-         src_reg clamped_indirect_offset = src_reg(this, glsl_uvec4_type());
-
-         /* Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)" says the
-          * valid range of the offset is [0, 0FFFFFFFh].
-          */
-         emit_minmax(BRW_CONDITIONAL_L,
-                     dst_reg(clamped_indirect_offset),
-                     retype(indirect_offset, BRW_REGISTER_TYPE_UD),
-                     brw_imm_ud(0x0fffffffu));
-
-         header = src_reg(this, glsl_uvec4_type());
-         emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
-              input_read_header, clamped_indirect_offset);
-      } else {
-         /* Arbitrarily only push up to 24 vec4 slots worth of data,
-          * which is 12 registers (since each holds 2 vec4 slots).
-          */
-         const unsigned max_push_slots = 24;
-         if (imm_offset < max_push_slots) {
-            src_reg src = src_reg(ATTR, imm_offset, glsl_ivec4_type());
-            src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
-
-            emit(MOV(get_nir_def(instr->def, BRW_REGISTER_TYPE_D), src));
-
-            prog_data->urb_read_length =
-               MAX2(prog_data->urb_read_length,
-                    DIV_ROUND_UP(imm_offset + 1, 2));
-            break;
-         }
-      }
-
-      dst_reg temp(this, glsl_ivec4_type());
-      vec4_instruction *read =
-         emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
-      read->offset = imm_offset;
-      read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
-
-      src_reg src = src_reg(temp);
-      src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
-
-      /* Copy to target.  We might end up with some funky writemasks landing
-       * in here, but we really don't want them in the above pseudo-ops.
-       */
-      dst_reg dst = get_nir_def(instr->def, BRW_REGISTER_TYPE_D);
-      dst.writemask = brw_writemask_for_size(instr->num_components);
-      emit(MOV(dst, src));
-      break;
-   }
-   default:
-      vec4_visitor::nir_emit_intrinsic(instr);
-   }
-}
-
-
-void
-vec4_tes_visitor::emit_thread_end()
-{
-   /* For DS, we always end the thread by emitting a single vertex.
-    * emit_urb_write_opcode() will take care of setting the eot flag on the
-    * SEND instruction.
-    */
-   emit_vertex();
-}
-
-} /* namespace brw */
diff --git a/src/intel/compiler/brw_vec4_tes.h b/src/intel/compiler/brw_vec4_tes.h
deleted file mode 100644
index 23a11956681..00000000000
--- a/src/intel/compiler/brw_vec4_tes.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file brw_vec4_tes.h
- *
- * The vec4 mode tessellation evaluation shader compiler backend.
- */
-
-#ifndef BRW_VEC4_TES_H
-#define BRW_VEC4_TES_H
-
-#include "brw_vec4.h"
-
-#ifdef __cplusplus
-namespace brw {
-
-class vec4_tes_visitor : public vec4_visitor
-{
-public:
-   vec4_tes_visitor(const struct brw_compiler *compiler,
-                    const struct brw_compile_params *params,
-                   const struct brw_tes_prog_key *key,
-                   struct brw_tes_prog_data *prog_data,
-                   const nir_shader *nir,
-                   bool debug_enabled);
-
-protected:
-   virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
-
-   virtual void setup_payload();
-   virtual void emit_prolog();
-   virtual void emit_thread_end();
-
-   virtual void emit_urb_write_header(int mrf);
-   virtual vec4_instruction *emit_urb_write_opcode(bool complete);
-
-private:
-   src_reg input_read_header;
-};
-
-} /* namespace brw */
-#endif /* __cplusplus */
-
-#endif /* BRW_VEC4_TES_H */
diff --git a/src/intel/compiler/brw_vec4_visitor.cpp b/src/intel/compiler/brw_vec4_visitor.cpp
deleted file mode 100644
index 236c7bae3ba..00000000000
--- a/src/intel/compiler/brw_vec4_visitor.cpp
+++ /dev/null
@@ -1,1319 +0,0 @@
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_nir.h"
-#include "brw_vec4.h"
-#include "brw_cfg.h"
-#include "brw_eu.h"
-#include "util/u_math.h"
-
-namespace brw {
-
-vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst,
-                                   const src_reg &src0, const src_reg &src1,
-                                   const src_reg &src2)
-{
-   this->opcode = opcode;
-   this->dst = dst;
-   this->src[0] = src0;
-   this->src[1] = src1;
-   this->src[2] = src2;
-   this->saturate = false;
-   this->force_writemask_all = false;
-   this->no_dd_clear = false;
-   this->no_dd_check = false;
-   this->writes_accumulator = false;
-   this->conditional_mod = BRW_CONDITIONAL_NONE;
-   this->predicate = BRW_PREDICATE_NONE;
-   this->predicate_inverse = false;
-   this->target = 0;
-   this->shadow_compare = false;
-   this->eot = false;
-   this->ir = NULL;
-   this->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
-   this->header_size = 0;
-   this->flag_subreg = 0;
-   this->mlen = 0;
-   this->base_mrf = 0;
-   this->offset = 0;
-   this->exec_size = 8;
-   this->group = 0;
-   this->size_written = (dst.file == BAD_FILE ?
-                         0 : this->exec_size * type_sz(dst.type));
-   this->annotation = NULL;
-}
-
-vec4_instruction *
-vec4_visitor::emit(vec4_instruction *inst)
-{
-   inst->ir = this->base_ir;
-   inst->annotation = this->current_annotation;
-
-   this->instructions.push_tail(inst);
-
-   return inst;
-}
-
-vec4_instruction *
-vec4_visitor::emit_before(bblock_t *block, vec4_instruction *inst,
-                          vec4_instruction *new_inst)
-{
-   new_inst->ir = inst->ir;
-   new_inst->annotation = inst->annotation;
-
-   inst->insert_before(block, new_inst);
-
-   return inst;
-}
-
-vec4_instruction *
-vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
-                   const src_reg &src1, const src_reg &src2)
-{
-   return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0, src1, src2));
-}
-
-
-vec4_instruction *
-vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
-                   const src_reg &src1)
-{
-   return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0, src1));
-}
-
-vec4_instruction *
-vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0)
-{
-   return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0));
-}
-
-vec4_instruction *
-vec4_visitor::emit(enum opcode opcode, const dst_reg &dst)
-{
-   return emit(new(mem_ctx) vec4_instruction(opcode, dst));
-}
-
-vec4_instruction *
-vec4_visitor::emit(enum opcode opcode)
-{
-   return emit(new(mem_ctx) vec4_instruction(opcode, dst_reg()));
-}
-
-#define ALU1(op)							\
-   vec4_instruction *							\
-   vec4_visitor::op(const dst_reg &dst, const src_reg &src0)		\
-   {									\
-      return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, src0); \
-   }
-
-#define ALU2(op)							\
-   vec4_instruction *							\
-   vec4_visitor::op(const dst_reg &dst, const src_reg &src0,		\
-                    const src_reg &src1)				\
-   {									\
-      return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst,        \
-                                           src0, src1);                 \
-   }
-
-#define ALU2_ACC(op)							\
-   vec4_instruction *							\
-   vec4_visitor::op(const dst_reg &dst, const src_reg &src0,		\
-                    const src_reg &src1)				\
-   {									\
-      vec4_instruction *inst = new(mem_ctx) vec4_instruction(           \
-                       BRW_OPCODE_##op, dst, src0, src1);		\
-      inst->writes_accumulator = true;                                  \
-      return inst;                                                      \
-   }
-
-#define ALU3(op)							\
-   vec4_instruction *							\
-   vec4_visitor::op(const dst_reg &dst, const src_reg &src0,		\
-                    const src_reg &src1, const src_reg &src2)		\
-   {									\
-      assert(devinfo->ver >= 6);						\
-      return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst,	\
-					   src0, src1, src2);		\
-   }
-
-ALU1(NOT)
-ALU1(MOV)
-ALU1(FRC)
-ALU1(RNDD)
-ALU1(RNDE)
-ALU1(RNDZ)
-ALU1(F32TO16)
-ALU1(F16TO32)
-ALU2(ADD)
-ALU2(MUL)
-ALU2_ACC(MACH)
-ALU2(AND)
-ALU2(OR)
-ALU2(XOR)
-ALU2(DP3)
-ALU2(DP4)
-ALU2(DPH)
-ALU2(SHL)
-ALU2(SHR)
-ALU2(ASR)
-ALU3(LRP)
-ALU1(BFREV)
-ALU3(BFE)
-ALU2(BFI1)
-ALU3(BFI2)
-ALU1(FBH)
-ALU1(FBL)
-ALU1(CBIT)
-ALU1(LZD)
-ALU3(MAD)
-ALU2_ACC(ADDC)
-ALU2_ACC(SUBB)
-ALU2(MAC)
-ALU1(DIM)
-
-/** Gfx4 predicated IF. */
-vec4_instruction *
-vec4_visitor::IF(enum brw_predicate predicate)
-{
-   vec4_instruction *inst;
-
-   inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_IF);
-   inst->predicate = predicate;
-
-   return inst;
-}
-
-/** Gfx6 IF with embedded comparison. */
-vec4_instruction *
-vec4_visitor::IF(src_reg src0, src_reg src1,
-                 enum brw_conditional_mod condition)
-{
-   assert(devinfo->ver == 6);
-
-   vec4_instruction *inst;
-
-   resolve_ud_negate(&src0);
-   resolve_ud_negate(&src1);
-
-   inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_IF, dst_null_d(),
-					src0, src1);
-   inst->conditional_mod = condition;
-
-   return inst;
-}
-
-/**
- * CMP: Sets the low bit of the destination channels with the result
- * of the comparison, while the upper bits are undefined, and updates
- * the flag register with the packed 16 bits of the result.
- */
-vec4_instruction *
-vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1,
-                  enum brw_conditional_mod condition)
-{
-   vec4_instruction *inst;
-
-   /* Take the instruction:
-    *
-    * CMP null<d> src0<f> src1<f>
-    *
-    * Original gfx4 does type conversion to the destination type before
-    * comparison, producing garbage results for floating point comparisons.
-    *
-    * The destination type doesn't matter on newer generations, so we set the
-    * type to match src0 so we can compact the instruction.
-    */
-   dst.type = src0.type;
-
-   resolve_ud_negate(&src0);
-   resolve_ud_negate(&src1);
-
-   inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_CMP, dst, src0, src1);
-   inst->conditional_mod = condition;
-
-   return inst;
-}
-
-vec4_instruction *
-vec4_visitor::SCRATCH_READ(const dst_reg &dst, const src_reg &index)
-{
-   vec4_instruction *inst;
-
-   inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GFX4_SCRATCH_READ,
-					dst, index);
-   inst->base_mrf = FIRST_SPILL_MRF(devinfo->ver) + 1;
-   inst->mlen = 2;
-
-   return inst;
-}
-
-vec4_instruction *
-vec4_visitor::SCRATCH_WRITE(const dst_reg &dst, const src_reg &src,
-                            const src_reg &index)
-{
-   vec4_instruction *inst;
-
-   inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GFX4_SCRATCH_WRITE,
-					dst, src, index);
-   inst->base_mrf = FIRST_SPILL_MRF(devinfo->ver);
-   inst->mlen = 3;
-
-   return inst;
-}
-
-src_reg
-vec4_visitor::fix_3src_operand(const src_reg &src)
-{
-   /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
-    * able to use vertical stride of zero to replicate the vec4 uniform, like
-    *
-    *    g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
-    *
-    * But you can't, since vertical stride is always four in three-source
-    * instructions. Instead, insert a MOV instruction to do the replication so
-    * that the three-source instruction can consume it.
-    */
-
-   /* The MOV is only needed if the source is a uniform or immediate. */
-   if (src.file != UNIFORM && src.file != IMM)
-      return src;
-
-   if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle))
-      return src;
-
-   dst_reg expanded = dst_reg(this, glsl_vec4_type());
-   expanded.type = src.type;
-   emit(VEC4_OPCODE_UNPACK_UNIFORM, expanded, src);
-   return src_reg(expanded);
-}
-
-src_reg
-vec4_visitor::fix_math_operand(const src_reg &src)
-{
-   if (devinfo->ver < 6 || src.file == BAD_FILE)
-      return src;
-
-   /* The gfx6 math instruction ignores the source modifiers --
-    * swizzle, abs, negate, and at least some parts of the register
-    * region description.
-    *
-    * Rather than trying to enumerate all these cases, *always* expand the
-    * operand to a temp GRF for gfx6.
-    *
-    * For gfx7, keep the operand as-is, except if immediate, which gfx7 still
-    * can't use.
-    */
-
-   if (devinfo->ver == 7 && src.file != IMM)
-      return src;
-
-   dst_reg expanded = dst_reg(this, glsl_vec4_type());
-   expanded.type = src.type;
-   emit(MOV(expanded, src));
-   return src_reg(expanded);
-}
-
-vec4_instruction *
-vec4_visitor::emit_math(enum opcode opcode,
-                        const dst_reg &dst,
-                        const src_reg &src0, const src_reg &src1)
-{
-   vec4_instruction *math =
-      emit(opcode, dst, fix_math_operand(src0), fix_math_operand(src1));
-
-   if (devinfo->ver == 6 && dst.writemask != WRITEMASK_XYZW) {
-      /* MATH on Gfx6 must be align1, so we can't do writemasks. */
-      math->dst = dst_reg(this, glsl_vec4_type());
-      math->dst.type = dst.type;
-      math = emit(MOV(dst, src_reg(math->dst)));
-   } else if (devinfo->ver < 6) {
-      math->base_mrf = 1;
-      math->mlen = src1.file == BAD_FILE ? 1 : 2;
-   }
-
-   return math;
-}
-
-void
-vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
-{
-   if (devinfo->ver < 7) {
-      unreachable("ir_unop_pack_half_2x16 should be lowered");
-   }
-
-   assert(dst.type == BRW_REGISTER_TYPE_UD);
-   assert(src0.type == BRW_REGISTER_TYPE_F);
-
-   /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
-    *
-    *   Because this instruction does not have a 16-bit floating-point type,
-    *   the destination data type must be Word (W).
-    *
-    *   The destination must be DWord-aligned and specify a horizontal stride
-    *   (HorzStride) of 2. The 16-bit result is stored in the lower word of
-    *   each destination channel and the upper word is not modified.
-    *
-    * The above restriction implies that the f32to16 instruction must use
-    * align1 mode, because only in align1 mode is it possible to specify
-    * horizontal stride.  We choose here to defy the hardware docs and emit
-    * align16 instructions.
-    *
-    * (I [chadv] did attempt to emit align1 instructions for VS f32to16
-    * instructions. I was partially successful in that the code passed all
-    * tests.  However, the code was dubiously correct and fragile, and the
-    * tests were not harsh enough to probe that frailty. Not trusting the
-    * code, I chose instead to remain in align16 mode in defiance of the hw
-    * docs).
-    *
-    * I've [chadv] experimentally confirmed that, on gfx7 hardware and the
-    * simulator, emitting a f32to16 in align16 mode with UD as destination
-    * data type is safe. The behavior differs from that specified in the PRM
-    * in that the upper word of each destination channel is cleared to 0.
-    */
-
-   dst_reg tmp_dst(this, glsl_uvec2_type());
-   src_reg tmp_src(tmp_dst);
-
-#if 0
-   /* Verify the undocumented behavior on which the following instructions
-    * rely.  If f32to16 fails to clear the upper word of the X and Y channels,
-    * then the result of the bit-or instruction below will be incorrect.
-    *
-    * You should inspect the disasm output in order to verify that the MOV is
-    * not optimized away.
-    */
-   emit(MOV(tmp_dst, brw_imm_ud(0x12345678u)));
-#endif
-
-   /* Give tmp the form below, where "." means untouched.
-    *
-    *     w z          y          x w z          y          x
-    *   |.|.|0x0000hhhh|0x0000llll|.|.|0x0000hhhh|0x0000llll|
-    *
-    * That the upper word of each write-channel be 0 is required for the
-    * following bit-shift and bit-or instructions to work. Note that this
-    * relies on the undocumented hardware behavior mentioned above.
-    */
-   tmp_dst.writemask = WRITEMASK_XY;
-   emit(F32TO16(tmp_dst, src0));
-
-   /* Give the write-channels of dst the form:
-    *   0xhhhh0000
-    */
-   tmp_src.swizzle = BRW_SWIZZLE_YYYY;
-   emit(SHL(dst, tmp_src, brw_imm_ud(16u)));
-
-   /* Finally, give the write-channels of dst the form of packHalf2x16's
-    * output:
-    *   0xhhhhllll
-    */
-   tmp_src.swizzle = BRW_SWIZZLE_XXXX;
-   emit(OR(dst, src_reg(dst), tmp_src));
-}
-
-void
-vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0)
-{
-   if (devinfo->ver < 7) {
-      unreachable("ir_unop_unpack_half_2x16 should be lowered");
-   }
-
-   assert(dst.type == BRW_REGISTER_TYPE_F);
-   assert(src0.type == BRW_REGISTER_TYPE_UD);
-
-   /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
-    *
-    *   Because this instruction does not have a 16-bit floating-point type,
-    *   the source data type must be Word (W). The destination type must be
-    *   F (Float).
-    *
-    * To use W as the source data type, we must adjust horizontal strides,
-    * which is only possible in align1 mode. All my [chadv] attempts at
-    * emitting align1 instructions for unpackHalf2x16 failed to pass the
-    * Piglit tests, so I gave up.
-    *
-    * I've verified that, on gfx7 hardware and the simulator, it is safe to
-    * emit f16to32 in align16 mode with UD as source data type.
-    */
-
-   dst_reg tmp_dst(this, glsl_uvec2_type());
-   src_reg tmp_src(tmp_dst);
-
-   tmp_dst.writemask = WRITEMASK_X;
-   emit(AND(tmp_dst, src0, brw_imm_ud(0xffffu)));
-
-   tmp_dst.writemask = WRITEMASK_Y;
-   emit(SHR(tmp_dst, src0, brw_imm_ud(16u)));
-
-   dst.writemask = WRITEMASK_XY;
-   emit(F16TO32(dst, tmp_src));
-}
-
-void
-vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0)
-{
-   /* Instead of splitting the 32-bit integer, shifting, and ORing it back
-    * together, we can shift it by <0, 8, 16, 24>. The packed integer immediate
-    * is not suitable to generate the shift values, but we can use the packed
-    * vector float and a type-converting MOV.
-    */
-   dst_reg shift(this, glsl_uvec4_type());
-   emit(MOV(shift, brw_imm_vf4(0x00, 0x60, 0x70, 0x78)));
-
-   dst_reg shifted(this, glsl_uvec4_type());
-   src0.swizzle = BRW_SWIZZLE_XXXX;
-   emit(SHR(shifted, src0, src_reg(shift)));
-
-   shifted.type = BRW_REGISTER_TYPE_UB;
-   dst_reg f(this, glsl_vec4_type());
-   emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
-
-   emit(MUL(dst, src_reg(f), brw_imm_f(1.0f / 255.0f)));
-}
-
-void
-vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0)
-{
-   /* Instead of splitting the 32-bit integer, shifting, and ORing it back
-    * together, we can shift it by <0, 8, 16, 24>. The packed integer immediate
-    * is not suitable to generate the shift values, but we can use the packed
-    * vector float and a type-converting MOV.
-    */
-   dst_reg shift(this, glsl_uvec4_type());
-   emit(MOV(shift, brw_imm_vf4(0x00, 0x60, 0x70, 0x78)));
-
-   dst_reg shifted(this, glsl_uvec4_type());
-   src0.swizzle = BRW_SWIZZLE_XXXX;
-   emit(SHR(shifted, src0, src_reg(shift)));
-
-   shifted.type = BRW_REGISTER_TYPE_B;
-   dst_reg f(this, glsl_vec4_type());
-   emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
-
-   dst_reg scaled(this, glsl_vec4_type());
-   emit(MUL(scaled, src_reg(f), brw_imm_f(1.0f / 127.0f)));
-
-   dst_reg max(this, glsl_vec4_type());
-   emit_minmax(BRW_CONDITIONAL_GE, max, src_reg(scaled), brw_imm_f(-1.0f));
-   emit_minmax(BRW_CONDITIONAL_L, dst, src_reg(max), brw_imm_f(1.0f));
-}
-
-void
-vec4_visitor::emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0)
-{
-   dst_reg saturated(this, glsl_vec4_type());
-   vec4_instruction *inst = emit(MOV(saturated, src0));
-   inst->saturate = true;
-
-   dst_reg scaled(this, glsl_vec4_type());
-   emit(MUL(scaled, src_reg(saturated), brw_imm_f(255.0f)));
-
-   dst_reg rounded(this, glsl_vec4_type());
-   emit(RNDE(rounded, src_reg(scaled)));
-
-   dst_reg u(this, glsl_uvec4_type());
-   emit(MOV(u, src_reg(rounded)));
-
-   src_reg bytes(u);
-   emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
-}
-
-void
-vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0)
-{
-   dst_reg max(this, glsl_vec4_type());
-   emit_minmax(BRW_CONDITIONAL_GE, max, src0, brw_imm_f(-1.0f));
-
-   dst_reg min(this, glsl_vec4_type());
-   emit_minmax(BRW_CONDITIONAL_L, min, src_reg(max), brw_imm_f(1.0f));
-
-   dst_reg scaled(this, glsl_vec4_type());
-   emit(MUL(scaled, src_reg(min), brw_imm_f(127.0f)));
-
-   dst_reg rounded(this, glsl_vec4_type());
-   emit(RNDE(rounded, src_reg(scaled)));
-
-   dst_reg i(this, glsl_ivec4_type());
-   emit(MOV(i, src_reg(rounded)));
-
-   src_reg bytes(i);
-   emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
-}
-
-src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
-{
-   init();
-
-   this->file = VGRF;
-   this->nr = v->alloc.allocate(type_size_vec4(type, false));
-
-   if (glsl_type_is_array(type) || glsl_type_is_struct(type)) {
-      this->swizzle = BRW_SWIZZLE_NOOP;
-   } else {
-      this->swizzle = brw_swizzle_for_size(type->vector_elements);
-   }
-
-   this->type = brw_type_for_base_type(type);
-}
-
-src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size)
-{
-   assert(size > 0);
-
-   init();
-
-   this->file = VGRF;
-   this->nr = v->alloc.allocate(type_size_vec4(type, false) * size);
-
-   this->swizzle = BRW_SWIZZLE_NOOP;
-
-   this->type = brw_type_for_base_type(type);
-}
-
-dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
-{
-   init();
-
-   this->file = VGRF;
-   this->nr = v->alloc.allocate(type_size_vec4(type, false));
-
-   if (glsl_type_is_array(type) || glsl_type_is_struct(type)) {
-      this->writemask = WRITEMASK_XYZW;
-   } else {
-      this->writemask = (1 << type->vector_elements) - 1;
-   }
-
-   this->type = brw_type_for_base_type(type);
-}
-
-vec4_instruction *
-vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
-                          src_reg src0, src_reg src1)
-{
-   vec4_instruction *inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
-   inst->conditional_mod = conditionalmod;
-   return inst;
-}
-
-/**
- * Emits the instructions needed to perform a pull constant load. before_block
- * and before_inst can be NULL in which case the instruction will be appended
- * to the end of the instruction list.
- */
-void
-vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
-                                          src_reg surf_index,
-                                          src_reg offset_reg,
-                                          bblock_t *before_block,
-                                          vec4_instruction *before_inst)
-{
-   assert((before_inst == NULL && before_block == NULL) ||
-          (before_inst && before_block));
-
-   vec4_instruction *pull;
-
-   if (devinfo->ver >= 7) {
-      dst_reg grf_offset = dst_reg(this, glsl_uint_type());
-
-      grf_offset.type = offset_reg.type;
-
-      pull = MOV(grf_offset, offset_reg);
-
-      if (before_inst)
-         emit_before(before_block, before_inst, pull);
-      else
-         emit(pull);
-
-      pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GFX7,
-                                           dst,
-                                           surf_index,
-                                           src_reg(grf_offset));
-      pull->mlen = 1;
-   } else {
-      pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
-                                           dst,
-                                           surf_index,
-                                           offset_reg);
-      pull->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->ver) + 1;
-      pull->mlen = 1;
-   }
-
-   if (before_inst)
-      emit_before(before_block, before_inst, pull);
-   else
-      emit(pull);
-}
-
-src_reg
-vec4_visitor::emit_uniformize(const src_reg &src)
-{
-   const src_reg chan_index(this, glsl_uint_type());
-   const dst_reg dst = retype(dst_reg(this, glsl_uint_type()),
-                              src.type);
-
-   emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dst_reg(chan_index))
-      ->force_writemask_all = true;
-   emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index)
-      ->force_writemask_all = true;
-
-   return src_reg(dst);
-}
-
-void
-vec4_visitor::gs_emit_vertex(int /* stream_id */)
-{
-   unreachable("not reached");
-}
-
-void
-vec4_visitor::gs_end_primitive()
-{
-   unreachable("not reached");
-}
-
-void
-vec4_visitor::emit_ndc_computation()
-{
-   if (output_reg[VARYING_SLOT_POS][0].file == BAD_FILE)
-      return;
-
-   /* Get the position */
-   src_reg pos = src_reg(output_reg[VARYING_SLOT_POS][0]);
-
-   /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
-   dst_reg ndc = dst_reg(this, glsl_vec4_type());
-   output_reg[BRW_VARYING_SLOT_NDC][0] = ndc;
-   output_num_components[BRW_VARYING_SLOT_NDC][0] = 4;
-
-   current_annotation = "NDC";
-   dst_reg ndc_w = ndc;
-   ndc_w.writemask = WRITEMASK_W;
-   src_reg pos_w = pos;
-   pos_w.swizzle = BRW_SWIZZLE4(BRW_SWIZZLE_W, BRW_SWIZZLE_W, BRW_SWIZZLE_W, BRW_SWIZZLE_W);
-   emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
-
-   dst_reg ndc_xyz = ndc;
-   ndc_xyz.writemask = WRITEMASK_XYZ;
-
-   emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
-}
-
-void
-vec4_visitor::emit_psiz_and_flags(dst_reg reg)
-{
-   if (devinfo->ver < 6 &&
-       ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
-        output_reg[VARYING_SLOT_CLIP_DIST0][0].file != BAD_FILE ||
-        devinfo->has_negative_rhw_bug)) {
-      dst_reg header1 = dst_reg(this, glsl_uvec4_type());
-      dst_reg header1_w = header1;
-      header1_w.writemask = WRITEMASK_W;
-
-      emit(MOV(header1, brw_imm_ud(0u)));
-
-      if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
-	 src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ][0]);
-
-	 current_annotation = "Point size";
-	 emit(MUL(header1_w, psiz, brw_imm_f((float)(1 << 11))));
-	 emit(AND(header1_w, src_reg(header1_w), brw_imm_d(0x7ff << 8)));
-      }
-
-      if (output_reg[VARYING_SLOT_CLIP_DIST0][0].file != BAD_FILE) {
-         current_annotation = "Clipping flags";
-         dst_reg flags0 = dst_reg(this, glsl_uint_type());
-
-         emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0][0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
-         emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, brw_imm_d(0));
-         emit(OR(header1_w, src_reg(header1_w), src_reg(flags0)));
-      }
-
-      if (output_reg[VARYING_SLOT_CLIP_DIST1][0].file != BAD_FILE) {
-         dst_reg flags1 = dst_reg(this, glsl_uint_type());
-         emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1][0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
-         emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, brw_imm_d(0));
-         emit(SHL(flags1, src_reg(flags1), brw_imm_d(4)));
-         emit(OR(header1_w, src_reg(header1_w), src_reg(flags1)));
-      }
-
-      /* i965 clipping workaround:
-       * 1) Test for -ve rhw
-       * 2) If set,
-       *      set ndc = (0,0,0,0)
-       *      set ucp[6] = 1
-       *
-       * Later, clipping will detect ucp[6] and ensure the primitive is
-       * clipped against all fixed planes.
-       */
-      if (devinfo->has_negative_rhw_bug &&
-          output_reg[BRW_VARYING_SLOT_NDC][0].file != BAD_FILE) {
-         src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC][0]);
-         ndc_w.swizzle = BRW_SWIZZLE_WWWW;
-         emit(CMP(dst_null_f(), ndc_w, brw_imm_f(0.0f), BRW_CONDITIONAL_L));
-         vec4_instruction *inst;
-         inst = emit(OR(header1_w, src_reg(header1_w), brw_imm_ud(1u << 6)));
-         inst->predicate = BRW_PREDICATE_NORMAL;
-         output_reg[BRW_VARYING_SLOT_NDC][0].type = BRW_REGISTER_TYPE_F;
-         inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC][0], brw_imm_f(0.0f)));
-         inst->predicate = BRW_PREDICATE_NORMAL;
-      }
-
-      emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
-   } else if (devinfo->ver < 6) {
-      emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u)));
-   } else {
-      emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), brw_imm_d(0)));
-      if (output_reg[VARYING_SLOT_PSIZ][0].file != BAD_FILE) {
-         dst_reg reg_w = reg;
-         reg_w.writemask = WRITEMASK_W;
-         src_reg reg_as_src = src_reg(output_reg[VARYING_SLOT_PSIZ][0]);
-         reg_as_src.type = reg_w.type;
-         reg_as_src.swizzle = brw_swizzle_for_size(1);
-         emit(MOV(reg_w, reg_as_src));
-      }
-      if (output_reg[VARYING_SLOT_LAYER][0].file != BAD_FILE) {
-         dst_reg reg_y = reg;
-         reg_y.writemask = WRITEMASK_Y;
-         reg_y.type = BRW_REGISTER_TYPE_D;
-         output_reg[VARYING_SLOT_LAYER][0].type = reg_y.type;
-         emit(MOV(reg_y, src_reg(output_reg[VARYING_SLOT_LAYER][0])));
-      }
-      if (output_reg[VARYING_SLOT_VIEWPORT][0].file != BAD_FILE) {
-         dst_reg reg_z = reg;
-         reg_z.writemask = WRITEMASK_Z;
-         reg_z.type = BRW_REGISTER_TYPE_D;
-         output_reg[VARYING_SLOT_VIEWPORT][0].type = reg_z.type;
-         emit(MOV(reg_z, src_reg(output_reg[VARYING_SLOT_VIEWPORT][0])));
-      }
-   }
-}
-
-vec4_instruction *
-vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying, int component)
-{
-   assert(varying < VARYING_SLOT_MAX);
-
-   unsigned num_comps = output_num_components[varying][component];
-   if (num_comps == 0)
-      return NULL;
-
-   assert(output_reg[varying][component].type == reg.type);
-   current_annotation = output_reg_annotation[varying];
-   if (output_reg[varying][component].file != BAD_FILE) {
-      src_reg src = src_reg(output_reg[varying][component]);
-      src.swizzle = BRW_SWZ_COMP_OUTPUT(component);
-      reg.writemask =
-         brw_writemask_for_component_packing(num_comps, component);
-      return emit(MOV(reg, src));
-   }
-   return NULL;
-}
-
-void
-vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
-{
-   reg.type = BRW_REGISTER_TYPE_F;
-   output_reg[varying][0].type = reg.type;
-
-   switch (varying) {
-   case VARYING_SLOT_PSIZ:
-   {
-      /* PSIZ is always in slot 0, and is coupled with other flags. */
-      current_annotation = "indices, point width, clip flags";
-      emit_psiz_and_flags(reg);
-      break;
-   }
-   case BRW_VARYING_SLOT_NDC:
-      current_annotation = "NDC";
-      if (output_reg[BRW_VARYING_SLOT_NDC][0].file != BAD_FILE)
-         emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC][0])));
-      break;
-   case VARYING_SLOT_POS:
-      current_annotation = "gl_Position";
-      if (output_reg[VARYING_SLOT_POS][0].file != BAD_FILE)
-         emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS][0])));
-      break;
-   case BRW_VARYING_SLOT_PAD:
-      /* No need to write to this slot */
-      break;
-   default:
-      for (int i = 0; i < 4; i++) {
-         emit_generic_urb_slot(reg, varying, i);
-      }
-      break;
-   }
-}
-
-static unsigned
-align_interleaved_urb_mlen(const struct intel_device_info *devinfo,
-                           unsigned mlen)
-{
-   if (devinfo->ver >= 6) {
-      /* URB data written (does not include the message header reg) must
-       * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
-       * section 5.4.3.2.2: URB_INTERLEAVED.
-       *
-       * URB entries are allocated on a multiple of 1024 bits, so an
-       * extra 128 bits written here to make the end align to 256 is
-       * no problem.
-       */
-      if ((mlen % 2) != 1)
-	 mlen++;
-   }
-
-   return mlen;
-}
-
-
-/**
- * Generates the VUE payload plus the necessary URB write instructions to
- * output it.
- *
- * The VUE layout is documented in Volume 2a.
- */
-void
-vec4_visitor::emit_vertex()
-{
-   /* MRF 0 is reserved for the debugger, so start with message header
-    * in MRF 1.
-    */
-   int base_mrf = 1;
-   int mrf = base_mrf;
-   /* In the process of generating our URB write message contents, we
-    * may need to unspill a register or load from an array.  Those
-    * reads would use MRFs 14-15.
-    */
-   int max_usable_mrf = FIRST_SPILL_MRF(devinfo->ver);
-
-   /* The following assertion verifies that max_usable_mrf causes an
-    * even-numbered amount of URB write data, which will meet gfx6's
-    * requirements for length alignment.
-    */
-   assert ((max_usable_mrf - base_mrf) % 2 == 0);
-
-   /* First mrf is the g0-based message header containing URB handles and
-    * such.
-    */
-   emit_urb_write_header(mrf++);
-
-   if (devinfo->ver < 6) {
-      emit_ndc_computation();
-   }
-
-   /* We may need to split this up into several URB writes, so do them in a
-    * loop.
-    */
-   int slot = 0;
-   bool complete = false;
-   do {
-      /* URB offset is in URB row increments, and each of our MRFs is half of
-       * one of those, since we're doing interleaved writes.
-       */
-      int offset = slot / 2;
-
-      mrf = base_mrf + 1;
-      for (; slot < prog_data->vue_map.num_slots; ++slot) {
-         emit_urb_slot(dst_reg(MRF, mrf++),
-                       prog_data->vue_map.slot_to_varying[slot]);
-
-         /* If this was max_usable_mrf, we can't fit anything more into this
-          * URB WRITE. Same thing if we reached the maximum length available.
-          */
-         if (mrf > max_usable_mrf ||
-             align_interleaved_urb_mlen(devinfo, mrf - base_mrf + 1) > BRW_MAX_MSG_LENGTH) {
-            slot++;
-            break;
-         }
-      }
-
-      complete = slot >= prog_data->vue_map.num_slots;
-      current_annotation = "URB write";
-      vec4_instruction *inst = emit_urb_write_opcode(complete);
-      inst->base_mrf = base_mrf;
-      inst->mlen = align_interleaved_urb_mlen(devinfo, mrf - base_mrf);
-      inst->offset += offset;
-   } while(!complete);
-}
-
-
-src_reg
-vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst,
-				 src_reg *reladdr, int reg_offset)
-{
-   /* Because we store the values to scratch interleaved like our
-    * vertex data, we need to scale the vec4 index by 2.
-    */
-   int message_header_scale = 2;
-
-   /* Pre-gfx6, the message header uses byte offsets instead of vec4
-    * (16-byte) offset units.
-    */
-   if (devinfo->ver < 6)
-      message_header_scale *= 16;
-
-   if (reladdr) {
-      /* A vec4 is 16 bytes and a dvec4 is 32 bytes so for doubles we have
-       * to multiply the reladdr by 2. Notice that the reg_offset part
-       * is in units of 16 bytes and is used to select the low/high 16-byte
-       * chunk of a full dvec4, so we don't want to multiply that part.
-       */
-      src_reg index = src_reg(this, glsl_int_type());
-      if (type_sz(inst->dst.type) < 8) {
-         emit_before(block, inst, ADD(dst_reg(index), *reladdr,
-                                      brw_imm_d(reg_offset)));
-         emit_before(block, inst, MUL(dst_reg(index), index,
-                                      brw_imm_d(message_header_scale)));
-      } else {
-         emit_before(block, inst, MUL(dst_reg(index), *reladdr,
-                                      brw_imm_d(message_header_scale * 2)));
-         emit_before(block, inst, ADD(dst_reg(index), index,
-                                      brw_imm_d(reg_offset * message_header_scale)));
-      }
-      return index;
-   } else {
-      return brw_imm_d(reg_offset * message_header_scale);
-   }
-}
-
-/**
- * Emits an instruction before @inst to load the value named by @orig_src
- * from scratch space at @base_offset to @temp.
- *
- * @base_offset is measured in 32-byte units (the size of a register).
- */
-void
-vec4_visitor::emit_scratch_read(bblock_t *block, vec4_instruction *inst,
-				dst_reg temp, src_reg orig_src,
-				int base_offset)
-{
-   assert(orig_src.offset % REG_SIZE == 0);
-   int reg_offset = base_offset + orig_src.offset / REG_SIZE;
-   src_reg index = get_scratch_offset(block, inst, orig_src.reladdr,
-                                      reg_offset);
-
-   if (type_sz(orig_src.type) < 8) {
-      emit_before(block, inst, SCRATCH_READ(temp, index));
-   } else {
-      dst_reg shuffled = dst_reg(this, glsl_dvec4_type());
-      dst_reg shuffled_float = retype(shuffled, BRW_REGISTER_TYPE_F);
-      emit_before(block, inst, SCRATCH_READ(shuffled_float, index));
-      index = get_scratch_offset(block, inst, orig_src.reladdr, reg_offset + 1);
-      vec4_instruction *last_read =
-         SCRATCH_READ(byte_offset(shuffled_float, REG_SIZE), index);
-      emit_before(block, inst, last_read);
-      shuffle_64bit_data(temp, src_reg(shuffled), false, true, block, last_read);
-   }
-}
-
-/**
- * Emits an instruction after @inst to store the value to be written
- * to @orig_dst to scratch space at @base_offset, from @temp.
- *
- * @base_offset is measured in 32-byte units (the size of a register).
- */
-void
-vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst,
-                                 int base_offset)
-{
-   assert(inst->dst.offset % REG_SIZE == 0);
-   int reg_offset = base_offset + inst->dst.offset / REG_SIZE;
-   src_reg index = get_scratch_offset(block, inst, inst->dst.reladdr,
-                                      reg_offset);
-
-   /* Create a temporary register to store *inst's result in.
-    *
-    * We have to be careful in MOVing from our temporary result register in
-    * the scratch write.  If we swizzle from channels of the temporary that
-    * weren't initialized, it will confuse live interval analysis, which will
-    * make spilling fail to make progress.
-    */
-   bool is_64bit = type_sz(inst->dst.type) == 8;
-   const glsl_type *alloc_type =
-      is_64bit ? glsl_dvec4_type() : glsl_vec4_type();
-   const src_reg temp = swizzle(retype(src_reg(this, alloc_type),
-                                       inst->dst.type),
-                                brw_swizzle_for_mask(inst->dst.writemask));
-
-   if (!is_64bit) {
-      dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
-				          inst->dst.writemask));
-      vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
-      if (inst->opcode != BRW_OPCODE_SEL)
-         write->predicate = inst->predicate;
-      write->ir = inst->ir;
-      write->annotation = inst->annotation;
-      inst->insert_after(block, write);
-   } else {
-      dst_reg shuffled = dst_reg(this, alloc_type);
-      vec4_instruction *last =
-         shuffle_64bit_data(shuffled, temp, true, true, block, inst);
-      src_reg shuffled_float = src_reg(retype(shuffled, BRW_REGISTER_TYPE_F));
-
-      uint8_t mask = 0;
-      if (inst->dst.writemask & WRITEMASK_X)
-         mask |= WRITEMASK_XY;
-      if (inst->dst.writemask & WRITEMASK_Y)
-         mask |= WRITEMASK_ZW;
-      if (mask) {
-         dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0), mask));
-
-         vec4_instruction *write = SCRATCH_WRITE(dst, shuffled_float, index);
-         if (inst->opcode != BRW_OPCODE_SEL)
-            write->predicate = inst->predicate;
-         write->ir = inst->ir;
-         write->annotation = inst->annotation;
-         last->insert_after(block, write);
-      }
-
-      mask = 0;
-      if (inst->dst.writemask & WRITEMASK_Z)
-         mask |= WRITEMASK_XY;
-      if (inst->dst.writemask & WRITEMASK_W)
-         mask |= WRITEMASK_ZW;
-      if (mask) {
-         dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0), mask));
-
-         src_reg index = get_scratch_offset(block, inst, inst->dst.reladdr,
-                                            reg_offset + 1);
-         vec4_instruction *write =
-            SCRATCH_WRITE(dst, byte_offset(shuffled_float, REG_SIZE), index);
-         if (inst->opcode != BRW_OPCODE_SEL)
-            write->predicate = inst->predicate;
-         write->ir = inst->ir;
-         write->annotation = inst->annotation;
-         last->insert_after(block, write);
-      }
-   }
-
-   inst->dst.file = temp.file;
-   inst->dst.nr = temp.nr;
-   inst->dst.offset %= REG_SIZE;
-   inst->dst.reladdr = NULL;
-}
-
-/**
- * Checks if \p src and/or \p src.reladdr require a scratch read, and if so,
- * adds the scratch read(s) before \p inst. The function also checks for
- * recursive reladdr scratch accesses, issuing the corresponding scratch
- * loads and rewriting reladdr references accordingly.
- *
- * \return \p src if it did not require a scratch load, otherwise, the
- * register holding the result of the scratch load that the caller should
- * use to rewrite src.
- */
-src_reg
-vec4_visitor::emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
-                                   vec4_instruction *inst, src_reg src)
-{
-   /* Resolve recursive reladdr scratch access by calling ourselves
-    * with src.reladdr
-    */
-   if (src.reladdr)
-      *src.reladdr = emit_resolve_reladdr(scratch_loc, block, inst,
-                                          *src.reladdr);
-
-   /* Now handle scratch access on src */
-   if (src.file == VGRF && scratch_loc[src.nr] != -1) {
-      dst_reg temp = dst_reg(this, type_sz(src.type) == 8 ?
-         glsl_dvec4_type() : glsl_vec4_type());
-      emit_scratch_read(block, inst, temp, src, scratch_loc[src.nr]);
-      src.nr = temp.nr;
-      src.offset %= REG_SIZE;
-      src.reladdr = NULL;
-   }
-
-   return src;
-}
-
-/**
- * We can't generally support array access in GRF space, because a
- * single instruction's destination can only span 2 contiguous
- * registers.  So, we send all GRF arrays that get variable index
- * access to scratch space.
- */
-void
-vec4_visitor::move_grf_array_access_to_scratch()
-{
-   int scratch_loc[this->alloc.count];
-   memset(scratch_loc, -1, sizeof(scratch_loc));
-
-   /* First, calculate the set of virtual GRFs that need to be punted
-    * to scratch due to having any array access on them, and where in
-    * scratch.
-    */
-   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      if (inst->dst.file == VGRF && inst->dst.reladdr) {
-         if (scratch_loc[inst->dst.nr] == -1) {
-            scratch_loc[inst->dst.nr] = last_scratch;
-            last_scratch += this->alloc.sizes[inst->dst.nr];
-         }
-
-         for (src_reg *iter = inst->dst.reladdr;
-              iter->reladdr;
-              iter = iter->reladdr) {
-            if (iter->file == VGRF && scratch_loc[iter->nr] == -1) {
-               scratch_loc[iter->nr] = last_scratch;
-               last_scratch += this->alloc.sizes[iter->nr];
-            }
-         }
-      }
-
-      for (int i = 0 ; i < 3; i++) {
-         for (src_reg *iter = &inst->src[i];
-              iter->reladdr;
-              iter = iter->reladdr) {
-            if (iter->file == VGRF && scratch_loc[iter->nr] == -1) {
-               scratch_loc[iter->nr] = last_scratch;
-               last_scratch += this->alloc.sizes[iter->nr];
-            }
-         }
-      }
-   }
-
-   /* Now, for anything that will be accessed through scratch, rewrite
-    * it to load/store.  Note that this is a _safe list walk, because
-    * we may generate a new scratch_write instruction after the one
-    * we're processing.
-    */
-   foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
-      /* Set up the annotation tracking for new generated instructions. */
-      base_ir = inst->ir;
-      current_annotation = inst->annotation;
-
-      /* First handle scratch access on the dst. Notice we have to handle
-       * the case where the dst's reladdr also points to scratch space.
-       */
-      if (inst->dst.reladdr)
-         *inst->dst.reladdr = emit_resolve_reladdr(scratch_loc, block, inst,
-                                                   *inst->dst.reladdr);
-
-      /* Now that we have handled any (possibly recursive) reladdr scratch
-       * accesses for dst we can safely do the scratch write for dst itself
-       */
-      if (inst->dst.file == VGRF && scratch_loc[inst->dst.nr] != -1)
-         emit_scratch_write(block, inst, scratch_loc[inst->dst.nr]);
-
-      /* Now handle scratch access on any src. In this case, since inst->src[i]
-       * already is a src_reg, we can just call emit_resolve_reladdr with
-       * inst->src[i] and it will take care of handling scratch loads for
-       * both src and src.reladdr (recursively).
-       */
-      for (int i = 0 ; i < 3; i++) {
-         inst->src[i] = emit_resolve_reladdr(scratch_loc, block, inst,
-                                             inst->src[i]);
-      }
-   }
-}
-
-void
-vec4_visitor::resolve_ud_negate(src_reg *reg)
-{
-   if (reg->type != BRW_REGISTER_TYPE_UD ||
-       !reg->negate)
-      return;
-
-   src_reg temp = src_reg(this, glsl_uvec4_type());
-   emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
-   *reg = temp;
-}
-
-static brw_rnd_mode
-brw_rnd_mode_from_execution_mode(unsigned execution_mode)
-{
-   if (nir_has_any_rounding_mode_rtne(execution_mode))
-      return BRW_RND_MODE_RTNE;
-   if (nir_has_any_rounding_mode_rtz(execution_mode))
-      return BRW_RND_MODE_RTZ;
-   return BRW_RND_MODE_UNSPECIFIED;
-}
-
-void
-vec4_visitor::emit_shader_float_controls_execution_mode()
-{
-   unsigned execution_mode = this->nir->info.float_controls_execution_mode;
-   if (nir_has_any_rounding_mode_enabled(execution_mode)) {
-      brw_rnd_mode rnd = brw_rnd_mode_from_execution_mode(execution_mode);
-      const vec4_builder bld = vec4_builder(this).at_end();
-      bld.exec_all().emit(SHADER_OPCODE_RND_MODE, dst_null_ud(), brw_imm_d(rnd));
-   }
-}
-
-vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
-                           const struct brw_compile_params *params,
-                           const struct brw_sampler_prog_key_data *key_tex,
-                           struct brw_vue_prog_data *prog_data,
-                           const nir_shader *shader,
-                           bool no_spills,
-                           bool debug_enabled)
-   : backend_shader(compiler, params, shader, &prog_data->base, debug_enabled),
-     key_tex(key_tex),
-     prog_data(prog_data),
-     fail_msg(NULL),
-     first_non_payload_grf(0),
-     ubo_push_start(),
-     push_length(0),
-     live_analysis(this), performance_analysis(this),
-     no_spills(no_spills),
-     last_scratch(0)
-{
-   this->failed = false;
-
-   this->base_ir = NULL;
-   this->current_annotation = NULL;
-   memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
-
-   memset(this->output_num_components, 0, sizeof(this->output_num_components));
-
-   this->max_grf = devinfo->ver >= 7 ? GFX7_MRF_HACK_START : BRW_MAX_GRF;
-
-   this->uniforms = 0;
-
-   this->nir_ssa_values = NULL;
-}
-
-
-void
-vec4_visitor::fail(const char *format, ...)
-{
-   va_list va;
-   char *msg;
-
-   if (failed)
-      return;
-
-   failed = true;
-
-   va_start(va, format);
-   msg = ralloc_vasprintf(mem_ctx, format, va);
-   va_end(va);
-   msg = ralloc_asprintf(mem_ctx, "%s compile failed: %s\n",
-                         _mesa_shader_stage_to_abbrev(stage), msg);
-
-   this->fail_msg = msg;
-
-   if (unlikely(debug_enabled)) {
-      fprintf(stderr, "%s",  msg);
-   }
-}
-
-} /* namespace brw */
diff --git a/src/intel/compiler/brw_vec4_vs.h b/src/intel/compiler/brw_vec4_vs.h
deleted file mode 100644
index 0929df5ff3d..00000000000
--- a/src/intel/compiler/brw_vec4_vs.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright © 2006 - 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef BRW_VEC4_VS_VISITOR_H
-#define BRW_VEC4_VS_VISITOR_H
-
-#include "brw_vec4.h"
-
-namespace brw {
-
-class vec4_vs_visitor : public vec4_visitor
-{
-public:
-   vec4_vs_visitor(const struct brw_compiler *compiler,
-                   const struct brw_compile_params *params,
-                   const struct brw_vs_prog_key *key,
-                   struct brw_vs_prog_data *vs_prog_data,
-                   const nir_shader *shader,
-                   bool debug_enabled);
-
-protected:
-   virtual void setup_payload();
-   virtual void emit_prolog();
-   virtual void emit_thread_end();
-   virtual void emit_urb_write_header(int mrf);
-   virtual void emit_urb_slot(dst_reg reg, int varying);
-   virtual vec4_instruction *emit_urb_write_opcode(bool complete);
-
-private:
-   int setup_attributes(int payload_reg);
-
-   const struct brw_vs_prog_key *const key;
-   struct brw_vs_prog_data * const vs_prog_data;
-};
-
-} /* namespace brw */
-
-#endif /* BRW_VEC4_VS_VISITOR_H */
diff --git a/src/intel/compiler/brw_vec4_vs_visitor.cpp b/src/intel/compiler/brw_vec4_vs_visitor.cpp
deleted file mode 100644
index c30a3434451..00000000000
--- a/src/intel/compiler/brw_vec4_vs_visitor.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-
-#include "brw_vec4_vs.h"
-#include "dev/intel_debug.h"
-
-namespace brw {
-
-void
-vec4_vs_visitor::emit_prolog()
-{
-}
-
-
-void
-vec4_vs_visitor::emit_urb_write_header(int mrf)
-{
-   /* No need to do anything for VS; an implied write to this MRF will be
-    * performed by VEC4_VS_OPCODE_URB_WRITE.
-    */
-   (void) mrf;
-}
-
-
-vec4_instruction *
-vec4_vs_visitor::emit_urb_write_opcode(bool complete)
-{
-   vec4_instruction *inst = emit(VEC4_VS_OPCODE_URB_WRITE);
-   inst->urb_write_flags = complete ?
-      BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;
-
-   return inst;
-}
-
-
-void
-vec4_vs_visitor::emit_urb_slot(dst_reg reg, int varying)
-{
-   reg.type = BRW_REGISTER_TYPE_F;
-   output_reg[varying][0].type = reg.type;
-
-   switch (varying) {
-   case VARYING_SLOT_COL0:
-   case VARYING_SLOT_COL1:
-   case VARYING_SLOT_BFC0:
-   case VARYING_SLOT_BFC1: {
-      /* These built-in varyings are only supported in compatibility mode,
-       * and we only support GS in core profile.  So, this must be a vertex
-       * shader.
-       */
-      vec4_instruction *inst = emit_generic_urb_slot(reg, varying, 0);
-      if (inst && key->clamp_vertex_color)
-         inst->saturate = true;
-      break;
-   }
-   default:
-      return vec4_visitor::emit_urb_slot(reg, varying);
-   }
-}
-
-
-void
-vec4_vs_visitor::emit_thread_end()
-{
-   /* For VS, we always end the thread by emitting a single vertex.
-    * emit_urb_write_opcode() will take care of setting the eot flag on the
-    * SEND instruction.
-    */
-   emit_vertex();
-}
-
-
-vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler,
-                                 const struct brw_compile_params *params,
-                                 const struct brw_vs_prog_key *key,
-                                 struct brw_vs_prog_data *vs_prog_data,
-                                 const nir_shader *shader,
-                                 bool debug_enabled)
-   : vec4_visitor(compiler, params, &key->base.tex, &vs_prog_data->base,
-                  shader, false /* no_spills */, debug_enabled),
-     key(key),
-     vs_prog_data(vs_prog_data)
-{
-}
-
-
-} /* namespace brw */
diff --git a/src/intel/compiler/gfx6_gs_visitor.cpp b/src/intel/compiler/gfx6_gs_visitor.cpp
deleted file mode 100644
index 5465094ed36..00000000000
--- a/src/intel/compiler/gfx6_gs_visitor.cpp
+++ /dev/null
@@ -1,702 +0,0 @@
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * This code is based on original work by Ilia Mirkin.
- */
-
-/**
- * \file gfx6_gs_visitor.cpp
- *
- * Gfx6 geometry shader implementation
- */
-
-#include "gfx6_gs_visitor.h"
-#include "brw_eu.h"
-#include "brw_prim.h"
-
-namespace brw {
-
-void
-gfx6_gs_visitor::emit_prolog()
-{
-   vec4_gs_visitor::emit_prolog();
-
-   /* Gfx6 geometry shaders require to allocate an initial VUE handle via
-    * FF_SYNC message, however the documentation remarks that only one thread
-    * can write to the URB simultaneously and the FF_SYNC message provides the
-    * synchronization mechanism for this, so using this message effectively
-    * stalls the thread until it is its turn to write to the URB. Because of
-    * this, the best way to implement geometry shader algorithms in gfx6 is to
-    * execute the algorithm before the FF_SYNC message to maximize parallelism.
-    *
-    * To achieve this we buffer the geometry shader outputs for each emitted
-    * vertex in vertex_output during operation. Then, when we have processed
-    * the last vertex (that is, at thread end time), we send the FF_SYNC
-    * message to allocate the initial VUE handle and write all buffered vertex
-    * data to the URB in one go.
-    *
-    * For each emitted vertex, vertex_output will hold vue_map.num_slots
-    * data items plus one additional item to hold required flags
-    * (PrimType, PrimStart, PrimEnd, as expected by the URB_WRITE message)
-    * which come right after the data items for that vertex. Vertex data and
-    * flags for the next vertex come right after the data items and flags for
-    * the previous vertex.
-    */
-   this->current_annotation = "gfx6 prolog";
-   this->vertex_output = src_reg(this,
-                                 glsl_uint_type(),
-                                 (prog_data->vue_map.num_slots + 1) *
-                                 nir->info.gs.vertices_out);
-   this->vertex_output_offset = src_reg(this, glsl_uint_type());
-   emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_ud(0u)));
-
-   /* MRF 1 will be the header for all messages (FF_SYNC and URB_WRITES),
-    * so initialize it once to R0.
-    */
-   vec4_instruction *inst = emit(MOV(dst_reg(MRF, 1),
-                                     retype(brw_vec8_grf(0, 0),
-                                            BRW_REGISTER_TYPE_UD)));
-   inst->force_writemask_all = true;
-
-   /* This will be used as a temporary to store writeback data of FF_SYNC
-    * and URB_WRITE messages.
-    */
-   this->temp = src_reg(this, glsl_uint_type());
-
-   /* This will be used to know when we are processing the first vertex of
-    * a primitive. We will set this to URB_WRITE_PRIM_START only when we know
-    * that we are processing the first vertex in the primitive and to zero
-    * otherwise. This way we can use its value directly in the URB write
-    * headers.
-    */
-   this->first_vertex = src_reg(this, glsl_uint_type());
-   emit(MOV(dst_reg(this->first_vertex), brw_imm_ud(URB_WRITE_PRIM_START)));
-
-   /* The FF_SYNC message requires to know the number of primitives generated,
-    * so keep a counter for this.
-    */
-   this->prim_count = src_reg(this, glsl_uint_type());
-   emit(MOV(dst_reg(this->prim_count), brw_imm_ud(0u)));
-
-   if (gs_prog_data->num_transform_feedback_bindings) {
-      /* Create a virtual register to hold destination indices in SOL */
-      this->destination_indices = src_reg(this, glsl_uvec4_type());
-      /* Create a virtual register to hold number of written primitives */
-      this->sol_prim_written = src_reg(this, glsl_uint_type());
-      /* Create a virtual register to hold Streamed Vertex Buffer Indices */
-      this->svbi = src_reg(this, glsl_uvec4_type());
-      /* Create a virtual register to hold max values of SVBI */
-      this->max_svbi = src_reg(this, glsl_uvec4_type());
-      emit(MOV(dst_reg(this->max_svbi),
-               src_reg(retype(brw_vec1_grf(1, 4), BRW_REGISTER_TYPE_UD))));
-   }
-
-   /* PrimitveID is delivered in r0.1 of the thread payload. If the program
-    * needs it we have to move it to a separate register where we can map
-    * the attribute.
-    *
-    * Notice that we cannot use a virtual register for this, because we need to
-    * map all input attributes to hardware registers in setup_payload(),
-    * which happens before virtual registers are mapped to hardware registers.
-    * We could work around that issue if we were able to compute the first
-    * non-payload register here and move the PrimitiveID information to that
-    * register, but we can't because at this point we don't know the final
-    * number uniforms that will be included in the payload.
-    *
-    * So, what we do is to place PrimitiveID information in r1, which is always
-    * delivered as part of the payload, but its only populated with data
-    * relevant for transform feedback when we set GFX6_GS_SVBI_PAYLOAD_ENABLE
-    * in the 3DSTATE_GS state packet. That information can be obtained by other
-    * means though, so we can safely use r1 for this purpose.
-    */
-   if (gs_prog_data->include_primitive_id) {
-      this->primitive_id =
-         src_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
-      emit(GS_OPCODE_SET_PRIMITIVE_ID, dst_reg(this->primitive_id));
-   }
-}
-
-void
-gfx6_gs_visitor::gs_emit_vertex(int stream_id)
-{
-   this->current_annotation = "gfx6 emit vertex";
-
-   /* Buffer all output slots for this vertex in vertex_output */
-   for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
-      int varying = prog_data->vue_map.slot_to_varying[slot];
-      if (varying != VARYING_SLOT_PSIZ) {
-         dst_reg dst(this->vertex_output);
-         dst.reladdr = ralloc(mem_ctx, src_reg);
-         memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
-         emit_urb_slot(dst, varying);
-      } else {
-         /* The PSIZ slot can pack multiple varyings in different channels
-          * and emit_urb_slot() will produce a MOV instruction for each of
-          * them. Since we are writing to an array, that will translate to
-          * possibly multiple MOV instructions with an array destination and
-          * each will generate a scratch write with the same offset into
-          * scratch space (thus, each one overwriting the previous). This is
-          * not what we want. What we will do instead is emit PSIZ to a
-          * a regular temporary register, then move that register into the
-          * array. This way we only have one instruction with an array
-          * destination and we only produce a single scratch write.
-          */
-         dst_reg tmp = dst_reg(src_reg(this, glsl_uvec4_type()));
-         emit_urb_slot(tmp, varying);
-         dst_reg dst(this->vertex_output);
-         dst.reladdr = ralloc(mem_ctx, src_reg);
-         memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
-         vec4_instruction *inst = emit(MOV(dst, src_reg(tmp)));
-         inst->force_writemask_all = true;
-      }
-
-      emit(ADD(dst_reg(this->vertex_output_offset),
-               this->vertex_output_offset, brw_imm_ud(1u)));
-   }
-
-   /* Now buffer flags for this vertex */
-   dst_reg dst(this->vertex_output);
-   dst.reladdr = ralloc(mem_ctx, src_reg);
-   memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
-   if (nir->info.gs.output_primitive == MESA_PRIM_POINTS) {
-      /* If we are outputting points, then every vertex has PrimStart and
-       * PrimEnd set.
-       */
-      emit(MOV(dst, brw_imm_d((_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) |
-                              URB_WRITE_PRIM_START | URB_WRITE_PRIM_END)));
-      emit(ADD(dst_reg(this->prim_count), this->prim_count, brw_imm_ud(1u)));
-   } else {
-      /* Otherwise, we can only set the PrimStart flag, which we have stored
-       * in the first_vertex register. We will have to wait until we execute
-       * EndPrimitive() or we end the thread to set the PrimEnd flag on a
-       * vertex.
-       */
-      emit(OR(dst, this->first_vertex,
-              brw_imm_ud(gs_prog_data->output_topology <<
-                         URB_WRITE_PRIM_TYPE_SHIFT)));
-      emit(MOV(dst_reg(this->first_vertex), brw_imm_ud(0u)));
-   }
-   emit(ADD(dst_reg(this->vertex_output_offset),
-            this->vertex_output_offset, brw_imm_ud(1u)));
-}
-
-void
-gfx6_gs_visitor::gs_end_primitive()
-{
-   this->current_annotation = "gfx6 end primitive";
-   /* Calling EndPrimitive() is optional for point output. In this case we set
-    * the PrimEnd flag when we process EmitVertex().
-    */
-   if (nir->info.gs.output_primitive == MESA_PRIM_POINTS)
-      return;
-
-   /* Otherwise we know that the last vertex we have processed was the last
-    * vertex in the primitive and we need to set its PrimEnd flag, so do this
-    * unless we haven't emitted that vertex at all (vertex_count != 0).
-    *
-    * Notice that we have already incremented vertex_count when we processed
-    * the last emit_vertex, so we need to take that into account in the
-    * comparison below (hence the num_output_vertices + 1 in the comparison
-    * below).
-    */
-   unsigned num_output_vertices = nir->info.gs.vertices_out;
-   emit(CMP(dst_null_ud(), this->vertex_count,
-            brw_imm_ud(num_output_vertices + 1), BRW_CONDITIONAL_L));
-   vec4_instruction *inst = emit(CMP(dst_null_ud(),
-                                     this->vertex_count, brw_imm_ud(0u),
-                                     BRW_CONDITIONAL_NEQ));
-   inst->predicate = BRW_PREDICATE_NORMAL;
-   emit(IF(BRW_PREDICATE_NORMAL));
-   {
-      /* vertex_output_offset is already pointing at the first entry of the
-       * next vertex. So subtract 1 to modify the flags for the previous
-       * vertex.
-       */
-      src_reg offset(this, glsl_uint_type());
-      emit(ADD(dst_reg(offset), this->vertex_output_offset, brw_imm_d(-1)));
-
-      src_reg dst(this->vertex_output);
-      dst.reladdr = ralloc(mem_ctx, src_reg);
-      memcpy(dst.reladdr, &offset, sizeof(src_reg));
-
-      emit(OR(dst_reg(dst), dst, brw_imm_d(URB_WRITE_PRIM_END)));
-      emit(ADD(dst_reg(this->prim_count), this->prim_count, brw_imm_ud(1u)));
-
-      /* Set the first vertex flag to indicate that the next vertex will start
-       * a primitive.
-       */
-      emit(MOV(dst_reg(this->first_vertex), brw_imm_d(URB_WRITE_PRIM_START)));
-   }
-   emit(BRW_OPCODE_ENDIF);
-}
-
-void
-gfx6_gs_visitor::emit_urb_write_header(int mrf)
-{
-   this->current_annotation = "gfx6 urb header";
-   /* Compute offset of the flags for the current vertex in vertex_output and
-    * write them in dw2 of the message header.
-    *
-    * Notice that by the time that emit_thread_end() calls here
-    * vertex_output_offset should point to the first data item of the current
-    * vertex in vertex_output, thus we only need to add the number of output
-    * slots per vertex to that offset to obtain the flags data offset.
-    */
-   src_reg flags_offset(this, glsl_uint_type());
-   emit(ADD(dst_reg(flags_offset),
-            this->vertex_output_offset,
-            brw_imm_d(prog_data->vue_map.num_slots)));
-
-   src_reg flags_data(this->vertex_output);
-   flags_data.reladdr = ralloc(mem_ctx, src_reg);
-   memcpy(flags_data.reladdr, &flags_offset, sizeof(src_reg));
-
-   emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, mrf), flags_data);
-}
-
-static unsigned
-align_interleaved_urb_mlen(unsigned mlen)
-{
-   /* URB data written (does not include the message header reg) must
-    * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
-    * section 5.4.3.2.2: URB_INTERLEAVED.
-    */
-   if ((mlen % 2) != 1)
-      mlen++;
-   return mlen;
-}
-
-void
-gfx6_gs_visitor::emit_snb_gs_urb_write_opcode(bool complete, int base_mrf,
-                                              int last_mrf, int urb_offset)
-{
-   vec4_instruction *inst = NULL;
-
-   if (!complete) {
-      /* If the vertex is not complete we don't have to do anything special */
-      inst = emit(VEC4_GS_OPCODE_URB_WRITE);
-      inst->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
-   } else {
-      /* Otherwise we always request to allocate a new VUE handle. If this is
-       * the last write before the EOT message and the new handle never gets
-       * used it will be dereferenced when we send the EOT message. This is
-       * necessary to avoid different setups for the EOT message (one for the
-       * case when there is no output and another for the case when there is)
-       * which would require to end the program with an IF/ELSE/ENDIF block,
-       * something we do not want.
-       */
-      inst = emit(VEC4_GS_OPCODE_URB_WRITE_ALLOCATE);
-      inst->urb_write_flags = BRW_URB_WRITE_COMPLETE;
-      inst->dst = dst_reg(MRF, base_mrf);
-      inst->src[0] = this->temp;
-   }
-
-   inst->base_mrf = base_mrf;
-   inst->mlen = align_interleaved_urb_mlen(last_mrf - base_mrf);
-   inst->offset = urb_offset;
-}
-
-void
-gfx6_gs_visitor::emit_thread_end()
-{
-   /* Make sure the current primitive is ended: we know it is not ended when
-    * first_vertex is not zero. This is only relevant for outputs other than
-    * points because in the point case we set PrimEnd on all vertices.
-    */
-   if (nir->info.gs.output_primitive != MESA_PRIM_POINTS) {
-      emit(CMP(dst_null_ud(), this->first_vertex, brw_imm_ud(0u), BRW_CONDITIONAL_Z));
-      emit(IF(BRW_PREDICATE_NORMAL));
-      gs_end_primitive();
-      emit(BRW_OPCODE_ENDIF);
-   }
-
-   /* Here we have to:
-    * 1) Emit an FF_SYNC message to obtain an initial VUE handle.
-    * 2) Loop over all buffered vertex data and write it to corresponding
-    *    URB entries.
-    * 3) Allocate new VUE handles for all vertices other than the first.
-    * 4) Send a final EOT message.
-    */
-
-   /* MRF 0 is reserved for the debugger, so start with message header
-    * in MRF 1.
-    */
-   int base_mrf = 1;
-
-   /* In the process of generating our URB write message contents, we
-    * may need to unspill a register or load from an array.  Those
-    * reads would use MRFs 21..23
-    */
-   int max_usable_mrf = FIRST_SPILL_MRF(devinfo->ver);
-
-   /* Issue the FF_SYNC message and obtain the initial VUE handle. */
-   this->current_annotation = "gfx6 thread end: ff_sync";
-
-   vec4_instruction *inst = NULL;
-   if (gs_prog_data->num_transform_feedback_bindings) {
-      src_reg sol_temp(this, glsl_uvec4_type());
-      emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
-           dst_reg(this->svbi),
-           this->vertex_count,
-           this->prim_count,
-           sol_temp);
-      inst = emit(GS_OPCODE_FF_SYNC,
-                  dst_reg(this->temp), this->prim_count, this->svbi);
-   } else {
-      inst = emit(GS_OPCODE_FF_SYNC,
-                  dst_reg(this->temp), this->prim_count, brw_imm_ud(0u));
-   }
-   inst->base_mrf = base_mrf;
-
-   emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u), BRW_CONDITIONAL_G));
-   emit(IF(BRW_PREDICATE_NORMAL));
-   {
-      /* Loop over all buffered vertices and emit URB write messages */
-      this->current_annotation = "gfx6 thread end: urb writes init";
-      src_reg vertex(this, glsl_uint_type());
-      emit(MOV(dst_reg(vertex), brw_imm_ud(0u)));
-      emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_ud(0u)));
-
-      this->current_annotation = "gfx6 thread end: urb writes";
-      emit(BRW_OPCODE_DO);
-      {
-         emit(CMP(dst_null_d(), vertex, this->vertex_count, BRW_CONDITIONAL_GE));
-         inst = emit(BRW_OPCODE_BREAK);
-         inst->predicate = BRW_PREDICATE_NORMAL;
-
-         /* First we prepare the message header */
-         emit_urb_write_header(base_mrf);
-
-         /* Then add vertex data to the message in interleaved fashion */
-         int slot = 0;
-         bool complete = false;
-         do {
-            int mrf = base_mrf + 1;
-
-            /* URB offset is in URB row increments, and each of our MRFs is half
-             * of one of those, since we're doing interleaved writes.
-             */
-            int urb_offset = slot / 2;
-
-            for (; slot < prog_data->vue_map.num_slots; ++slot) {
-               int varying = prog_data->vue_map.slot_to_varying[slot];
-               current_annotation = output_reg_annotation[varying];
-
-               /* Compute offset of this slot for the current vertex
-                * in vertex_output
-                */
-               src_reg data(this->vertex_output);
-               data.reladdr = ralloc(mem_ctx, src_reg);
-               memcpy(data.reladdr, &this->vertex_output_offset,
-                      sizeof(src_reg));
-
-               /* Copy this slot to the appropriate message register */
-               dst_reg reg = dst_reg(MRF, mrf);
-               reg.type = output_reg[varying][0].type;
-               data.type = reg.type;
-               inst = emit(MOV(reg, data));
-               inst->force_writemask_all = true;
-
-               mrf++;
-               emit(ADD(dst_reg(this->vertex_output_offset),
-                        this->vertex_output_offset, brw_imm_ud(1u)));
-
-               /* If this was max_usable_mrf, we can't fit anything more into
-                * this URB WRITE. Same if we reached the max. message length.
-                */
-               if (mrf > max_usable_mrf ||
-                   align_interleaved_urb_mlen(mrf - base_mrf + 1) > BRW_MAX_MSG_LENGTH) {
-                  slot++;
-                  break;
-               }
-            }
-
-            complete = slot >= prog_data->vue_map.num_slots;
-            emit_snb_gs_urb_write_opcode(complete, base_mrf, mrf, urb_offset);
-         } while (!complete);
-
-         /* Skip over the flags data item so that vertex_output_offset points
-          * to the first data item of the next vertex, so that we can start
-          * writing the next vertex.
-          */
-         emit(ADD(dst_reg(this->vertex_output_offset),
-                  this->vertex_output_offset, brw_imm_ud(1u)));
-
-         emit(ADD(dst_reg(vertex), vertex, brw_imm_ud(1u)));
-      }
-      emit(BRW_OPCODE_WHILE);
-
-      if (gs_prog_data->num_transform_feedback_bindings)
-         xfb_write();
-   }
-   emit(BRW_OPCODE_ENDIF);
-
-   /* Finally, emit EOT message.
-    *
-    * In gfx6 we need to end the thread differently depending on whether we have
-    * emitted at least one vertex or not. In case we did, the EOT message must
-    * always include the COMPLETE flag or else the GPU hangs. If we have not
-    * produced any output we can't use the COMPLETE flag.
-    *
-    * However, this would lead us to end the program with an ENDIF opcode,
-    * which we want to avoid, so what we do is that we always request a new
-    * VUE handle every time, even if GS produces no output.
-    * With this we make sure that whether we have emitted at least one vertex
-    * or none at all, we have to finish the thread without writing to the URB,
-    * which works for both cases by setting the COMPLETE and UNUSED flags in
-    * the EOT message.
-    */
-   this->current_annotation = "gfx6 thread end: EOT";
-
-   if (gs_prog_data->num_transform_feedback_bindings) {
-      /* When emitting EOT, set SONumPrimsWritten Increment Value. */
-      src_reg data(this, glsl_uint_type());
-      emit(AND(dst_reg(data), this->sol_prim_written, brw_imm_ud(0xffffu)));
-      emit(SHL(dst_reg(data), data, brw_imm_ud(16u)));
-      emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, base_mrf), data);
-   }
-
-   inst = emit(GS_OPCODE_THREAD_END);
-   inst->urb_write_flags = BRW_URB_WRITE_COMPLETE | BRW_URB_WRITE_UNUSED;
-   inst->base_mrf = base_mrf;
-   inst->mlen = 1;
-}
-
-void
-gfx6_gs_visitor::setup_payload()
-{
-   int attribute_map[BRW_VARYING_SLOT_COUNT * MAX_GS_INPUT_VERTICES];
-
-   /* Attributes are going to be interleaved, so one register contains two
-    * attribute slots.
-    */
-   int attributes_per_reg = 2;
-
-   /* If a geometry shader tries to read from an input that wasn't written by
-    * the vertex shader, that produces undefined results, but it shouldn't
-    * crash anything.  So initialize attribute_map to zeros--that ensures that
-    * these undefined results are read from r0.
-    */
-   memset(attribute_map, 0, sizeof(attribute_map));
-
-   int reg = 0;
-
-   /* The payload always contains important data in r0. */
-   reg++;
-
-   /* r1 is always part of the payload and it holds information relevant
-    * for transform feedback when we set the GFX6_GS_SVBI_PAYLOAD_ENABLE bit in
-    * the 3DSTATE_GS packet. We will overwrite it with the PrimitiveID
-    * information (and move the original value to a virtual register if
-    * necessary).
-    */
-   if (gs_prog_data->include_primitive_id)
-      attribute_map[VARYING_SLOT_PRIMITIVE_ID] = attributes_per_reg * reg;
-   reg++;
-
-   reg = setup_uniforms(reg);
-
-   reg = setup_varying_inputs(reg, attributes_per_reg);
-
-   this->first_non_payload_grf = reg;
-}
-
-void
-gfx6_gs_visitor::xfb_write()
-{
-   unsigned num_verts;
-
-   switch (gs_prog_data->output_topology) {
-   case _3DPRIM_POINTLIST:
-      num_verts = 1;
-      break;
-   case _3DPRIM_LINELIST:
-   case _3DPRIM_LINESTRIP:
-   case _3DPRIM_LINELOOP:
-      num_verts = 2;
-      break;
-   case _3DPRIM_TRILIST:
-   case _3DPRIM_TRIFAN:
-   case _3DPRIM_TRISTRIP:
-   case _3DPRIM_RECTLIST:
-      num_verts = 3;
-      break;
-   case _3DPRIM_QUADLIST:
-   case _3DPRIM_QUADSTRIP:
-   case _3DPRIM_POLYGON:
-      num_verts = 3;
-      break;
-   default:
-      unreachable("Unexpected primitive type in Gfx6 SOL program.");
-   }
-
-   this->current_annotation = "gfx6 thread end: svb writes init";
-
-   emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_ud(0u)));
-   emit(MOV(dst_reg(this->sol_prim_written), brw_imm_ud(0u)));
-
-   /* Check that at least one primitive can be written
-    *
-    * Note: since we use the binding table to keep track of buffer offsets
-    * and stride, the GS doesn't need to keep track of a separate pointer
-    * into each buffer; it uses a single pointer which increments by 1 for
-    * each vertex.  So we use SVBI0 for this pointer, regardless of whether
-    * transform feedback is in interleaved or separate attribs mode.
-    */
-   src_reg sol_temp(this, glsl_uvec4_type());
-   emit(ADD(dst_reg(sol_temp), this->svbi, brw_imm_ud(num_verts)));
-
-   /* Compare SVBI calculated number with the maximum value, which is
-    * in R1.4 (previously saved in this->max_svbi) for gfx6.
-    */
-   emit(CMP(dst_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
-   emit(IF(BRW_PREDICATE_NORMAL));
-   {
-      vec4_instruction *inst = emit(MOV(dst_reg(destination_indices),
-                                        brw_imm_vf4(brw_float_to_vf(0.0),
-                                                    brw_float_to_vf(1.0),
-                                                    brw_float_to_vf(2.0),
-                                                    brw_float_to_vf(0.0))));
-      inst->force_writemask_all = true;
-
-      emit(ADD(dst_reg(this->destination_indices),
-               this->destination_indices,
-               this->svbi));
-   }
-   emit(BRW_OPCODE_ENDIF);
-
-   /* Write transform feedback data for all processed vertices. */
-   for (int i = 0; i < (int)nir->info.gs.vertices_out; i++) {
-      emit(MOV(dst_reg(sol_temp), brw_imm_d(i)));
-      emit(CMP(dst_null_d(), sol_temp, this->vertex_count,
-               BRW_CONDITIONAL_L));
-      emit(IF(BRW_PREDICATE_NORMAL));
-      {
-         xfb_program(i, num_verts);
-      }
-      emit(BRW_OPCODE_ENDIF);
-   }
-}
-
-void
-gfx6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
-{
-   unsigned binding;
-   unsigned num_bindings = gs_prog_data->num_transform_feedback_bindings;
-   src_reg sol_temp(this, glsl_uvec4_type());
-
-   /* Check for buffer overflow: we need room to write the complete primitive
-    * (all vertices). Otherwise, avoid writing any vertices for it
-    */
-   emit(ADD(dst_reg(sol_temp), this->sol_prim_written, brw_imm_ud(1u)));
-   emit(MUL(dst_reg(sol_temp), sol_temp, brw_imm_ud(num_verts)));
-   emit(ADD(dst_reg(sol_temp), sol_temp, this->svbi));
-   emit(CMP(dst_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
-   emit(IF(BRW_PREDICATE_NORMAL));
-   {
-      /* Avoid overwriting MRF 1 as it is used as URB write message header */
-      dst_reg mrf_reg(MRF, 2);
-
-      this->current_annotation = "gfx6: emit SOL vertex data";
-      /* For each vertex, generate code to output each varying using the
-       * appropriate binding table entry.
-       */
-      for (binding = 0; binding < num_bindings; ++binding) {
-         unsigned char varying =
-            gs_prog_data->transform_feedback_bindings[binding];
-
-         /* Set up the correct destination index for this vertex */
-         vec4_instruction *inst = emit(GS_OPCODE_SVB_SET_DST_INDEX,
-                                       mrf_reg,
-                                       this->destination_indices);
-         inst->sol_vertex = vertex % num_verts;
-
-         /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
-          *
-          *   "Prior to End of Thread with a URB_WRITE, the kernel must
-          *   ensure that all writes are complete by sending the final
-          *   write as a committed write."
-          */
-         bool final_write = binding == (unsigned) num_bindings - 1 &&
-                            inst->sol_vertex == num_verts - 1;
-
-         /* Compute offset of this varying for the current vertex
-          * in vertex_output
-          */
-         this->current_annotation = output_reg_annotation[varying];
-         src_reg data(this->vertex_output);
-         data.reladdr = ralloc(mem_ctx, src_reg);
-         int offset = get_vertex_output_offset_for_varying(vertex, varying);
-         emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_d(offset)));
-         memcpy(data.reladdr, &this->vertex_output_offset, sizeof(src_reg));
-         data.type = output_reg[varying][0].type;
-         data.swizzle = gs_prog_data->transform_feedback_swizzles[binding];
-
-         /* Write data */
-         inst = emit(GS_OPCODE_SVB_WRITE, mrf_reg, data, sol_temp);
-         inst->sol_binding = binding;
-         inst->sol_final_write = final_write;
-
-         if (final_write) {
-            /* This is the last vertex of the primitive, then increment
-             * SO num primitive counter and destination indices.
-             */
-            emit(ADD(dst_reg(this->destination_indices),
-                     this->destination_indices,
-                     brw_imm_ud(num_verts)));
-            emit(ADD(dst_reg(this->sol_prim_written),
-                     this->sol_prim_written, brw_imm_ud(1u)));
-         }
-
-      }
-      this->current_annotation = NULL;
-   }
-   emit(BRW_OPCODE_ENDIF);
-}
-
-int
-gfx6_gs_visitor::get_vertex_output_offset_for_varying(int vertex, int varying)
-{
-   /* Find the output slot assigned to this varying.
-    *
-    * VARYING_SLOT_LAYER and VARYING_SLOT_VIEWPORT are packed in the same slot
-    * as VARYING_SLOT_PSIZ.
-    */
-   if (varying == VARYING_SLOT_LAYER || varying == VARYING_SLOT_VIEWPORT)
-      varying = VARYING_SLOT_PSIZ;
-   int slot = prog_data->vue_map.varying_to_slot[varying];
-
-   if (slot < 0) {
-      /* This varying does not exist in the VUE so we are not writing to it
-       * and its value is undefined. We still want to return a valid offset
-       * into vertex_output though, to prevent any out-of-bound accesses into
-       * the vertex_output array. Since the value for this varying is undefined
-       * we don't really care for the value we assign to it, so any offset
-       * within the limits of vertex_output will do.
-       */
-      slot = 0;
-   }
-
-   return vertex * (prog_data->vue_map.num_slots + 1) + slot;
-}
-
-} /* namespace brw */
diff --git a/src/intel/compiler/gfx6_gs_visitor.h b/src/intel/compiler/gfx6_gs_visitor.h
deleted file mode 100644
index 61832a0cb6b..00000000000
--- a/src/intel/compiler/gfx6_gs_visitor.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef GFX6_GS_VISITOR_H
-#define GFX6_GS_VISITOR_H
-
-#include "brw_vec4.h"
-#include "brw_vec4_gs_visitor.h"
-
-#ifdef __cplusplus
-
-namespace brw {
-
-class gfx6_gs_visitor : public vec4_gs_visitor
-{
-public:
-   gfx6_gs_visitor(const struct brw_compiler *comp,
-                   const struct brw_compile_params *params,
-                   struct brw_gs_compile *c,
-                   struct brw_gs_prog_data *prog_data,
-                   const nir_shader *shader,
-                   bool no_spills,
-                   bool debug_enabled) :
-      vec4_gs_visitor(comp, params, c, prog_data, shader, no_spills, debug_enabled)
-      {
-      }
-
-protected:
-   virtual void emit_prolog();
-   virtual void emit_thread_end();
-   virtual void gs_emit_vertex(int stream_id);
-   virtual void gs_end_primitive();
-   virtual void emit_urb_write_header(int mrf);
-   virtual void setup_payload();
-
-private:
-   void xfb_write();
-   void xfb_program(unsigned vertex, unsigned num_verts);
-   int get_vertex_output_offset_for_varying(int vertex, int varying);
-   void emit_snb_gs_urb_write_opcode(bool complete,
-                                     int base_mrf,
-                                     int last_mrf,
-                                     int urb_offset);
-
-   src_reg vertex_output;
-   src_reg vertex_output_offset;
-   src_reg temp;
-   src_reg first_vertex;
-   src_reg prim_count;
-   src_reg primitive_id;
-
-   /* Transform Feedback members */
-   src_reg sol_prim_written;
-   src_reg svbi;
-   src_reg max_svbi;
-   src_reg destination_indices;
-};
-
-} /* namespace brw */
-
-#endif /* __cplusplus */
-
-#endif /* GFX6_GS_VISITOR_H */
diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build
index 700da611dda..d61e98405ee 100644
--- a/src/intel/compiler/meson.build
+++ b/src/intel/compiler/meson.build
@@ -105,7 +105,6 @@ libintel_compiler_brw_files = files(
   'brw_ir_fs.h',
   'brw_ir_performance.h',
   'brw_ir_performance.cpp',
-  'brw_ir_vec4.h',
   'brw_isa_info.h',
   'brw_lower_logical_sends.cpp',
   'brw_mesh.cpp',
@@ -137,33 +136,7 @@ libintel_compiler_brw_files = files(
   'brw_shader.cpp',
   'brw_shader.h',
   'brw_simd_selection.cpp',
-  'brw_vec4_builder.h',
-  'brw_vec4_cmod_propagation.cpp',
-  'brw_vec4_copy_propagation.cpp',
-  'brw_vec4.cpp',
-  'brw_vec4_cse.cpp',
-  'brw_vec4_dead_code_eliminate.cpp',
-  'brw_vec4_generator.cpp',
-  'brw_vec4_gs_visitor.cpp',
-  'brw_vec4_gs_visitor.h',
-  'brw_vec4.h',
-  'brw_vec4_live_variables.cpp',
-  'brw_vec4_live_variables.h',
-  'brw_vec4_nir.cpp',
-  'brw_vec4_gs_nir.cpp',
-  'brw_vec4_reg_allocate.cpp',
-  'brw_vec4_surface_builder.cpp',
-  'brw_vec4_surface_builder.h',
-  'brw_vec4_tcs.cpp',
-  'brw_vec4_tcs.h',
-  'brw_vec4_tes.cpp',
-  'brw_vec4_tes.h',
-  'brw_vec4_visitor.cpp',
-  'brw_vec4_vs_visitor.cpp',
-  'brw_vec4_vs.h',
   'brw_vue_map.c',
-  'gfx6_gs_visitor.cpp',
-  'gfx6_gs_visitor.h',
 )
 
 brw_device_sha1_gen_src = custom_target('brw_device_sha1_gen.c',
@@ -236,10 +209,6 @@ if with_tests
         'test_fs_saturate_propagation.cpp',
         'test_fs_scoreboard.cpp',
         'test_simd_selection.cpp',
-        'test_vec4_cmod_propagation.cpp',
-        'test_vec4_copy_propagation.cpp',
-        'test_vec4_dead_code_eliminate.cpp',
-        'test_vec4_register_coalesce.cpp',
         'test_vf_float_conversions.cpp',
       ),
       ir_expression_operation_h,
diff --git a/src/intel/compiler/test_vec4_cmod_propagation.cpp b/src/intel/compiler/test_vec4_cmod_propagation.cpp
deleted file mode 100644
index 73de39d10fe..00000000000
--- a/src/intel/compiler/test_vec4_cmod_propagation.cpp
+++ /dev/null
@@ -1,1056 +0,0 @@
-/*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Based on test_fs_cmod_propagation.cpp
- */
-
-#include <gtest/gtest.h>
-#include "brw_vec4.h"
-#include "brw_vec4_builder.h"
-#include "brw_cfg.h"
-
-using namespace brw;
-
-class cmod_propagation_vec4_test : public ::testing::Test {
-   virtual void SetUp();
-   virtual void TearDown();
-
-public:
-   struct brw_compiler *compiler;
-   struct brw_compile_params params;
-   struct intel_device_info *devinfo;
-   void *ctx;
-   struct gl_shader_program *shader_prog;
-   struct brw_vue_prog_data *prog_data;
-   vec4_visitor *v;
-};
-
-class cmod_propagation_vec4_visitor : public vec4_visitor
-{
-public:
-   cmod_propagation_vec4_visitor(struct brw_compiler *compiler,
-                                 struct brw_compile_params *params,
-                                 nir_shader *shader,
-                                 struct brw_vue_prog_data *prog_data)
-      : vec4_visitor(compiler, params, NULL, prog_data, shader,
-                     false, false)
-      {
-         prog_data->dispatch_mode = INTEL_DISPATCH_MODE_4X2_DUAL_OBJECT;
-      }
-
-protected:
-   /* Dummy implementation for pure virtual methods */
-   virtual dst_reg *make_reg_for_system_value(int /* location */)
-   {
-      unreachable("Not reached");
-   }
-
-   virtual void setup_payload()
-   {
-      unreachable("Not reached");
-   }
-
-   virtual void emit_prolog()
-   {
-      unreachable("Not reached");
-   }
-
-   virtual void emit_program_code()
-   {
-      unreachable("Not reached");
-   }
-
-   virtual void emit_thread_end()
-   {
-      unreachable("Not reached");
-   }
-
-   virtual void emit_urb_write_header(int /* mrf */)
-   {
-      unreachable("Not reached");
-   }
-
-   virtual vec4_instruction *emit_urb_write_opcode(bool /* complete */)
-   {
-      unreachable("Not reached");
-   }
-};
-
-
-void cmod_propagation_vec4_test::SetUp()
-{
-   ctx = ralloc_context(NULL);
-   compiler = rzalloc(ctx, struct brw_compiler);
-   devinfo = rzalloc(ctx, struct intel_device_info);
-   compiler->devinfo = devinfo;
-
-   params = {};
-   params.mem_ctx = ctx;
-
-   prog_data = ralloc(ctx, struct brw_vue_prog_data);
-   nir_shader *shader =
-      nir_shader_create(ctx, MESA_SHADER_VERTEX, NULL, NULL);
-
-   v = new cmod_propagation_vec4_visitor(compiler, &params, shader, prog_data);
-
-   devinfo->ver = 7;
-   devinfo->verx10 = devinfo->ver * 10;
-}
-
-void cmod_propagation_vec4_test::TearDown()
-{
-   delete v;
-   v = NULL;
-
-   ralloc_free(ctx);
-   ctx = NULL;
-}
-
-static vec4_instruction *
-instruction(bblock_t *block, int num)
-{
-   vec4_instruction *inst = (vec4_instruction *)block->start();
-   for (int i = 0; i < num; i++) {
-      inst = (vec4_instruction *)inst->next;
-   }
-   return inst;
-}
-
-static bool
-cmod_propagation(vec4_visitor *v)
-{
-   const bool print = getenv("TEST_DEBUG");
-
-   if (print) {
-      fprintf(stderr, "= Before =\n");
-      v->dump_instructions();
-   }
-
-   bool ret = v->opt_cmod_propagation();
-
-   if (print) {
-      fprintf(stderr, "\n= After =\n");
-      v->dump_instructions();
-   }
-
-   return ret;
-}
-
-TEST_F(cmod_propagation_vec4_test, basic)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest = dst_reg(v, glsl_float_type());
-   src_reg src0 = src_reg(v, glsl_float_type());
-   src_reg src1 = src_reg(v, glsl_float_type());
-   src_reg zero(brw_imm_f(0.0f));
-   dst_reg dest_null = bld.null_reg_f();
-   dest_null.writemask = WRITEMASK_X;
-
-   bld.ADD(dest, src0, src1);
-   bld.CMP(dest_null, src_reg(dest), zero, BRW_CONDITIONAL_GE);
-
-   /* = Before =
-    *
-    * 0: add        dest.x  src0.xxxx  src1.xxxx
-    * 1: cmp.ge.f0  null.x  dest.xxxx  0.0f
-    *
-    * = After =
-    * 0: add.ge.f0  dest.x  src0.xxxx  src1.xxxx
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(1, block0->end_ip);
-
-   EXPECT_TRUE(cmod_propagation(v));
-
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(0, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
-}
-
-TEST_F(cmod_propagation_vec4_test, basic_different_dst_writemask)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest = dst_reg(v, glsl_float_type());
-   src_reg src0 = src_reg(v, glsl_float_type());
-   src_reg src1 = src_reg(v, glsl_float_type());
-   src_reg zero(brw_imm_f(0.0f));
-   dst_reg dest_null = bld.null_reg_f();
-
-   bld.ADD(dest, src0, src1);
-   bld.CMP(dest_null, src_reg(dest), zero, BRW_CONDITIONAL_GE);
-
-   /* = Before =
-    *
-    * 0: add        dest.x     src0  src1
-    * 1: cmp.ge.f0  null.xyzw  dest  0.0f
-    *
-    * = After =
-    * (no changes)
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(1, block0->end_ip);
-
-   EXPECT_FALSE(cmod_propagation(v));
-
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(1, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
-   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
-}
-
-TEST_F(cmod_propagation_vec4_test, andz_one)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest = dst_reg(v, glsl_int_type());
-   src_reg src0 = src_reg(v, glsl_float_type());
-   src_reg zero(brw_imm_f(0.0f));
-   src_reg one(brw_imm_d(1));
-
-   bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
-   set_condmod(BRW_CONDITIONAL_Z,
-               bld.AND(bld.null_reg_d(), src_reg(dest), one));
-
-   /* = Before =
-    * 0: cmp.l.f0     dest:F  src0:F  0F
-    * 1: and.z.f0     null:D  dest:D  1D
-    *
-    * = After =
-    * (no changes)
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(1, block0->end_ip);
-
-   EXPECT_FALSE(cmod_propagation(v));
-
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(1, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
-   EXPECT_EQ(BRW_OPCODE_AND, instruction(block0, 1)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_EQ, instruction(block0, 1)->conditional_mod);
-}
-
-TEST_F(cmod_propagation_vec4_test, non_cmod_instruction)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest = dst_reg(v, glsl_uint_type());
-   src_reg src0 = src_reg(v, glsl_uint_type());
-   src_reg zero(brw_imm_ud(0u));
-   bld.FBL(dest, src0);
-   bld.CMP(bld.null_reg_ud(), src_reg(dest), zero, BRW_CONDITIONAL_GE);
-
-   /* = Before =
-    *
-    * 0: fbl        dest  src0
-    * 1: cmp.ge.f0  null  dest  0u
-    *
-    * = After =
-    * (no changes)
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(1, block0->end_ip);
-
-   EXPECT_FALSE(cmod_propagation(v));
-
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(1, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_FBL, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
-}
-
-TEST_F(cmod_propagation_vec4_test, intervening_flag_write)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest = dst_reg(v, glsl_float_type());
-   src_reg src0 = src_reg(v, glsl_float_type());
-   src_reg src1 = src_reg(v, glsl_float_type());
-   src_reg src2 = src_reg(v, glsl_float_type());
-   src_reg zero(brw_imm_f(0.0f));
-   bld.ADD(dest, src0, src1);
-   bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE);
-   bld.CMP(bld.null_reg_f(), src_reg(dest), zero, BRW_CONDITIONAL_GE);
-
-   /* = Before =
-    *
-    * 0: add        dest  src0  src1
-    * 1: cmp.ge.f0  null  src2  0.0f
-    * 2: cmp.ge.f0  null  dest  0.0f
-    *
-    * = After =
-    * (no changes)
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(2, block0->end_ip);
-
-   EXPECT_FALSE(cmod_propagation(v));
-
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(2, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
-   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod);
-}
-
-TEST_F(cmod_propagation_vec4_test, intervening_flag_read)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest0 = dst_reg(v, glsl_float_type());
-   dst_reg dest1 = dst_reg(v, glsl_float_type());
-   src_reg src0 = src_reg(v, glsl_float_type());
-   src_reg src1 = src_reg(v, glsl_float_type());
-   src_reg src2 = src_reg(v, glsl_float_type());
-   src_reg zero(brw_imm_f(0.0f));
-   bld.ADD(dest0, src0, src1);
-   set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
-   bld.CMP(bld.null_reg_f(), src_reg(dest0), zero, BRW_CONDITIONAL_GE);
-
-   /* = Before =
-    *
-    * 0: add        dest0 src0  src1
-    * 1: (+f0) sel  dest1 src2  0.0f
-    * 2: cmp.ge.f0  null  dest0 0.0f
-    *
-    * = After =
-    * (no changes)
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(2, block0->end_ip);
-
-   EXPECT_FALSE(cmod_propagation(v));
-
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(2, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode);
-   EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate);
-   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod);
-}
-
-TEST_F(cmod_propagation_vec4_test, intervening_dest_write)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest = dst_reg(v, glsl_vec4_type());
-   src_reg src0 = src_reg(v, glsl_float_type());
-   src_reg src1 = src_reg(v, glsl_float_type());
-   src_reg src2 = src_reg(v, glsl_vec2_type());
-   src_reg zero(brw_imm_f(0.0f));
-   bld.ADD(offset(dest, 8, 2), src0, src1);
-   bld.emit(SHADER_OPCODE_TEX, dest, src2)
-      ->size_written = 4 * REG_SIZE;
-   bld.CMP(bld.null_reg_f(), offset(src_reg(dest), 8, 2), zero, BRW_CONDITIONAL_GE);
-
-   /* = Before =
-    *
-    * 0: add        dest+2  src0    src1
-    * 1: tex rlen 4 dest+0  src2
-    * 2: cmp.ge.f0  null    dest+2  0.0f
-    *
-    * = After =
-    * (no changes)
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(2, block0->end_ip);
-
-   EXPECT_FALSE(cmod_propagation(v));
-
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(2, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
-   EXPECT_EQ(SHADER_OPCODE_TEX, instruction(block0, 1)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
-   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod);
-}
-
-TEST_F(cmod_propagation_vec4_test, intervening_flag_read_same_value)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest0 = dst_reg(v, glsl_float_type());
-   dst_reg dest1 = dst_reg(v, glsl_float_type());
-   src_reg src0 = src_reg(v, glsl_float_type());
-   src_reg src1 = src_reg(v, glsl_float_type());
-   src_reg src2 = src_reg(v, glsl_float_type());
-   src_reg zero(brw_imm_f(0.0f));
-   dst_reg dest_null = bld.null_reg_f();
-   dest_null.writemask = WRITEMASK_X;
-
-   set_condmod(BRW_CONDITIONAL_GE, bld.ADD(dest0, src0, src1));
-   set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
-   bld.CMP(dest_null, src_reg(dest0), zero, BRW_CONDITIONAL_GE);
-
-   /* = Before =
-    *
-    * 0: add.ge.f0  dest0   src0  src1
-    * 1: (+f0) sel  dest1   src2  0.0f
-    * 2: cmp.ge.f0  null.x  dest0 0.0f
-    *
-    * = After =
-    * 0: add.ge.f0  dest0 src0  src1
-    * 1: (+f0) sel  dest1 src2  0.0f
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(2, block0->end_ip);
-
-   EXPECT_TRUE(cmod_propagation(v));
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(1, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
-   EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode);
-   EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate);
-}
-
-TEST_F(cmod_propagation_vec4_test, negate)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest = dst_reg(v, glsl_float_type());
-   src_reg src0 = src_reg(v, glsl_float_type());
-   src_reg src1 = src_reg(v, glsl_float_type());
-   src_reg zero(brw_imm_f(0.0f));
-   bld.ADD(dest, src0, src1);
-   src_reg tmp_src = src_reg(dest);
-   tmp_src.negate = true;
-   dst_reg dest_null = bld.null_reg_f();
-   dest_null.writemask = WRITEMASK_X;
-   bld.CMP(dest_null, tmp_src, zero, BRW_CONDITIONAL_GE);
-
-   /* = Before =
-    *
-    * 0: add        dest     src0  src1
-    * 1: cmp.ge.f0  null.x  -dest 0.0f
-    *
-    * = After =
-    * 0: add.le.f0  dest     src0  src1
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(1, block0->end_ip);
-
-   EXPECT_TRUE(cmod_propagation(v));
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(0, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_LE, instruction(block0, 0)->conditional_mod);
-}
-
-TEST_F(cmod_propagation_vec4_test, movnz)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest = dst_reg(v, glsl_float_type());
-   src_reg src0 = src_reg(v, glsl_float_type());
-   src_reg src1 = src_reg(v, glsl_float_type());
-   dst_reg dest_null = bld.null_reg_f();
-   dest_null.writemask = WRITEMASK_X;
-
-   bld.CMP(dest, src0, src1, BRW_CONDITIONAL_L);
-   set_condmod(BRW_CONDITIONAL_NZ,
-               bld.MOV(dest_null, src_reg(dest)));
-
-   /* = Before =
-    *
-    * 0: cmp.l.f0  dest:F  src0:F  src1:F
-    * 1: mov.nz.f0 null.x  dest:F
-    *
-    * = After =
-    * 0: cmp.l.f0  dest  src0:F  src1:F
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(1, block0->end_ip);
-
-   EXPECT_TRUE(cmod_propagation(v));
-
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(0, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
-}
-
-TEST_F(cmod_propagation_vec4_test, different_types_cmod_with_zero)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest = dst_reg(v, glsl_int_type());
-   src_reg src0 = src_reg(v, glsl_int_type());
-   src_reg src1 = src_reg(v, glsl_int_type());
-   src_reg zero(brw_imm_f(0.0f));
-   bld.ADD(dest, src0, src1);
-   bld.CMP(bld.null_reg_f(), retype(src_reg(dest), BRW_REGISTER_TYPE_F), zero,
-           BRW_CONDITIONAL_GE);
-
-   /* = Before =
-    *
-    * 0: add        dest:D  src0:D  src1:D
-    * 1: cmp.ge.f0  null:F  dest:F  0.0f
-    *
-    * = After =
-    * (no changes)
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(1, block0->end_ip);
-
-   EXPECT_FALSE(cmod_propagation(v));
-
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(1, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
-}
-
-TEST_F(cmod_propagation_vec4_test, andnz_non_one)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest = dst_reg(v, glsl_int_type());
-   src_reg src0 = src_reg(v, glsl_float_type());
-   src_reg zero(brw_imm_f(0.0f));
-   src_reg nonone(brw_imm_d(38));
-
-   bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
-   set_condmod(BRW_CONDITIONAL_NZ,
-               bld.AND(bld.null_reg_d(), src_reg(dest), nonone));
-
-   /* = Before =
-    * 0: cmp.l.f0     dest:F  src0:F  0F
-    * 1: and.nz.f0    null:D  dest:D  38D
-    *
-    * = After =
-    * (no changes)
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(1, block0->end_ip);
-
-   EXPECT_FALSE(cmod_propagation(v));
-
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(1, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
-   EXPECT_EQ(BRW_OPCODE_AND, instruction(block0, 1)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod);
-}
-
-/* Note that basic is using glsl_type:float types, while this one is using
- * glsl_type::vec4 */
-TEST_F(cmod_propagation_vec4_test, basic_vec4)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest = dst_reg(v, glsl_vec4_type());
-   src_reg src0 = src_reg(v, glsl_vec4_type());
-   src_reg src1 = src_reg(v, glsl_vec4_type());
-   src_reg zero(brw_imm_f(0.0f));
-
-   bld.MUL(dest, src0, src1);
-   bld.CMP(bld.null_reg_f(), src_reg(dest), zero, BRW_CONDITIONAL_NZ);
-
-   /* = Before =
-    * 0: mul         dest.xyzw  src0.xyzw  src1.xyzw
-    * 1: cmp.nz.f0.0 null.xyzw  dest.xyzw  0.0f
-    *
-    * = After =
-    * 0: mul.nz.f0.0 dest.xyzw  src0.xyzw  src1.xyzw
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(1, block0->end_ip);
-
-   EXPECT_TRUE(cmod_propagation(v));
-
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(0, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod);
-}
-
-TEST_F(cmod_propagation_vec4_test, basic_vec4_different_dst_writemask)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest = dst_reg(v, glsl_vec4_type());
-   dest.writemask = WRITEMASK_X;
-   src_reg src0 = src_reg(v, glsl_vec4_type());
-   src_reg src1 = src_reg(v, glsl_vec4_type());
-   src_reg zero(brw_imm_f(0.0f));
-   dst_reg dest_null = bld.null_reg_f();
-
-   bld.MUL(dest, src0, src1);
-   bld.CMP(dest_null, src_reg(dest), zero, BRW_CONDITIONAL_NZ);
-
-   /* = Before =
-    * 0: mul         dest.x  src0  src1
-    * 1: cmp.nz.f0.0 null    dest  0.0f
-    *
-    * = After =
-    * (no changes)
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(1, block0->end_ip);
-
-   EXPECT_FALSE(cmod_propagation(v));
-
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(1, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
-   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod);
-}
-
-TEST_F(cmod_propagation_vec4_test, mad_one_component_vec4)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest = dst_reg(v, glsl_vec4_type());
-   dest.writemask = WRITEMASK_X;
-   src_reg src0 = src_reg(v, glsl_vec4_type());
-   src_reg src1 = src_reg(v, glsl_vec4_type());
-   src_reg src2 = src_reg(v, glsl_vec4_type());
-   src0.swizzle = src1.swizzle = src2.swizzle = BRW_SWIZZLE_XXXX;
-   src2.negate = true;
-   src_reg zero(brw_imm_f(0.0f));
-   src_reg tmp(dest);
-   tmp.swizzle = BRW_SWIZZLE_XXXX;
-   dst_reg dest_null = bld.null_reg_f();
-   dest_null.writemask = WRITEMASK_X;
-
-   bld.MAD(dest, src0, src1, src2);
-   bld.CMP(dest_null, tmp, zero, BRW_CONDITIONAL_L);
-
-   /* = Before =
-    *
-    * 0: mad         dest.x:F  src0.xxxx:F  src10.xxxx:F  -src2.xxxx:F
-    * 1: cmp.l.f0.0  null.x:F  dest.xxxx:F  0.0f
-    *
-    * = After =
-    * 0: mad.l.f0    dest.x:F  src0.xxxx:F  src10.xxxx:F  -src2.xxxx:F
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(1, block0->end_ip);
-
-   EXPECT_TRUE(cmod_propagation(v));
-
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(0, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_MAD, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
-}
-
-TEST_F(cmod_propagation_vec4_test, mad_more_one_component_vec4)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest = dst_reg(v, glsl_vec4_type());
-   dest.writemask = WRITEMASK_XW;
-   src_reg src0 = src_reg(v, glsl_vec4_type());
-   src_reg src1 = src_reg(v, glsl_vec4_type());
-   src_reg src2 = src_reg(v, glsl_vec4_type());
-   src0.swizzle = src1.swizzle = src2.swizzle = BRW_SWIZZLE_XXXX;
-   src2.negate = true;
-   src_reg zero(brw_imm_f(0.0f));
-   src_reg tmp(dest);
-   tmp.swizzle = BRW_SWIZZLE_XXXX;
-   dst_reg dest_null = bld.null_reg_f();
-
-   bld.MAD(dest, src0, src1, src2);
-   bld.CMP(dest_null, tmp, zero, BRW_CONDITIONAL_L);
-
-   /* = Before =
-    *
-    * 0: mad         dest.xw:F  src0.xxxx:F  src10.xxxx:F  -src2.xxxx:F
-    * 1: cmp.l.f0.0  null:F  dest.xxxx:F  zeroF
-    *
-    * = After =
-    * (No changes)
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(1, block0->end_ip);
-
-   EXPECT_FALSE(cmod_propagation(v));
-
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(1, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_MAD, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
-   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 1)->conditional_mod);
-}
-
-TEST_F(cmod_propagation_vec4_test, cmp_mov_vec4)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest = dst_reg(v, glsl_ivec4_type());
-   dest.writemask = WRITEMASK_X;
-   src_reg src0 = src_reg(v, glsl_ivec4_type());
-   src0.swizzle = BRW_SWIZZLE_XXXX;
-   src0.file = UNIFORM;
-   src_reg nonone = retype(brw_imm_d(16), BRW_REGISTER_TYPE_D);
-   src_reg mov_src = src_reg(dest);
-   mov_src.swizzle = BRW_SWIZZLE_XXXX;
-   dst_reg dest_null = bld.null_reg_d();
-   dest_null.writemask = WRITEMASK_X;
-
-   bld.CMP(dest, src0, nonone, BRW_CONDITIONAL_GE);
-   set_condmod(BRW_CONDITIONAL_NZ,
-               bld.MOV(dest_null, mov_src));
-
-   /* = Before =
-    *
-    * 0: cmp.ge.f0  dest.x:D  u.xxxx:D  16D
-    * 1: mov.nz.f0  null.x:D  dest.xxxx:D
-    *
-    * = After =
-    * 0: cmp.ge.f0  dest.x:D  u.xxxx:D  16D
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(1, block0->end_ip);
-
-   EXPECT_TRUE(cmod_propagation(v));
-
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(0, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
-}
-
-TEST_F(cmod_propagation_vec4_test, mul_cmp_different_channels_vec4)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest = dst_reg(v, glsl_vec4_type());
-   src_reg src0 = src_reg(v, glsl_vec4_type());
-   src_reg src1 = src_reg(v, glsl_vec4_type());
-   src_reg zero(brw_imm_f(0.0f));
-   src_reg cmp_src = src_reg(dest);
-   cmp_src.swizzle = BRW_SWIZZLE4(0,1,3,2);
-
-   bld.MUL(dest, src0, src1);
-   bld.CMP(bld.null_reg_f(), cmp_src, zero, BRW_CONDITIONAL_NZ);
-
-   /* = Before =
-    * 0: mul         dest  src0       src1
-    * 1: cmp.nz.f0.0 null  dest.xywz  0.0f
-    *
-    * = After =
-    * (No changes)
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(1, block0->end_ip);
-
-   EXPECT_FALSE(cmod_propagation(v));
-
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(1, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
-   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod);
-}
-
-TEST_F(cmod_propagation_vec4_test, add_cmp_same_dst_writemask)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest = dst_reg(v, glsl_vec4_type());
-   src_reg src0 = src_reg(v, glsl_vec4_type());
-   src_reg src1 = src_reg(v, glsl_vec4_type());
-   dst_reg dest_null = bld.null_reg_f();
-
-   bld.ADD(dest, src0, src1);
-   vec4_instruction *inst = bld.CMP(dest_null, src0, src1, BRW_CONDITIONAL_GE);
-   inst->src[1].negate = true;
-
-   /* = Before =
-    *
-    * 0: add        dest.xyzw  src0  src1
-    * 1: cmp.ge.f0  null.xyzw  src0  -src1
-    *
-    * = After =
-    * 0: add.ge.f0  dest.xyzw  src0  src1
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(1, block0->end_ip);
-
-   EXPECT_TRUE(cmod_propagation(v));
-
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(0, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
-}
-
-TEST_F(cmod_propagation_vec4_test, add_cmp_different_dst_writemask)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest = dst_reg(v, glsl_float_type());
-   src_reg src0 = src_reg(v, glsl_vec4_type());
-   src_reg src1 = src_reg(v, glsl_vec4_type());
-   dst_reg dest_null = bld.null_reg_f();
-
-   bld.ADD(dest, src0, src1);
-   vec4_instruction *inst = bld.CMP(dest_null, src0, src1, BRW_CONDITIONAL_GE);
-   inst->src[1].negate = true;
-
-   /* = Before =
-    *
-    * 0: add        dest.x     src0  src1
-    * 1: cmp.ge.f0  null.xyzw  src0  -src1
-    *
-    * = After =
-    * (no changes)
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(1, block0->end_ip);
-
-   EXPECT_FALSE(cmod_propagation(v));
-
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(1, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
-   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
-}
-
-TEST_F(cmod_propagation_vec4_test, prop_across_sel_gfx7)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest1 = dst_reg(v, glsl_float_type());
-   dst_reg dest2 = dst_reg(v, glsl_float_type());
-   src_reg src0 = src_reg(v, glsl_float_type());
-   src_reg src1 = src_reg(v, glsl_float_type());
-   src_reg src2 = src_reg(v, glsl_float_type());
-   src_reg src3 = src_reg(v, glsl_float_type());
-   src_reg zero(brw_imm_f(0.0f));
-   dst_reg dest_null = bld.null_reg_f();
-   dest_null.writemask = WRITEMASK_X;
-
-   bld.ADD(dest1, src0, src1);
-   bld.SEL(dest2, src2, src3)
-      ->conditional_mod = BRW_CONDITIONAL_GE;
-   bld.CMP(dest_null, src_reg(dest1), zero, BRW_CONDITIONAL_GE);
-
-   /* = Before =
-    *
-    * 0: add        dest1.x src0.xxxx  src1.xxxx
-    * 1: sel.ge.f0  dest2.x src2.xxxx  src3.xxxx
-    * 2: cmp.ge.f0  null.x  dest.xxxx  0.0f
-    *
-    * = After =
-    * 0: add.ge.f0  dest.x  src0.xxxx  src1.xxxx
-    * 1: sel.ge.f0  dest2.x src2.xxxx  src3.xxxx
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(2, block0->end_ip);
-
-   EXPECT_TRUE(cmod_propagation(v));
-
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(1, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
-   EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
-}
-
-TEST_F(cmod_propagation_vec4_test, prop_across_sel_gfx5)
-{
-   devinfo->ver = 5;
-   devinfo->verx10 = devinfo->ver * 10;
-
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest1 = dst_reg(v, glsl_float_type());
-   dst_reg dest2 = dst_reg(v, glsl_float_type());
-   src_reg src0 = src_reg(v, glsl_float_type());
-   src_reg src1 = src_reg(v, glsl_float_type());
-   src_reg src2 = src_reg(v, glsl_float_type());
-   src_reg src3 = src_reg(v, glsl_float_type());
-   src_reg zero(brw_imm_f(0.0f));
-   dst_reg dest_null = bld.null_reg_f();
-   dest_null.writemask = WRITEMASK_X;
-
-   bld.ADD(dest1, src0, src1);
-   bld.SEL(dest2, src2, src3)
-      ->conditional_mod = BRW_CONDITIONAL_GE;
-   bld.CMP(dest_null, src_reg(dest1), zero, BRW_CONDITIONAL_GE);
-
-   /* = Before =
-    *
-    * 0: add        dest1.x src0.xxxx  src1.xxxx
-    * 1: sel.ge.f0  dest2.x src2.xxxx  src3.xxxx
-    * 2: cmp.ge.f0  null.x  dest.xxxx  0.0f
-    *
-    * = After =
-    * (no changes)
-    *
-    * On Gfx4 and Gfx5, sel.l (for min) and sel.ge (for max) are implemented
-    * using a separate cmpn and sel instruction.  This lowering occurs in
-    * fs_vistor::lower_minmax which is called a long time after the first
-    * calls to cmod_propagation.
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(2, block0->end_ip);
-
-   EXPECT_FALSE(cmod_propagation(v));
-
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(2, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
-   EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
-   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod);
-}
-
-TEST_F(cmod_propagation_vec4_test, prop_into_sel_gfx5)
-{
-   devinfo->ver = 5;
-   devinfo->verx10 = devinfo->ver * 10;
-
-   const vec4_builder bld = vec4_builder(v).at_end();
-   dst_reg dest = dst_reg(v, glsl_float_type());
-   src_reg src0 = src_reg(v, glsl_float_type());
-   src_reg src1 = src_reg(v, glsl_float_type());
-   src_reg zero(brw_imm_f(0.0f));
-   dst_reg dest_null = bld.null_reg_f();
-   dest_null.writemask = WRITEMASK_X;
-
-   bld.SEL(dest, src0, src1)
-      ->conditional_mod = BRW_CONDITIONAL_GE;
-   bld.CMP(dest_null, src_reg(dest), zero, BRW_CONDITIONAL_GE);
-
-   /* = Before =
-    *
-    * 0: sel.ge.f0  dest.x  src2.xxxx  src3.xxxx
-    * 1: cmp.ge.f0  null.x  dest.xxxx  0.0f
-    *
-    * = After =
-    * (no changes)
-    *
-    * Do not copy propagate into a sel.cond instruction.  While it does modify
-    * the flags, the flags are not based on the result compared with zero (as
-    * with most other instructions).  The result is based on the sources
-    * compared with each other (like cmp.cond).
-    */
-
-   v->calculate_cfg();
-   bblock_t *block0 = v->cfg->blocks[0];
-
-   EXPECT_EQ(0, block0->start_ip);
-   EXPECT_EQ(1, block0->end_ip);
-
-   EXPECT_FALSE(cmod_propagation(v));
-
-   ASSERT_EQ(0, block0->start_ip);
-   ASSERT_EQ(1, block0->end_ip);
-   EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 0)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
-   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
-   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
-}
diff --git a/src/intel/compiler/test_vec4_copy_propagation.cpp b/src/intel/compiler/test_vec4_copy_propagation.cpp
deleted file mode 100644
index 7690458b928..00000000000
--- a/src/intel/compiler/test_vec4_copy_propagation.cpp
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include <gtest/gtest.h>
-#include "brw_vec4.h"
-
-using namespace brw;
-
-class copy_propagation_vec4_test : public ::testing::Test {
-   virtual void SetUp();
-   virtual void TearDown();
-
-public:
-   struct brw_compiler *compiler;
-   struct brw_compile_params params;
-   struct intel_device_info *devinfo;
-   void *ctx;
-   struct gl_shader_program *shader_prog;
-   struct brw_vue_prog_data *prog_data;
-   vec4_visitor *v;
-};
-
-class copy_propagation_vec4_visitor : public vec4_visitor
-{
-public:
-   copy_propagation_vec4_visitor(struct brw_compiler *compiler,
-                                 struct brw_compile_params *params,
-                                 nir_shader *shader,
-                                 struct brw_vue_prog_data *prog_data)
-      : vec4_visitor(compiler, params, NULL, prog_data, shader,
-                     false /* no_spills */, false)
-   {
-      prog_data->dispatch_mode = INTEL_DISPATCH_MODE_4X2_DUAL_OBJECT;
-   }
-
-protected:
-   virtual dst_reg *make_reg_for_system_value(int /* location */)
-   {
-      unreachable("Not reached");
-   }
-
-   virtual void setup_payload()
-   {
-      unreachable("Not reached");
-   }
-
-   virtual void emit_prolog()
-   {
-      unreachable("Not reached");
-   }
-
-   virtual void emit_thread_end()
-   {
-      unreachable("Not reached");
-   }
-
-   virtual void emit_urb_write_header(int /* mrf */)
-   {
-      unreachable("Not reached");
-   }
-
-   virtual vec4_instruction *emit_urb_write_opcode(bool /* complete */)
-   {
-      unreachable("Not reached");
-   }
-};
-
-
-void copy_propagation_vec4_test::SetUp()
-{
-   ctx = ralloc_context(NULL);
-   compiler = rzalloc(ctx, struct brw_compiler);
-   devinfo = rzalloc(ctx, struct intel_device_info);
-   compiler->devinfo = devinfo;
-
-   params = {};
-   params.mem_ctx = ctx;
-
-   prog_data = ralloc(ctx, struct brw_vue_prog_data);
-   nir_shader *shader =
-      nir_shader_create(ctx, MESA_SHADER_VERTEX, NULL, NULL);
-
-   v = new copy_propagation_vec4_visitor(compiler, &params, shader, prog_data);
-
-   devinfo->ver = 4;
-   devinfo->verx10 = devinfo->ver * 10;
-}
-
-void copy_propagation_vec4_test::TearDown()
-{
-   delete v;
-   v = NULL;
-
-   ralloc_free(ctx);
-   ctx = NULL;
-}
-
-
-static void
-copy_propagation(vec4_visitor *v)
-{
-   const bool print = getenv("TEST_DEBUG");
-
-   if (print) {
-      fprintf(stderr, "instructions before:\n");
-      v->dump_instructions();
-   }
-
-   v->calculate_cfg();
-   v->opt_copy_propagation();
-
-   if (print) {
-      fprintf(stderr, "instructions after:\n");
-      v->dump_instructions();
-   }
-}
-
-TEST_F(copy_propagation_vec4_test, test_swizzle_swizzle)
-{
-   dst_reg a = dst_reg(v, glsl_vec4_type());
-   dst_reg b = dst_reg(v, glsl_vec4_type());
-   dst_reg c = dst_reg(v, glsl_vec4_type());
-
-   v->emit(v->ADD(a, src_reg(a), src_reg(a)));
-
-   v->emit(v->MOV(b, swizzle(src_reg(a), BRW_SWIZZLE4(BRW_SWIZZLE_Y,
-                                                      BRW_SWIZZLE_Z,
-                                                      BRW_SWIZZLE_W,
-                                                      BRW_SWIZZLE_X))));
-
-   vec4_instruction *test_mov =
-      v->MOV(c, swizzle(src_reg(b), BRW_SWIZZLE4(BRW_SWIZZLE_Y,
-                                                 BRW_SWIZZLE_Z,
-                                                 BRW_SWIZZLE_W,
-                                                 BRW_SWIZZLE_X)));
-   v->emit(test_mov);
-
-   copy_propagation(v);
-
-   EXPECT_EQ(test_mov->src[0].nr, a.nr);
-   EXPECT_EQ(test_mov->src[0].swizzle, BRW_SWIZZLE4(BRW_SWIZZLE_Z,
-                                                    BRW_SWIZZLE_W,
-                                                    BRW_SWIZZLE_X,
-                                                    BRW_SWIZZLE_Y));
-}
-
-TEST_F(copy_propagation_vec4_test, test_swizzle_writemask)
-{
-   dst_reg a = dst_reg(v, glsl_vec4_type());
-   dst_reg b = dst_reg(v, glsl_vec4_type());
-   dst_reg c = dst_reg(v, glsl_vec4_type());
-
-   v->emit(v->MOV(b, swizzle(src_reg(a), BRW_SWIZZLE4(BRW_SWIZZLE_X,
-                                                      BRW_SWIZZLE_Y,
-                                                      BRW_SWIZZLE_X,
-                                                      BRW_SWIZZLE_Z))));
-
-   v->emit(v->MOV(writemask(a, WRITEMASK_XYZ), brw_imm_f(1.0f)));
-
-   vec4_instruction *test_mov =
-      v->MOV(c, swizzle(src_reg(b), BRW_SWIZZLE4(BRW_SWIZZLE_W,
-                                                 BRW_SWIZZLE_W,
-                                                 BRW_SWIZZLE_W,
-                                                 BRW_SWIZZLE_W)));
-   v->emit(test_mov);
-
-   copy_propagation(v);
-
-   /* should not copy propagate */
-   EXPECT_EQ(test_mov->src[0].nr, b.nr);
-   EXPECT_EQ(test_mov->src[0].swizzle, BRW_SWIZZLE4(BRW_SWIZZLE_W,
-                                                    BRW_SWIZZLE_W,
-                                                    BRW_SWIZZLE_W,
-                                                    BRW_SWIZZLE_W));
-}
diff --git a/src/intel/compiler/test_vec4_dead_code_eliminate.cpp b/src/intel/compiler/test_vec4_dead_code_eliminate.cpp
deleted file mode 100644
index c3a07c1735b..00000000000
--- a/src/intel/compiler/test_vec4_dead_code_eliminate.cpp
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Copyright © 2018 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include <gtest/gtest.h>
-#include "brw_vec4.h"
-
-using namespace brw;
-
-class dead_code_eliminate_vec4_test : public ::testing::Test {
-   virtual void SetUp();
-   virtual void TearDown();
-
-public:
-   struct brw_compiler *compiler;
-   struct brw_compile_params params;
-   struct intel_device_info *devinfo;
-   void *ctx;
-   struct gl_shader_program *shader_prog;
-   struct brw_vue_prog_data *prog_data;
-   vec4_visitor *v;
-};
-
-class dead_code_eliminate_vec4_visitor : public vec4_visitor
-{
-public:
-   dead_code_eliminate_vec4_visitor(struct brw_compiler *compiler,
-                                    struct brw_compile_params *params,
-                                 nir_shader *shader,
-                                 struct brw_vue_prog_data *prog_data)
-      : vec4_visitor(compiler, params, NULL, prog_data, shader,
-                     false /* no_spills */, false)
-   {
-      prog_data->dispatch_mode = INTEL_DISPATCH_MODE_4X2_DUAL_OBJECT;
-   }
-
-protected:
-   virtual dst_reg *make_reg_for_system_value(int /* location */)
-   {
-      unreachable("Not reached");
-   }
-
-   virtual void setup_payload()
-   {
-      unreachable("Not reached");
-   }
-
-   virtual void emit_prolog()
-   {
-      unreachable("Not reached");
-   }
-
-   virtual void emit_thread_end()
-   {
-      unreachable("Not reached");
-   }
-
-   virtual void emit_urb_write_header(int /* mrf */)
-   {
-      unreachable("Not reached");
-   }
-
-   virtual vec4_instruction *emit_urb_write_opcode(bool /* complete */)
-   {
-      unreachable("Not reached");
-   }
-};
-
-
-void dead_code_eliminate_vec4_test::SetUp()
-{
-   ctx = ralloc_context(NULL);
-   compiler = rzalloc(ctx, struct brw_compiler);
-   devinfo = rzalloc(ctx, struct intel_device_info);
-   compiler->devinfo = devinfo;
-
-   params = {};
-   params.mem_ctx = ctx;
-
-   prog_data = ralloc(ctx, struct brw_vue_prog_data);
-   nir_shader *shader =
-      nir_shader_create(ctx, MESA_SHADER_VERTEX, NULL, NULL);
-
-  v = new dead_code_eliminate_vec4_visitor(compiler, &params, shader, prog_data);
-
-   devinfo->ver = 4;
-   devinfo->verx10 = devinfo->ver * 10;
-}
-
-void dead_code_eliminate_vec4_test::TearDown()
-{
-   delete v;
-   v = NULL;
-
-   ralloc_free(ctx);
-   ctx = NULL;
-}
-
-static void
-dead_code_eliminate(vec4_visitor *v)
-{
-   const bool print = getenv("TEST_DEBUG");
-
-   if (print) {
-      fprintf(stderr, "instructions before:\n");
-      v->dump_instructions();
-   }
-
-   v->calculate_cfg();
-   v->dead_code_eliminate();
-
-   if (print) {
-      fprintf(stderr, "instructions after:\n");
-      v->dump_instructions();
-   }
-}
-
-TEST_F(dead_code_eliminate_vec4_test, some_dead_channels_all_flags_used)
-{
-   const vec4_builder bld = vec4_builder(v).at_end();
-   src_reg r1 = src_reg(v, glsl_vec4_type());
-   src_reg r2 = src_reg(v, glsl_vec4_type());
-   src_reg r3 = src_reg(v, glsl_vec4_type());
-   src_reg r4 = src_reg(v, glsl_vec4_type());
-   src_reg r5 = src_reg(v, glsl_vec4_type());
-   src_reg r6 = src_reg(v, glsl_vec4_type());
-
-   /* Sequence like the following should not be modified by DCE.
-    *
-    *     cmp.l.f0(8)     g4<1>F         g2<4,4,1>.wF   g1<4,4,1>.xF
-    *     mov(8)          g5<1>.xF       g4<4,4,1>.xF
-    *     (+f0.x) sel(8)  g6<1>UD        g3<4>UD        g6<4>UD
-    */
-   vec4_instruction *test_cmp =
-      bld.CMP(dst_reg(r4), r2, r1, BRW_CONDITIONAL_L);
-
-   test_cmp->src[0].swizzle = BRW_SWIZZLE_WWWW;
-   test_cmp->src[1].swizzle = BRW_SWIZZLE_XXXX;
-
-   vec4_instruction *test_mov =
-      bld.MOV(dst_reg(r5), r4);
-
-   test_mov->dst.writemask = WRITEMASK_X;
-   test_mov->src[0].swizzle = BRW_SWIZZLE_XXXX;
-
-   vec4_instruction *test_sel =
-      bld.SEL(dst_reg(r6), r3, r6);
-
-   set_predicate(BRW_PREDICATE_NORMAL, test_sel);
-
-   /* The scratch write is here just to make r5 and r6 be live so that the
-    * whole program doesn't get eliminated by DCE.
-    */
-   v->emit(v->SCRATCH_WRITE(dst_reg(r4), r6, r5));
-
-   dead_code_eliminate(v);
-
-   EXPECT_EQ(test_cmp->dst.writemask, WRITEMASK_XYZW);
-}
diff --git a/src/intel/compiler/test_vec4_register_coalesce.cpp b/src/intel/compiler/test_vec4_register_coalesce.cpp
deleted file mode 100644
index 13d01c450d4..00000000000
--- a/src/intel/compiler/test_vec4_register_coalesce.cpp
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include <gtest/gtest.h>
-#include "brw_vec4.h"
-
-using namespace brw;
-
-#define register_coalesce(v) _register_coalesce(v, __func__)
-
-class register_coalesce_vec4_test : public ::testing::Test {
-   virtual void SetUp();
-   virtual void TearDown();
-
-public:
-   struct brw_compiler *compiler;
-   struct brw_compile_params params;
-   struct intel_device_info *devinfo;
-   void *ctx;
-   struct gl_shader_program *shader_prog;
-   struct brw_vue_prog_data *prog_data;
-   vec4_visitor *v;
-};
-
-
-class register_coalesce_vec4_visitor : public vec4_visitor
-{
-public:
-   register_coalesce_vec4_visitor(struct brw_compiler *compiler,
-                                  struct brw_compile_params *params,
-                                  nir_shader *shader,
-                                  struct brw_vue_prog_data *prog_data)
-      : vec4_visitor(compiler, params, NULL, prog_data, shader,
-                     false /* no_spills */, false)
-   {
-      prog_data->dispatch_mode = INTEL_DISPATCH_MODE_4X2_DUAL_OBJECT;
-   }
-
-protected:
-   virtual dst_reg *make_reg_for_system_value(int /* location */)
-   {
-      unreachable("Not reached");
-   }
-
-   virtual void setup_payload()
-   {
-      unreachable("Not reached");
-   }
-
-   virtual void emit_prolog()
-   {
-      unreachable("Not reached");
-   }
-
-   virtual void emit_thread_end()
-   {
-      unreachable("Not reached");
-   }
-
-   virtual void emit_urb_write_header(int /* mrf */)
-   {
-      unreachable("Not reached");
-   }
-
-   virtual vec4_instruction *emit_urb_write_opcode(bool /* complete */)
-   {
-      unreachable("Not reached");
-   }
-};
-
-
-void register_coalesce_vec4_test::SetUp()
-{
-   ctx = ralloc_context(NULL);
-   compiler = rzalloc(ctx, struct brw_compiler);
-   devinfo = rzalloc(ctx, struct intel_device_info);
-   compiler->devinfo = devinfo;
-
-   prog_data = ralloc(ctx, struct brw_vue_prog_data);
-
-   params = {};
-   params.mem_ctx = ctx;
-
-   nir_shader *shader =
-      nir_shader_create(ctx, MESA_SHADER_VERTEX, NULL, NULL);
-
-   v = new register_coalesce_vec4_visitor(compiler, &params, shader, prog_data);
-
-   devinfo->ver = 4;
-   devinfo->verx10 = devinfo->ver * 10;
-}
-
-void register_coalesce_vec4_test::TearDown()
-{
-   delete v;
-   v = NULL;
-
-   ralloc_free(ctx);
-   ctx = NULL;
-}
-
-static void
-_register_coalesce(vec4_visitor *v, const char *func)
-{
-   const bool print = getenv("TEST_DEBUG");
-
-   if (print) {
-      printf("%s: instructions before:\n", func);
-      v->dump_instructions();
-   }
-
-   v->calculate_cfg();
-   v->opt_register_coalesce();
-
-   if (print) {
-      printf("%s: instructions after:\n", func);
-      v->dump_instructions();
-   }
-}
-
-TEST_F(register_coalesce_vec4_test, test_compute_to_mrf)
-{
-   src_reg something = src_reg(v, glsl_float_type());
-   dst_reg temp = dst_reg(v, glsl_float_type());
-   dst_reg init;
-
-   dst_reg m0 = dst_reg(MRF, 0);
-   m0.writemask = WRITEMASK_X;
-   m0.type = BRW_REGISTER_TYPE_F;
-
-   vec4_instruction *mul = v->emit(v->MUL(temp, something, brw_imm_f(1.0f)));
-   v->emit(v->MOV(m0, src_reg(temp)));
-
-   register_coalesce(v);
-
-   EXPECT_EQ(mul->dst.file, MRF);
-}
-
-
-TEST_F(register_coalesce_vec4_test, test_multiple_use)
-{
-   src_reg something = src_reg(v, glsl_float_type());
-   dst_reg temp = dst_reg(v, glsl_vec4_type());
-   dst_reg init;
-
-   dst_reg m0 = dst_reg(MRF, 0);
-   m0.writemask = WRITEMASK_X;
-   m0.type = BRW_REGISTER_TYPE_F;
-
-   dst_reg m1 = dst_reg(MRF, 1);
-   m1.writemask = WRITEMASK_XYZW;
-   m1.type = BRW_REGISTER_TYPE_F;
-
-   src_reg src = src_reg(temp);
-   vec4_instruction *mul = v->emit(v->MUL(temp, something, brw_imm_f(1.0f)));
-   src.swizzle = BRW_SWIZZLE_XXXX;
-   v->emit(v->MOV(m0, src));
-   src.swizzle = BRW_SWIZZLE_XYZW;
-   v->emit(v->MOV(m1, src));
-
-   register_coalesce(v);
-
-   EXPECT_NE(mul->dst.file, MRF);
-}
-
-TEST_F(register_coalesce_vec4_test, test_dp4_mrf)
-{
-   src_reg some_src_1 = src_reg(v, glsl_vec4_type());
-   src_reg some_src_2 = src_reg(v, glsl_vec4_type());
-   dst_reg init;
-
-   dst_reg m0 = dst_reg(MRF, 0);
-   m0.writemask = WRITEMASK_Y;
-   m0.type = BRW_REGISTER_TYPE_F;
-
-   dst_reg temp = dst_reg(v, glsl_float_type());
-
-   vec4_instruction *dp4 = v->emit(v->DP4(temp, some_src_1, some_src_2));
-   v->emit(v->MOV(m0, src_reg(temp)));
-
-   register_coalesce(v);
-
-   EXPECT_EQ(dp4->dst.file, MRF);
-   EXPECT_EQ(dp4->dst.writemask, WRITEMASK_Y);
-}
-
-TEST_F(register_coalesce_vec4_test, test_dp4_grf)
-{
-   src_reg some_src_1 = src_reg(v, glsl_vec4_type());
-   src_reg some_src_2 = src_reg(v, glsl_vec4_type());
-   dst_reg init;
-
-   dst_reg to = dst_reg(v, glsl_vec4_type());
-   dst_reg temp = dst_reg(v, glsl_float_type());
-
-   vec4_instruction *dp4 = v->emit(v->DP4(temp, some_src_1, some_src_2));
-   to.writemask = WRITEMASK_Y;
-   v->emit(v->MOV(to, src_reg(temp)));
-
-   /* if we don't do something with the result, the automatic dead code
-    * elimination will remove all our instructions.
-    */
-   src_reg src = src_reg(to);
-   src.negate = true;
-   v->emit(v->MOV(dst_reg(MRF, 0), src));
-
-   register_coalesce(v);
-
-   EXPECT_EQ(dp4->dst.nr, to.nr);
-   EXPECT_EQ(dp4->dst.writemask, WRITEMASK_Y);
-}
-
-TEST_F(register_coalesce_vec4_test, test_channel_mul_grf)
-{
-   src_reg some_src_1 = src_reg(v, glsl_vec4_type());
-   src_reg some_src_2 = src_reg(v, glsl_vec4_type());
-   dst_reg init;
-
-   dst_reg to = dst_reg(v, glsl_vec4_type());
-   dst_reg temp = dst_reg(v, glsl_float_type());
-
-   vec4_instruction *mul = v->emit(v->MUL(temp, some_src_1, some_src_2));
-   to.writemask = WRITEMASK_Y;
-   v->emit(v->MOV(to, src_reg(temp)));
-
-   /* if we don't do something with the result, the automatic dead code
-    * elimination will remove all our instructions.
-    */
-   src_reg src = src_reg(to);
-   src.negate = true;
-   v->emit(v->MOV(dst_reg(MRF, 0), src));
-
-   register_coalesce(v);
-
-   EXPECT_EQ(mul->dst.nr, to.nr);
-}