intel/brw: Remove vec4 backend

It still exists as part of ELK for older gfx versions.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27691>
Caio Oliveira authored 2024-02-14 22:57:40 -08:00; committed by Marge Bot
parent 7c23b90537
commit a641aa294e
39 changed files with 0 additions and 17138 deletions


@@ -87,8 +87,6 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
brw_init_isa_info(&compiler->isa, devinfo);
brw_fs_alloc_reg_sets(compiler);
if (devinfo->ver < 8)
brw_vec4_alloc_reg_set(compiler);
compiler->precise_trig = debug_get_bool_option("INTEL_PRECISE_TRIG", false);


@@ -57,16 +57,6 @@ struct brw_compiler {
struct brw_isa_info isa;
struct {
struct ra_regs *regs;
/**
* Array of the ra classes for the unaligned contiguous register
* block sizes used.
*/
struct ra_class **classes;
} vec4_reg_set;
struct {
struct ra_regs *regs;


@@ -33,7 +33,6 @@
#include "brw_fs_builder.h"
#include "brw_fs_live_variables.h"
#include "brw_nir.h"
#include "brw_vec4_gs_visitor.h"
#include "brw_cfg.h"
#include "brw_dead_control_flow.h"
#include "brw_private.h"


@@ -23,7 +23,6 @@
#include "brw_eu.h"
#include "brw_fs.h"
#include "brw_vec4.h"
#include "brw_cfg.h"
using namespace brw;
@@ -152,29 +151,6 @@ namespace {
rcount = inst->opcode == BRW_OPCODE_DPAS ? inst->rcount : 0;
}
instruction_info(const struct brw_isa_info *isa,
const vec4_instruction *inst) :
isa(isa), devinfo(isa->devinfo), op(inst->opcode),
td(inst->dst.type), sd(DIV_ROUND_UP(inst->size_written, REG_SIZE)),
tx(get_exec_type(inst)), sx(0), ss(0), sc(0),
desc(inst->desc), sfid(inst->sfid), rcount(0)
{
/* Compute the maximum source size. */
for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++)
ss = MAX2(ss, DIV_ROUND_UP(inst->size_read(i), REG_SIZE));
/* Convert the execution size to GRF units. */
sx = DIV_ROUND_UP(inst->exec_size * type_sz(tx), REG_SIZE);
/* 32x32 integer multiplication has half the usual ALU throughput.
* Treat it as double-precision.
*/
if ((inst->opcode == BRW_OPCODE_MUL || inst->opcode == BRW_OPCODE_MAD) &&
!brw_reg_type_is_floating_point(tx) && type_sz(tx) == 4 &&
type_sz(inst->src[0].type) == type_sz(inst->src[1].type))
tx = brw_int_type(8, tx == BRW_REGISTER_TYPE_D);
}
/** ISA encoding information */
const struct brw_isa_info *isa;
/** Device information. */
@@ -1505,102 +1481,6 @@ namespace {
}
}
/**
* Model the performance behavior of a VEC4 back-end instruction.
*/
void
issue_vec4_instruction(state &st, const struct brw_isa_info *isa,
const backend_instruction *be_inst)
{
const struct intel_device_info *devinfo = isa->devinfo;
const vec4_instruction *inst =
static_cast<const vec4_instruction *>(be_inst);
const instruction_info info(isa, inst);
const perf_desc perf = instruction_desc(info);
/* Stall on any source dependencies. */
for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++) {
for (unsigned j = 0; j < regs_read(inst, i); j++)
stall_on_dependency(
st, reg_dependency_id(devinfo, inst->src[i], j));
}
if (inst->reads_accumulator_implicitly()) {
for (unsigned j = accum_reg_of_channel(devinfo, inst, info.tx, 0);
j <= accum_reg_of_channel(devinfo, inst, info.tx,
inst->exec_size - 1); j++)
stall_on_dependency(
st, reg_dependency_id(devinfo, brw_acc_reg(8), j));
}
if (inst->base_mrf != -1) {
for (unsigned j = 0; j < inst->mlen; j++)
stall_on_dependency(
st, reg_dependency_id(
devinfo, brw_uvec_mrf(8, inst->base_mrf, 0), j));
}
if (inst->reads_flag())
stall_on_dependency(st, EU_DEPENDENCY_ID_FLAG0);
/* Stall on any write dependencies. */
if (!inst->no_dd_check) {
if (inst->dst.file != BAD_FILE && !inst->dst.is_null()) {
for (unsigned j = 0; j < regs_written(inst); j++)
stall_on_dependency(
st, reg_dependency_id(devinfo, inst->dst, j));
}
if (inst->writes_accumulator_implicitly(devinfo)) {
for (unsigned j = accum_reg_of_channel(devinfo, inst, info.tx, 0);
j <= accum_reg_of_channel(devinfo, inst, info.tx,
inst->exec_size - 1); j++)
stall_on_dependency(
st, reg_dependency_id(devinfo, brw_acc_reg(8), j));
}
if (inst->writes_flag(devinfo))
stall_on_dependency(st, EU_DEPENDENCY_ID_FLAG0);
}
/* Execute the instruction. */
execute_instruction(st, perf);
/* Mark any source dependencies. */
if (inst->is_send_from_grf()) {
for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++) {
for (unsigned j = 0; j < regs_read(inst, i); j++)
mark_read_dependency(
st, perf, reg_dependency_id(devinfo, inst->src[i], j));
}
}
if (inst->base_mrf != -1) {
for (unsigned j = 0; j < inst->mlen; j++)
mark_read_dependency(st, perf,
reg_dependency_id(devinfo, brw_uvec_mrf(8, inst->base_mrf, 0), j));
}
/* Mark any destination dependencies. */
if (inst->dst.file != BAD_FILE && !inst->dst.is_null()) {
for (unsigned j = 0; j < regs_written(inst); j++) {
mark_write_dependency(st, perf,
reg_dependency_id(devinfo, inst->dst, j));
}
}
if (inst->writes_accumulator_implicitly(devinfo)) {
for (unsigned j = accum_reg_of_channel(devinfo, inst, info.tx, 0);
j <= accum_reg_of_channel(devinfo, inst, info.tx,
inst->exec_size - 1); j++)
mark_write_dependency(st, perf,
reg_dependency_id(devinfo, brw_acc_reg(8), j));
}
if (inst->writes_flag(devinfo))
mark_write_dependency(st, perf, EU_DEPENDENCY_ID_FLAG0);
}
/**
* Calculate the maximum possible throughput of the program compatible with
* the cycle-count utilization estimated for each asynchronous unit, in
@@ -1692,12 +1572,6 @@ brw::performance::performance(const fs_visitor *v) :
calculate_performance(*this, v, issue_fs_inst, v->dispatch_width);
}
brw::performance::performance(const vec4_visitor *v) :
block_latency(new unsigned[v->cfg->num_blocks])
{
calculate_performance(*this, v, issue_vec4_instruction, 8);
}
brw::performance::~performance()
{
delete[] block_latency;


@@ -28,15 +28,12 @@
class fs_visitor;
namespace brw {
class vec4_visitor;
/**
* Various estimates of the performance of a shader based on static
* analysis.
*/
struct performance {
performance(const fs_visitor *v);
performance(const vec4_visitor *v);
~performance();
analysis_dependency_class


@@ -1,475 +0,0 @@
/* -*- c++ -*- */
/*
* Copyright © 2011-2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef BRW_IR_VEC4_H
#define BRW_IR_VEC4_H
#include "brw_shader.h"
namespace brw {
class dst_reg;
class src_reg : public backend_reg
{
public:
DECLARE_RALLOC_CXX_OPERATORS(src_reg)
void init();
src_reg(enum brw_reg_file file, int nr, const glsl_type *type);
src_reg();
src_reg(struct ::brw_reg reg);
bool equals(const src_reg &r) const;
bool negative_equals(const src_reg &r) const;
src_reg(class vec4_visitor *v, const struct glsl_type *type);
src_reg(class vec4_visitor *v, const struct glsl_type *type, int size);
explicit src_reg(const dst_reg &reg);
src_reg *reladdr;
};
static inline src_reg
retype(src_reg reg, enum brw_reg_type type)
{
reg.type = type;
return reg;
}
namespace detail {
static inline void
add_byte_offset(backend_reg *reg, unsigned bytes)
{
switch (reg->file) {
case BAD_FILE:
break;
case VGRF:
case ATTR:
case UNIFORM:
reg->offset += bytes;
assert(reg->offset % 16 == 0);
break;
case MRF: {
const unsigned suboffset = reg->offset + bytes;
reg->nr += suboffset / REG_SIZE;
reg->offset = suboffset % REG_SIZE;
assert(reg->offset % 16 == 0);
break;
}
case ARF:
case FIXED_GRF: {
const unsigned suboffset = reg->subnr + bytes;
reg->nr += suboffset / REG_SIZE;
reg->subnr = suboffset % REG_SIZE;
assert(reg->subnr % 16 == 0);
break;
}
default:
assert(bytes == 0);
}
}
} /* namespace detail */
static inline src_reg
byte_offset(src_reg reg, unsigned bytes)
{
detail::add_byte_offset(&reg, bytes);
return reg;
}
static inline src_reg
offset(src_reg reg, unsigned width, unsigned delta)
{
const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
const unsigned num_components = MAX2(width / 4 * stride, 4);
return byte_offset(reg, num_components * type_sz(reg.type) * delta);
}
static inline src_reg
horiz_offset(src_reg reg, unsigned delta)
{
return byte_offset(reg, delta * type_sz(reg.type));
}
/**
* Reswizzle a given source register.
* \sa brw_swizzle().
*/
static inline src_reg
swizzle(src_reg reg, unsigned swizzle)
{
if (reg.file == IMM)
reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swizzle);
else
reg.swizzle = brw_compose_swizzle(swizzle, reg.swizzle);
return reg;
}
static inline src_reg
negate(src_reg reg)
{
assert(reg.file != IMM);
reg.negate = !reg.negate;
return reg;
}
static inline bool
is_uniform(const src_reg &reg)
{
return (reg.file == IMM || reg.file == UNIFORM || reg.is_null()) &&
(!reg.reladdr || is_uniform(*reg.reladdr));
}
class dst_reg : public backend_reg
{
public:
DECLARE_RALLOC_CXX_OPERATORS(dst_reg)
void init();
dst_reg();
dst_reg(enum brw_reg_file file, int nr);
dst_reg(enum brw_reg_file file, int nr, const glsl_type *type,
unsigned writemask);
dst_reg(enum brw_reg_file file, int nr, brw_reg_type type,
unsigned writemask);
dst_reg(struct ::brw_reg reg);
dst_reg(class vec4_visitor *v, const struct glsl_type *type);
explicit dst_reg(const src_reg &reg);
bool equals(const dst_reg &r) const;
src_reg *reladdr;
};
static inline dst_reg
retype(dst_reg reg, enum brw_reg_type type)
{
reg.type = type;
return reg;
}
static inline dst_reg
byte_offset(dst_reg reg, unsigned bytes)
{
detail::add_byte_offset(&reg, bytes);
return reg;
}
static inline dst_reg
offset(dst_reg reg, unsigned width, unsigned delta)
{
const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
const unsigned num_components = MAX2(width / 4 * stride, 4);
return byte_offset(reg, num_components * type_sz(reg.type) * delta);
}
static inline dst_reg
horiz_offset(const dst_reg &reg, unsigned delta)
{
if (is_uniform(src_reg(reg)))
return reg;
else
return byte_offset(reg, delta * type_sz(reg.type));
}
static inline dst_reg
writemask(dst_reg reg, unsigned mask)
{
assert(reg.file != IMM);
assert((reg.writemask & mask) != 0);
reg.writemask &= mask;
return reg;
}
/**
* Return an integer identifying the discrete address space a register is
* contained in. A register is by definition fully contained in the single
* reg_space it belongs to, so two registers with different reg_space ids are
* guaranteed not to overlap. Most register files are a single reg_space of
* its own, only the VGRF file is composed of multiple discrete address
* spaces, one for each VGRF allocation.
*/
static inline uint32_t
reg_space(const backend_reg &r)
{
return r.file << 16 | (r.file == VGRF ? r.nr : 0);
}
/**
* Return the base offset in bytes of a register relative to the start of its
* reg_space().
*/
static inline unsigned
reg_offset(const backend_reg &r)
{
return (r.file == VGRF || r.file == IMM ? 0 : r.nr) *
(r.file == UNIFORM ? 16 : REG_SIZE) + r.offset +
(r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
}
/**
* Return whether the register region starting at \p r and spanning \p dr
* bytes could potentially overlap the register region starting at \p s and
* spanning \p ds bytes.
*/
static inline bool
regions_overlap(const backend_reg &r, unsigned dr,
const backend_reg &s, unsigned ds)
{
if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) {
/* COMPR4 regions are translated by the hardware during decompression
* into two separate half-regions 4 MRFs apart from each other.
*/
backend_reg t0 = r;
t0.nr &= ~BRW_MRF_COMPR4;
backend_reg t1 = t0;
t1.offset += 4 * REG_SIZE;
return regions_overlap(t0, dr / 2, s, ds) ||
regions_overlap(t1, dr / 2, s, ds);
} else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) {
return regions_overlap(s, ds, r, dr);
} else {
return reg_space(r) == reg_space(s) &&
!(reg_offset(r) + dr <= reg_offset(s) ||
reg_offset(s) + ds <= reg_offset(r));
}
}
class vec4_instruction : public backend_instruction {
public:
DECLARE_RALLOC_CXX_OPERATORS(vec4_instruction)
vec4_instruction(enum opcode opcode,
const dst_reg &dst = dst_reg(),
const src_reg &src0 = src_reg(),
const src_reg &src1 = src_reg(),
const src_reg &src2 = src_reg());
dst_reg dst;
src_reg src[3];
enum brw_urb_write_flags urb_write_flags;
unsigned sol_binding; /**< gfx6: SOL binding table index */
bool sol_final_write; /**< gfx6: send commit message */
unsigned sol_vertex; /**< gfx6: used for setting dst index in SVB header */
bool is_send_from_grf() const;
unsigned size_read(unsigned arg) const;
bool can_reswizzle(const struct intel_device_info *devinfo,
int dst_writemask,
int swizzle, int swizzle_mask);
void reswizzle(int dst_writemask, int swizzle);
bool can_do_source_mods(const struct intel_device_info *devinfo);
bool can_do_cmod();
bool can_do_writemask(const struct intel_device_info *devinfo);
bool can_change_types() const;
bool has_source_and_destination_hazard() const;
unsigned implied_mrf_writes() const;
bool is_align1_partial_write()
{
return opcode == VEC4_OPCODE_SET_LOW_32BIT ||
opcode == VEC4_OPCODE_SET_HIGH_32BIT;
}
bool reads_flag() const
{
return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2;
}
bool reads_flag(unsigned c)
{
if (opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2)
return true;
switch (predicate) {
case BRW_PREDICATE_NONE:
return false;
case BRW_PREDICATE_ALIGN16_REPLICATE_X:
return c == 0;
case BRW_PREDICATE_ALIGN16_REPLICATE_Y:
return c == 1;
case BRW_PREDICATE_ALIGN16_REPLICATE_Z:
return c == 2;
case BRW_PREDICATE_ALIGN16_REPLICATE_W:
return c == 3;
default:
return true;
}
}
bool writes_flag(const intel_device_info *devinfo) const
{
return (conditional_mod && ((opcode != BRW_OPCODE_SEL || devinfo->ver <= 5) &&
opcode != BRW_OPCODE_CSEL &&
opcode != BRW_OPCODE_IF &&
opcode != BRW_OPCODE_WHILE));
}
bool reads_g0_implicitly() const
{
switch (opcode) {
case SHADER_OPCODE_TEX:
case SHADER_OPCODE_TXL:
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
case SHADER_OPCODE_TXF_CMS_W:
case SHADER_OPCODE_TXF_CMS:
case SHADER_OPCODE_TXF_MCS:
case SHADER_OPCODE_TXS:
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
case SHADER_OPCODE_SAMPLEINFO:
case VS_OPCODE_PULL_CONSTANT_LOAD:
case GS_OPCODE_SET_PRIMITIVE_ID:
case GS_OPCODE_GET_INSTANCE_ID:
case SHADER_OPCODE_GFX4_SCRATCH_READ:
case SHADER_OPCODE_GFX4_SCRATCH_WRITE:
return true;
default:
return false;
}
}
};
/**
* Make the execution of \p inst dependent on the evaluation of a possibly
* inverted predicate.
*/
inline vec4_instruction *
set_predicate_inv(enum brw_predicate pred, bool inverse,
vec4_instruction *inst)
{
inst->predicate = pred;
inst->predicate_inverse = inverse;
return inst;
}
/**
* Make the execution of \p inst dependent on the evaluation of a predicate.
*/
inline vec4_instruction *
set_predicate(enum brw_predicate pred, vec4_instruction *inst)
{
return set_predicate_inv(pred, false, inst);
}
/**
* Write the result of evaluating the condition given by \p mod to a flag
* register.
*/
inline vec4_instruction *
set_condmod(enum brw_conditional_mod mod, vec4_instruction *inst)
{
inst->conditional_mod = mod;
return inst;
}
/**
* Clamp the result of \p inst to the saturation range of its destination
* datatype.
*/
inline vec4_instruction *
set_saturate(bool saturate, vec4_instruction *inst)
{
inst->saturate = saturate;
return inst;
}
/**
* Return the number of dataflow registers written by the instruction (either
* fully or partially) counted from 'floor(reg_offset(inst->dst) /
* register_size)'. The somewhat arbitrary register size unit is 16B for the
* UNIFORM and IMM files and 32B for all other files.
*/
inline unsigned
regs_written(const vec4_instruction *inst)
{
assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE + inst->size_written,
REG_SIZE);
}
/**
* Return the number of dataflow registers read by the instruction (either
* fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
* register_size)'. The somewhat arbitrary register size unit is 16B for the
* UNIFORM and IMM files and 32B for all other files.
*/
inline unsigned
regs_read(const vec4_instruction *inst, unsigned i)
{
const unsigned reg_size =
inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 16 : REG_SIZE;
return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size + inst->size_read(i),
reg_size);
}
static inline enum brw_reg_type
get_exec_type(const vec4_instruction *inst)
{
enum brw_reg_type exec_type = BRW_REGISTER_TYPE_B;
for (int i = 0; i < 3; i++) {
if (inst->src[i].file != BAD_FILE) {
const brw_reg_type t = get_exec_type(brw_reg_type(inst->src[i].type));
if (type_sz(t) > type_sz(exec_type))
exec_type = t;
else if (type_sz(t) == type_sz(exec_type) &&
brw_reg_type_is_floating_point(t))
exec_type = t;
}
}
if (exec_type == BRW_REGISTER_TYPE_B)
exec_type = inst->dst.type;
/* TODO: We need to handle half-float conversions. */
assert(exec_type != BRW_REGISTER_TYPE_HF ||
inst->dst.type == BRW_REGISTER_TYPE_HF);
assert(exec_type != BRW_REGISTER_TYPE_B);
return exec_type;
}
static inline unsigned
get_exec_type_size(const vec4_instruction *inst)
{
return type_sz(get_exec_type(inst));
}
} /* namespace brw */
#endif


@@ -28,7 +28,6 @@
#include "brw_eu.h"
#include "brw_fs.h"
#include "brw_fs_live_variables.h"
#include "brw_vec4.h"
#include "brw_cfg.h"
#include "brw_shader.h"
#include <new>
@@ -1027,25 +1026,6 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
return benefit;
}
class vec4_instruction_scheduler : public instruction_scheduler
{
public:
vec4_instruction_scheduler(void *mem_ctx, const vec4_visitor *v, int grf_count);
void calculate_deps();
schedule_node *choose_instruction_to_schedule();
const vec4_visitor *v;
void run();
};
vec4_instruction_scheduler::vec4_instruction_scheduler(void *mem_ctx, const vec4_visitor *v,
int grf_count)
: instruction_scheduler(mem_ctx, v, grf_count, /* grf_write_scale */ 1,
/* post_reg_alloc */ true),
v(v)
{
}
void
instruction_scheduler::set_current_block(bblock_t *block)
{
@@ -1534,179 +1514,6 @@ fs_instruction_scheduler::calculate_deps()
clear_last_grf_write();
}
void
vec4_instruction_scheduler::calculate_deps()
{
schedule_node *last_mrf_write[BRW_MAX_MRF(v->devinfo->ver)];
schedule_node *last_conditional_mod = NULL;
schedule_node *last_accumulator_write = NULL;
/* Fixed HW registers are assumed to be separate from the virtual
* GRFs, so they can be tracked separately. We don't really write
* to fixed GRFs much, so don't bother tracking them on a more
* granular level.
*/
schedule_node *last_fixed_grf_write = NULL;
memset(last_grf_write, 0, grf_count * sizeof(*last_grf_write));
memset(last_mrf_write, 0, sizeof(last_mrf_write));
/* top-to-bottom dependencies: RAW and WAW. */
for (schedule_node *n = current.start; n < current.end; n++) {
vec4_instruction *inst = (vec4_instruction *)n->inst;
if (is_scheduling_barrier(inst))
add_barrier_deps(n);
/* read-after-write deps. */
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == VGRF) {
for (unsigned j = 0; j < regs_read(inst, i); ++j)
add_dep(last_grf_write[inst->src[i].nr + j], n);
} else if (inst->src[i].file == FIXED_GRF) {
add_dep(last_fixed_grf_write, n);
} else if (inst->src[i].is_accumulator()) {
assert(last_accumulator_write);
add_dep(last_accumulator_write, n);
} else if (inst->src[i].file == ARF && !inst->src[i].is_null()) {
add_barrier_deps(n);
}
}
if (inst->reads_g0_implicitly())
add_dep(last_fixed_grf_write, n);
if (!inst->is_send_from_grf()) {
for (int i = 0; i < inst->mlen; i++) {
/* It looks like the MRF regs are released in the send
* instruction once it's sent, not when the result comes
* back.
*/
add_dep(last_mrf_write[inst->base_mrf + i], n);
}
}
if (inst->reads_flag()) {
assert(last_conditional_mod);
add_dep(last_conditional_mod, n);
}
if (inst->reads_accumulator_implicitly()) {
assert(last_accumulator_write);
add_dep(last_accumulator_write, n);
}
/* write-after-write deps. */
if (inst->dst.file == VGRF) {
for (unsigned j = 0; j < regs_written(inst); ++j) {
add_dep(last_grf_write[inst->dst.nr + j], n);
last_grf_write[inst->dst.nr + j] = n;
}
} else if (inst->dst.file == MRF) {
add_dep(last_mrf_write[inst->dst.nr], n);
last_mrf_write[inst->dst.nr] = n;
} else if (inst->dst.file == FIXED_GRF) {
add_dep(last_fixed_grf_write, n);
last_fixed_grf_write = n;
} else if (inst->dst.is_accumulator()) {
add_dep(last_accumulator_write, n);
last_accumulator_write = n;
} else if (inst->dst.file == ARF && !inst->dst.is_null()) {
add_barrier_deps(n);
}
if (inst->mlen > 0 && !inst->is_send_from_grf()) {
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
add_dep(last_mrf_write[inst->base_mrf + i], n);
last_mrf_write[inst->base_mrf + i] = n;
}
}
if (inst->writes_flag(v->devinfo)) {
add_dep(last_conditional_mod, n, 0);
last_conditional_mod = n;
}
if (inst->writes_accumulator_implicitly(v->devinfo) &&
!inst->dst.is_accumulator()) {
add_dep(last_accumulator_write, n);
last_accumulator_write = n;
}
}
/* bottom-to-top dependencies: WAR */
memset(last_grf_write, 0, grf_count * sizeof(*last_grf_write));
memset(last_mrf_write, 0, sizeof(last_mrf_write));
last_conditional_mod = NULL;
last_accumulator_write = NULL;
last_fixed_grf_write = NULL;
for (schedule_node *n = current.end - 1; n >= current.start; n--) {
vec4_instruction *inst = (vec4_instruction *)n->inst;
/* write-after-read deps. */
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == VGRF) {
for (unsigned j = 0; j < regs_read(inst, i); ++j)
add_dep(n, last_grf_write[inst->src[i].nr + j]);
} else if (inst->src[i].file == FIXED_GRF) {
add_dep(n, last_fixed_grf_write);
} else if (inst->src[i].is_accumulator()) {
add_dep(n, last_accumulator_write);
} else if (inst->src[i].file == ARF && !inst->src[i].is_null()) {
add_barrier_deps(n);
}
}
if (!inst->is_send_from_grf()) {
for (int i = 0; i < inst->mlen; i++) {
/* It looks like the MRF regs are released in the send
* instruction once it's sent, not when the result comes
* back.
*/
add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
}
}
if (inst->reads_flag()) {
add_dep(n, last_conditional_mod);
}
if (inst->reads_accumulator_implicitly()) {
add_dep(n, last_accumulator_write);
}
/* Update the things this instruction wrote, so earlier reads
* can mark this as WAR dependency.
*/
if (inst->dst.file == VGRF) {
for (unsigned j = 0; j < regs_written(inst); ++j)
last_grf_write[inst->dst.nr + j] = n;
} else if (inst->dst.file == MRF) {
last_mrf_write[inst->dst.nr] = n;
} else if (inst->dst.file == FIXED_GRF) {
last_fixed_grf_write = n;
} else if (inst->dst.is_accumulator()) {
last_accumulator_write = n;
} else if (inst->dst.file == ARF && !inst->dst.is_null()) {
add_barrier_deps(n);
}
if (inst->mlen > 0 && !inst->is_send_from_grf()) {
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
last_mrf_write[inst->base_mrf + i] = n;
}
}
if (inst->writes_flag(v->devinfo)) {
last_conditional_mod = n;
}
if (inst->writes_accumulator_implicitly(v->devinfo)) {
last_accumulator_write = n;
}
}
}
schedule_node *
fs_instruction_scheduler::choose_instruction_to_schedule()
{
@@ -1837,25 +1644,6 @@ fs_instruction_scheduler::choose_instruction_to_schedule()
return chosen;
}
schedule_node *
vec4_instruction_scheduler::choose_instruction_to_schedule()
{
schedule_node *chosen = NULL;
int chosen_time = 0;
/* Of the instructions ready to execute or the closest to being ready,
* choose the oldest one.
*/
foreach_in_list(schedule_node, n, &current.available) {
if (!chosen || n->tmp.unblocked_time < chosen_time) {
chosen = n;
chosen_time = n->tmp.unblocked_time;
}
}
return chosen;
}
int
fs_instruction_scheduler::calculate_issue_time(backend_instruction *inst0)
{
@@ -2009,41 +1797,6 @@ fs_instruction_scheduler::run(instruction_scheduler_mode mode)
}
}
void
vec4_instruction_scheduler::run()
{
foreach_block(block, v->cfg) {
set_current_block(block);
for (schedule_node *n = current.start; n < current.end; n++) {
/* We always execute as two vec4s in parallel. */
n->issue_time = 2;
}
calculate_deps();
compute_delays();
compute_exits();
assert(current.available.is_empty());
for (schedule_node *n = current.start; n < current.end; n++) {
reset_node_tmp(n);
/* Add DAG heads to the list of available instructions. */
if (n->tmp.parent_count == 0)
current.available.push_tail(n);
}
current.block->instructions.make_empty();
while (!current.available.is_empty()) {
schedule_node *chosen = choose_instruction_to_schedule();
schedule(chosen);
update_children(chosen);
}
}
}
fs_instruction_scheduler *
fs_visitor::prepare_scheduler(void *mem_ctx)
{
@@ -2082,16 +1835,3 @@ fs_visitor::schedule_instructions_post_ra()
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
}
void
vec4_visitor::opt_schedule_instructions()
{
void *mem_ctx = ralloc_context(NULL);
vec4_instruction_scheduler sched(mem_ctx, this, prog_data->total_grf);
sched.run();
ralloc_free(mem_ctx);
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
}


@@ -114,9 +114,6 @@ extern "C" {
/* brw_fs_reg_allocate.cpp */
void brw_fs_alloc_reg_sets(struct brw_compiler *compiler);
/* brw_vec4_reg_allocate.cpp */
void brw_vec4_alloc_reg_set(struct brw_compiler *compiler);
/* brw_disasm.c */
extern const char *const conditional_modifier[16];
extern const char *const pred_ctrl_align16[16];

File diff suppressed because it is too large


@@ -1,350 +0,0 @@
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef BRW_VEC4_H
#define BRW_VEC4_H
#include "brw_shader.h"
#ifdef __cplusplus
#include "brw_ir_vec4.h"
#include "brw_ir_performance.h"
#include "brw_vec4_builder.h"
#include "brw_vec4_live_variables.h"
#endif
#include "compiler/glsl/ir.h"
#include "compiler/nir/nir.h"
#ifdef __cplusplus
extern "C" {
#endif
const unsigned *
brw_vec4_generate_assembly(const struct brw_compiler *compiler,
const struct brw_compile_params *params,
const nir_shader *nir,
struct brw_vue_prog_data *prog_data,
const struct cfg_t *cfg,
const brw::performance &perf,
bool debug_enabled);
#ifdef __cplusplus
} /* extern "C" */
namespace brw {
/**
* The vertex shader front-end.
*
* Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
* fixed-function) into VS IR.
*/
class vec4_visitor : public backend_shader
{
public:
vec4_visitor(const struct brw_compiler *compiler,
const struct brw_compile_params *params,
const struct brw_sampler_prog_key_data *key,
struct brw_vue_prog_data *prog_data,
const nir_shader *shader,
bool no_spills,
bool debug_enabled);
dst_reg dst_null_f()
{
return dst_reg(brw_null_reg());
}
dst_reg dst_null_df()
{
return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF));
}
dst_reg dst_null_d()
{
return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
}
dst_reg dst_null_ud()
{
return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
}
const struct brw_sampler_prog_key_data * const key_tex;
struct brw_vue_prog_data * const prog_data;
char *fail_msg;
bool failed;
/**
* GLSL IR currently being processed, which is associated with our
* driver IR instructions for debugging purposes.
*/
const void *base_ir;
const char *current_annotation;
int first_non_payload_grf;
unsigned ubo_push_start[4];
unsigned push_length;
unsigned int max_grf;
brw_analysis<brw::vec4_live_variables, backend_shader> live_analysis;
brw_analysis<brw::performance, vec4_visitor> performance_analysis;
/* Regs for vertex results. Generated at ir_variable visiting time
* for the ir->location's used.
*/
dst_reg output_reg[VARYING_SLOT_TESS_MAX][4];
unsigned output_num_components[VARYING_SLOT_TESS_MAX][4];
const char *output_reg_annotation[VARYING_SLOT_TESS_MAX];
int uniforms;
bool run();
void fail(const char *msg, ...);
int setup_uniforms(int payload_reg);
bool reg_allocate_trivial();
bool reg_allocate();
void evaluate_spill_costs(float *spill_costs, bool *no_spill);
int choose_spill_reg(struct ra_graph *g);
void spill_reg(unsigned spill_reg);
void move_grf_array_access_to_scratch();
void split_uniform_registers();
void setup_push_ranges();
virtual void invalidate_analysis(brw::analysis_dependency_class c);
void split_virtual_grfs();
bool opt_vector_float();
bool opt_reduce_swizzle();
bool dead_code_eliminate();
bool opt_cmod_propagation();
bool opt_copy_propagation(bool do_constant_prop = true);
bool opt_cse_local(bblock_t *block, const vec4_live_variables &live);
bool opt_cse();
bool opt_algebraic();
bool opt_register_coalesce();
bool eliminate_find_live_channel();
bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
void opt_set_dependency_control();
void opt_schedule_instructions();
void convert_to_hw_regs();
void fixup_3src_null_dest();
bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg);
bool lower_simd_width();
bool scalarize_df();
bool lower_64bit_mad_to_mul_add();
void apply_logical_swizzle(struct brw_reg *hw_reg,
vec4_instruction *inst, int arg);
vec4_instruction *emit(vec4_instruction *inst);
vec4_instruction *emit(enum opcode opcode);
vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
const src_reg &src0);
vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
const src_reg &src0, const src_reg &src1);
vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
const src_reg &src0, const src_reg &src1,
const src_reg &src2);
vec4_instruction *emit_before(bblock_t *block,
vec4_instruction *inst,
vec4_instruction *new_inst);
#define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
#define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
#define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
EMIT1(MOV)
EMIT1(NOT)
EMIT1(RNDD)
EMIT1(RNDE)
EMIT1(RNDZ)
EMIT1(FRC)
EMIT1(F32TO16)
EMIT1(F16TO32)
EMIT2(ADD)
EMIT2(MUL)
EMIT2(MACH)
EMIT2(MAC)
EMIT2(AND)
EMIT2(OR)
EMIT2(XOR)
EMIT2(DP3)
EMIT2(DP4)
EMIT2(DPH)
EMIT2(SHL)
EMIT2(SHR)
EMIT2(ASR)
vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
enum brw_conditional_mod condition);
vec4_instruction *IF(src_reg src0, src_reg src1,
enum brw_conditional_mod condition);
vec4_instruction *IF(enum brw_predicate predicate);
EMIT1(SCRATCH_READ)
EMIT2(SCRATCH_WRITE)
EMIT3(LRP)
EMIT1(BFREV)
EMIT3(BFE)
EMIT2(BFI1)
EMIT3(BFI2)
EMIT1(FBH)
EMIT1(FBL)
EMIT1(CBIT)
EMIT1(LZD)
EMIT3(MAD)
EMIT2(ADDC)
EMIT2(SUBB)
EMIT1(DIM)
#undef EMIT1
#undef EMIT2
#undef EMIT3
vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
src_reg src0, src_reg src1);
/**
* Copy any live channel from \p src to the first channel of the
* result.
*/
src_reg emit_uniformize(const src_reg &src);
/** Fix all float operands of a 3-source instruction. */
void fix_float_operands(src_reg op[3], nir_alu_instr *instr);
src_reg fix_3src_operand(const src_reg &src);
vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
const src_reg &src1 = src_reg());
src_reg fix_math_operand(const src_reg &src);
void emit_pack_half_2x16(dst_reg dst, src_reg src0);
void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
src_reg surface);
void emit_ndc_computation();
void emit_psiz_and_flags(dst_reg reg);
vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp);
virtual void emit_urb_slot(dst_reg reg, int varying);
src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
src_reg *reladdr, int reg_offset);
void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
dst_reg dst,
src_reg orig_src,
int base_offset);
void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
int base_offset);
void emit_pull_constant_load_reg(dst_reg dst,
src_reg surf_index,
src_reg offset,
bblock_t *before_block,
vec4_instruction *before_inst);
src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
vec4_instruction *inst, src_reg src);
void resolve_ud_negate(src_reg *reg);
void emit_shader_float_controls_execution_mode();
bool lower_minmax();
src_reg get_timestamp();
virtual void dump_instruction_to_file(const backend_instruction *inst, FILE *file) const;
bool optimize_predicate(nir_alu_instr *instr, enum brw_predicate *predicate);
void emit_conversion_from_double(dst_reg dst, src_reg src);
void emit_conversion_to_double(dst_reg dst, src_reg src);
vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src,
bool for_write,
bool for_scratch = false,
bblock_t *block = NULL,
vec4_instruction *ref = NULL);
virtual void emit_nir_code();
virtual void nir_setup_uniforms();
virtual void nir_emit_impl(nir_function_impl *impl);
virtual void nir_emit_cf_list(exec_list *list);
virtual void nir_emit_if(nir_if *if_stmt);
virtual void nir_emit_loop(nir_loop *loop);
virtual void nir_emit_block(nir_block *block);
virtual void nir_emit_instr(nir_instr *instr);
virtual void nir_emit_load_const(nir_load_const_instr *instr);
src_reg get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr);
virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
virtual void nir_emit_alu(nir_alu_instr *instr);
virtual void nir_emit_jump(nir_jump_instr *instr);
virtual void nir_emit_texture(nir_tex_instr *instr);
virtual void nir_emit_undef(nir_undef_instr *instr);
virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
dst_reg get_nir_def(const nir_def &def, enum brw_reg_type type);
dst_reg get_nir_def(const nir_def &def, nir_alu_type type);
dst_reg get_nir_def(const nir_def &def);
src_reg get_nir_src(const nir_src &src, enum brw_reg_type type,
unsigned num_components = 4);
src_reg get_nir_src(const nir_src &src, nir_alu_type type,
unsigned num_components = 4);
src_reg get_nir_src(const nir_src &src,
unsigned num_components = 4);
src_reg get_nir_src_imm(const nir_src &src);
src_reg get_indirect_offset(nir_intrinsic_instr *instr);
dst_reg *nir_ssa_values;
protected:
void emit_vertex();
void setup_payload_interference(struct ra_graph *g, int first_payload_node,
int reg_node_count);
virtual void setup_payload() = 0;
virtual void emit_prolog() = 0;
virtual void emit_thread_end() = 0;
virtual void emit_urb_write_header(int mrf) = 0;
virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
virtual void gs_emit_vertex(int stream_id);
virtual void gs_end_primitive();
private:
/**
* If true, then register allocation should fail instead of spilling.
*/
const bool no_spills;
unsigned last_scratch; /**< measured in 32-byte (register size) units */
};
} /* namespace brw */
#endif /* __cplusplus */
#endif /* BRW_VEC4_H */


@@ -1,646 +0,0 @@
/* -*- c++ -*- */
/*
* Copyright © 2010-2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef BRW_VEC4_BUILDER_H
#define BRW_VEC4_BUILDER_H
#include "brw_ir_vec4.h"
#include "brw_ir_allocator.h"
namespace brw {
/**
* Toolbox to assemble a VEC4 IR program out of individual instructions.
*
* This object is meant to have an interface consistent with
* brw::fs_builder. They cannot be fully interchangeable because
* brw::fs_builder generates scalar code while brw::vec4_builder generates
* vector code.
*/
class vec4_builder {
public:
/** Type used in this IR to represent a source of an instruction. */
typedef brw::src_reg src_reg;
/** Type used in this IR to represent the destination of an instruction. */
typedef brw::dst_reg dst_reg;
/** Type used in this IR to represent an instruction. */
typedef vec4_instruction instruction;
/**
* Construct a vec4_builder that inserts instructions into \p shader.
*/
vec4_builder(backend_shader *shader, unsigned dispatch_width = 8) :
shader(shader), block(NULL), cursor(NULL),
_dispatch_width(dispatch_width), _group(0),
force_writemask_all(false),
annotation()
{
}
/**
* Construct a vec4_builder that inserts instructions into \p shader
* before instruction \p inst in basic block \p block. The default
* execution controls and debug annotation are initialized from the
* instruction passed as argument.
*/
vec4_builder(backend_shader *shader, bblock_t *block, instruction *inst) :
shader(shader), block(block), cursor(inst),
_dispatch_width(inst->exec_size), _group(inst->group),
force_writemask_all(inst->force_writemask_all)
{
annotation.str = inst->annotation;
annotation.ir = inst->ir;
}
/**
* Construct a vec4_builder that inserts instructions before \p cursor
* in basic block \p block, inheriting other code generation parameters
* from this.
*/
vec4_builder
at(bblock_t *block, exec_node *cursor) const
{
vec4_builder bld = *this;
bld.block = block;
bld.cursor = cursor;
return bld;
}
/**
* Construct a vec4_builder appending instructions at the end of the
* instruction list of the shader, inheriting other code generation
* parameters from this.
*/
vec4_builder
at_end() const
{
return at(NULL, (exec_node *)&shader->instructions.tail_sentinel);
}
/**
* Construct a builder specifying the default SIMD width and group of
* channel enable signals, inheriting other code generation parameters
* from this.
*
* \p n gives the default SIMD width, \p i gives the slot group used for
* predication and control flow masking in multiples of \p n channels.
*/
vec4_builder
group(unsigned n, unsigned i) const
{
assert(force_writemask_all ||
(n <= dispatch_width() && i < dispatch_width() / n));
vec4_builder bld = *this;
bld._dispatch_width = n;
bld._group += i * n;
return bld;
}
/**
* Construct a builder with per-channel control flow execution masking
* disabled if \p b is true. If control flow execution masking is
* already disabled this has no effect.
*/
vec4_builder
exec_all(bool b = true) const
{
vec4_builder bld = *this;
if (b)
bld.force_writemask_all = true;
return bld;
}
/**
* Construct a builder with the given debug annotation info.
*/
vec4_builder
annotate(const char *str, const void *ir = NULL) const
{
vec4_builder bld = *this;
bld.annotation.str = str;
bld.annotation.ir = ir;
return bld;
}
/**
* Get the SIMD width in use.
*/
unsigned
dispatch_width() const
{
return _dispatch_width;
}
/**
* Get the channel group in use.
*/
unsigned
group() const
{
return _group;
}
/**
* Allocate a virtual register of natural vector size (four for this IR)
* and SIMD width. \p n gives the amount of space to allocate in
* dispatch_width units (which is just enough space for four logical
* components in this IR).
*/
dst_reg
vgrf(enum brw_reg_type type, unsigned n = 1) const
{
assert(dispatch_width() <= 32);
if (n > 0)
return retype(dst_reg(VGRF, shader->alloc.allocate(
n * DIV_ROUND_UP(type_sz(type), 4))),
type);
else
return retype(null_reg_ud(), type);
}
/**
* Create a null register of floating type.
*/
dst_reg
null_reg_f() const
{
return dst_reg(retype(brw_null_vec(dispatch_width()),
BRW_REGISTER_TYPE_F));
}
/**
* Create a null register of signed integer type.
*/
dst_reg
null_reg_d() const
{
return dst_reg(retype(brw_null_vec(dispatch_width()),
BRW_REGISTER_TYPE_D));
}
/**
* Create a null register of unsigned integer type.
*/
dst_reg
null_reg_ud() const
{
return dst_reg(retype(brw_null_vec(dispatch_width()),
BRW_REGISTER_TYPE_UD));
}
/**
* Insert an instruction into the program.
*/
instruction *
emit(const instruction &inst) const
{
return emit(new(shader->mem_ctx) instruction(inst));
}
/**
* Create and insert a nullary control instruction into the program.
*/
instruction *
emit(enum opcode opcode) const
{
return emit(instruction(opcode));
}
/**
* Create and insert a nullary instruction into the program.
*/
instruction *
emit(enum opcode opcode, const dst_reg &dst) const
{
return emit(instruction(opcode, dst));
}
/**
* Create and insert a unary instruction into the program.
*/
instruction *
emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
{
switch (opcode) {
case SHADER_OPCODE_RCP:
case SHADER_OPCODE_RSQ:
case SHADER_OPCODE_SQRT:
case SHADER_OPCODE_EXP2:
case SHADER_OPCODE_LOG2:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
return fix_math_instruction(
emit(instruction(opcode, dst,
fix_math_operand(src0))));
default:
return emit(instruction(opcode, dst, src0));
}
}
/**
* Create and insert a binary instruction into the program.
*/
instruction *
emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
const src_reg &src1) const
{
switch (opcode) {
case SHADER_OPCODE_POW:
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
return fix_math_instruction(
emit(instruction(opcode, dst,
fix_math_operand(src0),
fix_math_operand(src1))));
default:
return emit(instruction(opcode, dst, src0, src1));
}
}
/**
* Create and insert a ternary instruction into the program.
*/
instruction *
emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
const src_reg &src1, const src_reg &src2) const
{
switch (opcode) {
case BRW_OPCODE_BFE:
case BRW_OPCODE_BFI2:
case BRW_OPCODE_MAD:
case BRW_OPCODE_LRP:
return emit(instruction(opcode, dst,
fix_3src_operand(src0),
fix_3src_operand(src1),
fix_3src_operand(src2)));
default:
return emit(instruction(opcode, dst, src0, src1, src2));
}
}
/**
* Insert a preallocated instruction into the program.
*/
instruction *
emit(instruction *inst) const
{
inst->exec_size = dispatch_width();
inst->group = group();
inst->force_writemask_all = force_writemask_all;
inst->size_written = inst->exec_size * type_sz(inst->dst.type);
inst->annotation = annotation.str;
inst->ir = annotation.ir;
if (block)
static_cast<instruction *>(cursor)->insert_before(block, inst);
else
cursor->insert_before(inst);
return inst;
}
/**
* Select \p src0 if the comparison of both sources with the given
* conditional mod evaluates to true, otherwise select \p src1.
*
* Generally useful to get the minimum or maximum of two values.
*/
instruction *
emit_minmax(const dst_reg &dst, const src_reg &src0,
const src_reg &src1, brw_conditional_mod mod) const
{
assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);
return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
fix_unsigned_negate(src1)));
}
/**
* Copy any live channel from \p src to the first channel of the result.
*/
src_reg
emit_uniformize(const src_reg &src) const
{
const vec4_builder ubld = exec_all();
const dst_reg chan_index =
writemask(vgrf(BRW_REGISTER_TYPE_UD), WRITEMASK_X);
const dst_reg dst = vgrf(src.type);
ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, src_reg(chan_index));
return src_reg(dst);
}
/**
* Assorted arithmetic ops.
* @{
*/
#define ALU1(op) \
instruction * \
op(const dst_reg &dst, const src_reg &src0) const \
{ \
return emit(BRW_OPCODE_##op, dst, src0); \
}
#define ALU2(op) \
instruction * \
op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
{ \
return emit(BRW_OPCODE_##op, dst, src0, src1); \
}
#define ALU2_ACC(op) \
instruction * \
op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
{ \
instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1); \
inst->writes_accumulator = true; \
return inst; \
}
#define ALU3(op) \
instruction * \
op(const dst_reg &dst, const src_reg &src0, const src_reg &src1, \
const src_reg &src2) const \
{ \
return emit(BRW_OPCODE_##op, dst, src0, src1, src2); \
}
ALU2(ADD)
ALU2_ACC(ADDC)
ALU2(AND)
ALU2(ASR)
ALU2(AVG)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(BFREV)
ALU1(CBIT)
ALU3(CSEL)
ALU1(DIM)
ALU2(DP2)
ALU2(DP3)
ALU2(DP4)
ALU2(DPH)
ALU1(F16TO32)
ALU1(F32TO16)
ALU1(FBH)
ALU1(FBL)
ALU1(FRC)
ALU2(LINE)
ALU1(LZD)
ALU2(MAC)
ALU2_ACC(MACH)
ALU3(MAD)
ALU1(MOV)
ALU2(MUL)
ALU1(NOT)
ALU2(OR)
ALU2(PLN)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(SAD2)
ALU2_ACC(SADA2)
ALU2(SEL)
ALU2(SHL)
ALU2(SHR)
ALU2_ACC(SUBB)
ALU2(XOR)
#undef ALU3
#undef ALU2_ACC
#undef ALU2
#undef ALU1
/** @} */
/**
* CMP: Sets the low bit of the destination channels with the result
* of the comparison, while the upper bits are undefined, and updates
* the flag register with the packed 16 bits of the result.
*/
instruction *
CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
brw_conditional_mod condition) const
{
/* Take the instruction:
*
* CMP null<d> src0<f> src1<f>
*
* Original gfx4 does type conversion to the destination type
* before comparison, producing garbage results for floating
* point comparisons.
*
* The destination type doesn't matter on newer generations,
* so we set the type to match src0 so we can compact the
* instruction.
*/
return set_condmod(condition,
emit(BRW_OPCODE_CMP, retype(dst, src0.type),
fix_unsigned_negate(src0),
fix_unsigned_negate(src1)));
}
/**
* CMPN: Behaves like CMP, but produces true if src1 is NaN.
*/
instruction *
CMPN(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
brw_conditional_mod condition) const
{
/* Take the instruction:
*
* CMPN null<d> src0<f> src1<f>
*
* Original gfx4 does type conversion to the destination type
* before comparison, producing garbage results for floating
* point comparisons.
*
* The destination type doesn't matter on newer generations,
* so we set the type to match src0 so we can compact the
* instruction.
*/
return set_condmod(condition,
emit(BRW_OPCODE_CMPN, retype(dst, src0.type),
fix_unsigned_negate(src0),
fix_unsigned_negate(src1)));
}
/**
* Gfx4 predicated IF.
*/
instruction *
IF(brw_predicate predicate) const
{
return set_predicate(predicate, emit(BRW_OPCODE_IF));
}
/**
* Gfx6 IF with embedded comparison.
*/
instruction *
IF(const src_reg &src0, const src_reg &src1,
brw_conditional_mod condition) const
{
assert(shader->devinfo->ver == 6);
return set_condmod(condition,
emit(BRW_OPCODE_IF,
null_reg_d(),
fix_unsigned_negate(src0),
fix_unsigned_negate(src1)));
}
/**
* Emit a linear interpolation instruction.
*/
instruction *
LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
const src_reg &a) const
{
/* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
* we need to reorder the operands.
*/
assert(shader->devinfo->ver >= 6 && shader->devinfo->ver <= 9);
return emit(BRW_OPCODE_LRP, dst, a, y, x);
}
backend_shader *shader;
protected:
/**
* Workaround for negation of UD registers. See comment in
* fs_generator::generate_code() for the details.
*/
src_reg
fix_unsigned_negate(const src_reg &src) const
{
if (src.type == BRW_REGISTER_TYPE_UD && src.negate) {
dst_reg temp = vgrf(BRW_REGISTER_TYPE_UD);
MOV(temp, src);
return src_reg(temp);
} else {
return src;
}
}
/**
* Workaround for register access modes not supported by the ternary
* instruction encoding.
*/
src_reg
fix_3src_operand(const src_reg &src) const
{
/* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
* able to use vertical stride of zero to replicate the vec4 uniform, like
*
* g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
*
* But you can't, since vertical stride is always four in three-source
* instructions. Instead, insert a MOV instruction to do the replication so
* that the three-source instruction can consume it.
*/
/* The MOV is only needed if the source is a uniform or immediate. */
if (src.file != UNIFORM && src.file != IMM)
return src;
if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle))
return src;
const dst_reg expanded = vgrf(src.type);
emit(VEC4_OPCODE_UNPACK_UNIFORM, expanded, src);
return src_reg(expanded);
}
/**
* Workaround for register access modes not supported by the math
* instruction.
*/
src_reg
fix_math_operand(const src_reg &src) const
{
/* The gfx6 math instruction ignores the source modifiers --
* swizzle, abs, negate, and at least some parts of the register
* region description.
*
* Rather than trying to enumerate all these cases, *always* expand the
* operand to a temp GRF for gfx6.
*
* For gfx7, keep the operand as-is, except if immediate, which gfx7 still
* can't use.
*/
if (shader->devinfo->ver == 6 ||
(shader->devinfo->ver == 7 && src.file == IMM)) {
const dst_reg tmp = vgrf(src.type);
MOV(tmp, src);
return src_reg(tmp);
} else {
return src;
}
}
/**
* Workaround other weirdness of the math instruction.
*/
instruction *
fix_math_instruction(instruction *inst) const
{
if (shader->devinfo->ver == 6 &&
inst->dst.writemask != WRITEMASK_XYZW) {
const dst_reg tmp = vgrf(inst->dst.type);
MOV(inst->dst, src_reg(tmp));
inst->dst = tmp;
} else if (shader->devinfo->ver < 6) {
const unsigned sources = (inst->src[1].file == BAD_FILE ? 1 : 2);
inst->base_mrf = 1;
inst->mlen = sources;
}
return inst;
}
bblock_t *block;
exec_node *cursor;
unsigned _dispatch_width;
unsigned _group;
bool force_writemask_all;
/** Debug annotation info. */
struct {
const char *str;
const void *ir;
} annotation;
};
}
#endif


@@ -1,365 +0,0 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
/** @file brw_vec4_cmod_propagation.cpp
*
* Really similar to brw_fs_cmod_propagation but adapted to vec4 needs. Check
* brw_fs_cmod_propagation for further details on the rationale behind this
* optimization.
*/
#include "brw_vec4.h"
#include "brw_cfg.h"
#include "brw_eu.h"
namespace brw {
static bool
writemasks_incompatible(const vec4_instruction *earlier,
const vec4_instruction *later)
{
return (earlier->dst.writemask != WRITEMASK_X &&
earlier->dst.writemask != WRITEMASK_XYZW) ||
(earlier->dst.writemask == WRITEMASK_XYZW &&
later->src[0].swizzle != BRW_SWIZZLE_XYZW) ||
(later->dst.writemask & ~earlier->dst.writemask) != 0;
}
static bool
opt_cmod_propagation_local(bblock_t *block, vec4_visitor *v)
{
bool progress = false;
UNUSED int ip = block->end_ip + 1;
foreach_inst_in_block_reverse_safe(vec4_instruction, inst, block) {
ip--;
if ((inst->opcode != BRW_OPCODE_AND &&
inst->opcode != BRW_OPCODE_CMP &&
inst->opcode != BRW_OPCODE_MOV) ||
inst->predicate != BRW_PREDICATE_NONE ||
!inst->dst.is_null() ||
(inst->src[0].file != VGRF && inst->src[0].file != ATTR &&
inst->src[0].file != UNIFORM))
continue;
/* An ABS source modifier can only be handled when processing a compare
* with a value other than zero.
*/
if (inst->src[0].abs &&
(inst->opcode != BRW_OPCODE_CMP || inst->src[1].is_zero()))
continue;
if (inst->opcode == BRW_OPCODE_AND &&
!(inst->src[1].is_one() &&
inst->conditional_mod == BRW_CONDITIONAL_NZ &&
!inst->src[0].negate))
continue;
if (inst->opcode == BRW_OPCODE_MOV &&
inst->conditional_mod != BRW_CONDITIONAL_NZ)
continue;
bool read_flag = false;
foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst, inst) {
/* A CMP with a second source of zero can match with anything. A CMP
* with a second source that is not zero can only match with an ADD
* instruction.
*/
if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero()) {
bool negate;
if (scan_inst->opcode != BRW_OPCODE_ADD)
goto not_match;
if (writemasks_incompatible(scan_inst, inst))
goto not_match;
/* A CMP is basically a subtraction. The result of the
* subtraction must be the same as the result of the addition.
* This means that one of the operands must be negated. So (a +
* b) vs (a == -b) or (a + -b) vs (a == b).
*/
if ((inst->src[0].equals(scan_inst->src[0]) &&
inst->src[1].negative_equals(scan_inst->src[1])) ||
(inst->src[0].equals(scan_inst->src[1]) &&
inst->src[1].negative_equals(scan_inst->src[0]))) {
negate = false;
} else if ((inst->src[0].negative_equals(scan_inst->src[0]) &&
inst->src[1].equals(scan_inst->src[1])) ||
(inst->src[0].negative_equals(scan_inst->src[1]) &&
inst->src[1].equals(scan_inst->src[0]))) {
negate = true;
} else {
goto not_match;
}
if (scan_inst->exec_size != inst->exec_size ||
scan_inst->group != inst->group)
goto not_match;
/* From the Sky Lake PRM Vol. 7 "Assigning Conditional Mods":
*
* * Note that the [post condition signal] bits generated at
* the output of a compute are before the .sat.
*
* So we don't have to bail if scan_inst has saturate.
*/
/* Otherwise, try propagating the conditional. */
const enum brw_conditional_mod cond =
negate ? brw_swap_cmod(inst->conditional_mod)
: inst->conditional_mod;
if (scan_inst->can_do_cmod() &&
((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) ||
scan_inst->conditional_mod == cond)) {
scan_inst->conditional_mod = cond;
inst->remove(block);
progress = true;
}
break;
}
if (regions_overlap(inst->src[0], inst->size_read(0),
scan_inst->dst, scan_inst->size_written)) {
if ((scan_inst->predicate && scan_inst->opcode != BRW_OPCODE_SEL) ||
scan_inst->dst.offset != inst->src[0].offset ||
scan_inst->exec_size != inst->exec_size ||
scan_inst->group != inst->group) {
break;
}
/* If scan_inst is a CMP that produces a single value and inst is
* a CMP.NZ that consumes only that value, remove inst.
*/
if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
(inst->src[0].type == BRW_REGISTER_TYPE_D ||
inst->src[0].type == BRW_REGISTER_TYPE_UD) &&
(inst->opcode == BRW_OPCODE_CMP ||
inst->opcode == BRW_OPCODE_MOV) &&
scan_inst->opcode == BRW_OPCODE_CMP &&
((inst->src[0].swizzle == BRW_SWIZZLE_XXXX &&
scan_inst->dst.writemask == WRITEMASK_X) ||
(inst->src[0].swizzle == BRW_SWIZZLE_YYYY &&
scan_inst->dst.writemask == WRITEMASK_Y) ||
(inst->src[0].swizzle == BRW_SWIZZLE_ZZZZ &&
scan_inst->dst.writemask == WRITEMASK_Z) ||
(inst->src[0].swizzle == BRW_SWIZZLE_WWWW &&
scan_inst->dst.writemask == WRITEMASK_W))) {
if (inst->dst.writemask != scan_inst->dst.writemask) {
src_reg temp(v, glsl_vec4_type(), 1);
/* Given a sequence like:
*
* cmp.ge.f0(8) g21<1>.zF g20<4>.xF g18<4>.xF
* ...
* cmp.nz.f0(8) null<1>D g21<4>.zD 0D
*
* Replace it with something like:
*
* cmp.ge.f0(8) g22<1>.zF g20<4>.xF g18<4>.xF
* mov(8) g21<1>.xF g22<1>.zzzzF
*
* The added MOV will most likely be removed later. In the
* worst case, it should be cheaper to schedule.
*/
temp.swizzle = brw_swizzle_for_mask(inst->dst.writemask);
temp.type = scan_inst->src[0].type;
vec4_instruction *mov = v->MOV(scan_inst->dst, temp);
/* Modify the source swizzles on scan_inst. If scan_inst
* was
*
* cmp.ge.f0(8) g21<1>.zF g20<4>.wzyxF g18<4>.yxwzF
*
* replace it with
*
* cmp.ge.f0(8) g21<1>.zF g20<4>.yyyyF g18<4>.wwwwF
*/
unsigned src0_chan;
unsigned src1_chan;
switch (scan_inst->dst.writemask) {
case WRITEMASK_X:
src0_chan = BRW_GET_SWZ(scan_inst->src[0].swizzle, 0);
src1_chan = BRW_GET_SWZ(scan_inst->src[1].swizzle, 0);
break;
case WRITEMASK_Y:
src0_chan = BRW_GET_SWZ(scan_inst->src[0].swizzle, 1);
src1_chan = BRW_GET_SWZ(scan_inst->src[1].swizzle, 1);
break;
case WRITEMASK_Z:
src0_chan = BRW_GET_SWZ(scan_inst->src[0].swizzle, 2);
src1_chan = BRW_GET_SWZ(scan_inst->src[1].swizzle, 2);
break;
case WRITEMASK_W:
src0_chan = BRW_GET_SWZ(scan_inst->src[0].swizzle, 3);
src1_chan = BRW_GET_SWZ(scan_inst->src[1].swizzle, 3);
break;
default:
unreachable("Impossible writemask");
}
scan_inst->src[0].swizzle = BRW_SWIZZLE4(src0_chan,
src0_chan,
src0_chan,
src0_chan);
/* There's no swizzle on immediate value sources. */
if (scan_inst->src[1].file != IMM) {
scan_inst->src[1].swizzle = BRW_SWIZZLE4(src1_chan,
src1_chan,
src1_chan,
src1_chan);
}
scan_inst->dst = dst_reg(temp);
scan_inst->dst.writemask = inst->dst.writemask;
scan_inst->insert_after(block, mov);
}
inst->remove(block);
progress = true;
break;
}
if (writemasks_incompatible(scan_inst, inst))
break;
/* CMP's result is the same regardless of dest type. */
if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
scan_inst->opcode == BRW_OPCODE_CMP &&
(inst->dst.type == BRW_REGISTER_TYPE_D ||
inst->dst.type == BRW_REGISTER_TYPE_UD)) {
inst->remove(block);
progress = true;
break;
}
/* If the AND wasn't handled by the previous case, it isn't safe
* to remove it.
*/
if (inst->opcode == BRW_OPCODE_AND)
break;
/* Comparisons operate differently for ints and floats */
if (scan_inst->dst.type != inst->dst.type &&
(scan_inst->dst.type == BRW_REGISTER_TYPE_F ||
inst->dst.type == BRW_REGISTER_TYPE_F))
break;
/* If the instruction generating inst's source also wrote the
* flag, and inst is doing a simple .nz comparison, then inst
* is redundant - the appropriate value is already in the flag
* register. Delete inst.
*/
if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
!inst->src[0].negate &&
scan_inst->writes_flag(v->devinfo)) {
inst->remove(block);
progress = true;
break;
}
/* The conditional mod of the CMP/CMPN instructions behaves
* specially because the flag output is not calculated from the
* result of the instruction, but the other way around, which
* means that even if the condmod to propagate and the condmod
* from the CMP instruction are the same they will in general give
* different results because they are evaluated based on different
* inputs.
*/
if (scan_inst->opcode == BRW_OPCODE_CMP ||
scan_inst->opcode == BRW_OPCODE_CMPN)
break;
/* From the Sky Lake PRM Vol. 7 "Assigning Conditional Mods":
*
* * Note that the [post condition signal] bits generated at
* the output of a compute are before the .sat.
*/
if (scan_inst->saturate)
break;
/* From the Sky Lake PRM, Vol 2a, "Multiply":
*
* "When multiplying integer data types, if one of the sources
* is a DW, the resulting full precision data is stored in
* the accumulator. However, if the destination data type is
* either W or DW, the low bits of the result are written to
* the destination register and the remaining high bits are
* discarded. This results in undefined Overflow and Sign
* flags. Therefore, conditional modifiers and saturation
* (.sat) cannot be used in this case.
*
* We just disallow cmod propagation on all integer multiplies.
*/
if (!brw_reg_type_is_floating_point(scan_inst->dst.type) &&
scan_inst->opcode == BRW_OPCODE_MUL)
break;
/* Otherwise, try propagating the conditional. */
enum brw_conditional_mod cond =
inst->src[0].negate ? brw_swap_cmod(inst->conditional_mod)
: inst->conditional_mod;
if (scan_inst->can_do_cmod() &&
((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) ||
scan_inst->conditional_mod == cond)) {
scan_inst->conditional_mod = cond;
inst->remove(block);
progress = true;
}
break;
}
not_match:
if (scan_inst->writes_flag(v->devinfo))
break;
read_flag = read_flag || scan_inst->reads_flag();
}
}
return progress;
}
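/* An illustrative sketch of the ADD/CMP case handled above (register
* numbers invented):
*
*    add(8)       g10<1>F  g2<4>F   g4<4>F
*    cmp.ge.f0(8) null<1>F g2<4>F  -g4<4>F
*
* becomes
*
*    add.ge.f0(8) g10<1>F  g2<4>F   g4<4>F
*
* since the CMP tests g2 - (-g4) >= 0, which is exactly the condition the
* ADD can produce by writing the flag register itself.
*/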
bool
vec4_visitor::opt_cmod_propagation()
{
bool progress = false;
foreach_block_reverse(block, cfg) {
progress = opt_cmod_propagation_local(block, this) || progress;
}
if (progress)
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
return progress;
}
} /* namespace brw */

View file

@ -1,556 +0,0 @@
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/**
* @file brw_vec4_copy_propagation.cpp
*
* Implements tracking of values copied between registers, and
* optimizations based on that: copy propagation and constant
* propagation.
*/
#include "brw_vec4.h"
#include "brw_cfg.h"
#include "brw_eu.h"
namespace brw {
struct copy_entry {
src_reg *value[4];
int saturatemask;
};
static bool
is_direct_copy(vec4_instruction *inst)
{
return (inst->opcode == BRW_OPCODE_MOV &&
!inst->predicate &&
inst->dst.file == VGRF &&
inst->dst.offset % REG_SIZE == 0 &&
!inst->dst.reladdr &&
!inst->src[0].reladdr &&
(inst->dst.type == inst->src[0].type ||
(inst->dst.type == BRW_REGISTER_TYPE_F &&
inst->src[0].type == BRW_REGISTER_TYPE_VF)));
}
static bool
is_dominated_by_previous_instruction(vec4_instruction *inst)
{
return (inst->opcode != BRW_OPCODE_DO &&
inst->opcode != BRW_OPCODE_WHILE &&
inst->opcode != BRW_OPCODE_ELSE &&
inst->opcode != BRW_OPCODE_ENDIF);
}
static bool
is_channel_updated(vec4_instruction *inst, src_reg *values[4], int ch)
{
const src_reg *src = values[ch];
/* consider GRF only */
assert(inst->dst.file == VGRF);
if (!src || src->file != VGRF)
return false;
return regions_overlap(*src, REG_SIZE, inst->dst, inst->size_written) &&
(inst->dst.offset != src->offset ||
inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle, ch)));
}
/**
* Get the origin of a copy as a single register if all components present in
* the given readmask originate from the same register and have compatible
* regions, otherwise return a BAD_FILE register.
*/
static src_reg
get_copy_value(const copy_entry &entry, unsigned readmask)
{
unsigned swz[4] = {};
src_reg value;
for (unsigned i = 0; i < 4; i++) {
if (readmask & (1 << i)) {
if (entry.value[i]) {
src_reg src = *entry.value[i];
if (src.file == IMM) {
swz[i] = i;
} else {
swz[i] = BRW_GET_SWZ(src.swizzle, i);
/* Overwrite the original swizzle so the src_reg::equals call
* below doesn't care about it, the correct swizzle will be
* calculated once the swizzles of all components are known.
*/
src.swizzle = BRW_SWIZZLE_XYZW;
}
if (value.file == BAD_FILE) {
value = src;
} else if (!value.equals(src)) {
return src_reg();
}
} else {
return src_reg();
}
}
}
return swizzle(value,
brw_compose_swizzle(brw_swizzle_for_mask(readmask),
BRW_SWIZZLE4(swz[0], swz[1],
swz[2], swz[3])));
}
static bool
try_constant_propagate(vec4_instruction *inst,
int arg, const copy_entry *entry)
{
/* For constant propagation, we only handle the same constant
* across all 4 channels. Some day, we should handle the 8-bit
* float vector format, which would let us constant propagate
* vectors better.
* We could be more aggressive here -- some channels might not get used
* based on the destination writemask.
*/
src_reg value =
get_copy_value(*entry,
brw_apply_inv_swizzle_to_mask(inst->src[arg].swizzle,
WRITEMASK_XYZW));
if (value.file != IMM)
return false;
/* 64-bit types can't be used except for one-source instructions, which
* higher levels should have constant folded away, so there's no point in
* propagating immediates here.
*/
if (type_sz(value.type) == 8 || type_sz(inst->src[arg].type) == 8)
return false;
if (value.type == BRW_REGISTER_TYPE_VF) {
/* The result of bit-casting the component values of a vector float
* cannot in general be represented as an immediate.
*/
if (inst->src[arg].type != BRW_REGISTER_TYPE_F)
return false;
} else {
value.type = inst->src[arg].type;
}
if (inst->src[arg].abs) {
if (!brw_abs_immediate(value.type, &value.as_brw_reg()))
return false;
}
if (inst->src[arg].negate) {
if (!brw_negate_immediate(value.type, &value.as_brw_reg()))
return false;
}
value = swizzle(value, inst->src[arg].swizzle);
switch (inst->opcode) {
case BRW_OPCODE_MOV:
case SHADER_OPCODE_BROADCAST:
inst->src[arg] = value;
return true;
case VEC4_OPCODE_UNTYPED_ATOMIC:
if (arg == 1) {
inst->src[arg] = value;
return true;
}
break;
case SHADER_OPCODE_POW:
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
break;
case BRW_OPCODE_DP2:
case BRW_OPCODE_DP3:
case BRW_OPCODE_DP4:
case BRW_OPCODE_DPH:
case BRW_OPCODE_BFI1:
case BRW_OPCODE_ASR:
case BRW_OPCODE_SHL:
case BRW_OPCODE_SHR:
case BRW_OPCODE_SUBB:
if (arg == 1) {
inst->src[arg] = value;
return true;
}
break;
case BRW_OPCODE_MACH:
case BRW_OPCODE_MUL:
case SHADER_OPCODE_MULH:
case BRW_OPCODE_ADD:
case BRW_OPCODE_OR:
case BRW_OPCODE_AND:
case BRW_OPCODE_XOR:
case BRW_OPCODE_ADDC:
if (arg == 1) {
inst->src[arg] = value;
return true;
} else if (arg == 0 && inst->src[1].file != IMM) {
/* Fit this constant in by commuting the operands. Exception: we
* can't do this for 32-bit integer MUL/MACH because it's asymmetric.
*/
if ((inst->opcode == BRW_OPCODE_MUL ||
inst->opcode == BRW_OPCODE_MACH) &&
(inst->src[1].type == BRW_REGISTER_TYPE_D ||
inst->src[1].type == BRW_REGISTER_TYPE_UD))
break;
inst->src[0] = inst->src[1];
inst->src[1] = value;
return true;
}
break;
case GS_OPCODE_SET_WRITE_OFFSET:
/* This is just a multiply by a constant with special strides.
* The generator will handle immediates in both arguments (generating
* a single MOV of the product). So feel free to propagate in src0.
*/
inst->src[arg] = value;
return true;
case BRW_OPCODE_CMP:
if (arg == 1) {
inst->src[arg] = value;
return true;
} else if (arg == 0 && inst->src[1].file != IMM) {
enum brw_conditional_mod new_cmod;
new_cmod = brw_swap_cmod(inst->conditional_mod);
if (new_cmod != BRW_CONDITIONAL_NONE) {
/* Fit this constant in by swapping the operands and
* flipping the test.
*/
inst->src[0] = inst->src[1];
inst->src[1] = value;
inst->conditional_mod = new_cmod;
return true;
}
}
break;
case BRW_OPCODE_SEL:
if (arg == 1) {
inst->src[arg] = value;
return true;
} else if (arg == 0 && inst->src[1].file != IMM) {
inst->src[0] = inst->src[1];
inst->src[1] = value;
/* If this was predicated, flipping operands means
* we also need to flip the predicate.
*/
if (inst->conditional_mod == BRW_CONDITIONAL_NONE) {
inst->predicate_inverse = !inst->predicate_inverse;
}
return true;
}
break;
default:
break;
}
return false;
}
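/* A hypothetical example of the CMP operand swap above (registers
* invented): if g5 is known to hold the immediate 7.0F, then
*
*    cmp.l.f0(8) null<1>F g5<4>.xF g7<4>.xF
*
* is rewritten as
*
*    cmp.g.f0(8) null<1>F g7<4>.xF 7.0F
*
* i.e. the immediate moves to src1 and brw_swap_cmod() flips the test so
* that "7.0 < g7.x" still means the same thing as "g7.x > 7.0".
*/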
static bool
is_align1_opcode(unsigned opcode)
{
switch (opcode) {
case VEC4_OPCODE_DOUBLE_TO_F32:
case VEC4_OPCODE_DOUBLE_TO_D32:
case VEC4_OPCODE_DOUBLE_TO_U32:
case VEC4_OPCODE_TO_DOUBLE:
case VEC4_OPCODE_PICK_LOW_32BIT:
case VEC4_OPCODE_PICK_HIGH_32BIT:
case VEC4_OPCODE_SET_LOW_32BIT:
case VEC4_OPCODE_SET_HIGH_32BIT:
return true;
default:
return false;
}
}
static bool
try_copy_propagate(const struct brw_compiler *compiler,
vec4_instruction *inst, int arg,
const copy_entry *entry, int attributes_per_reg)
{
const struct intel_device_info *devinfo = compiler->devinfo;
/* Build up the value we are propagating as if it were the source of a
* single MOV
*/
src_reg value =
get_copy_value(*entry,
brw_apply_inv_swizzle_to_mask(inst->src[arg].swizzle,
WRITEMASK_XYZW));
/* Check that we can propagate that value */
if (value.file != UNIFORM &&
value.file != VGRF &&
value.file != ATTR)
return false;
/* Instructions that write 2 registers also need to read 2 registers. Make
* sure we don't break that restriction by copy propagating from a uniform.
*/
if (inst->size_written > REG_SIZE && is_uniform(value))
return false;
/* There is a regioning restriction such that if execsize == width
* and hstride != 0 then the vstride can't be 0. When we split instructions
* that take a single-precision source (like F->DF conversions) we end up
* with a 4-wide source on an instruction with an execution size of 4.
* If we then copy-propagate the source from a uniform we also end up with a
* vstride of 0 and we violate the restriction.
*/
if (inst->exec_size == 4 && value.file == UNIFORM &&
type_sz(value.type) == 4)
return false;
/* If the type of the copy value is different from the type of the
* instruction then the swizzles and writemasks involved don't have the same
* meaning and simply replacing the source would produce different semantics.
*/
if (type_sz(value.type) != type_sz(inst->src[arg].type))
return false;
if (inst->src[arg].offset % REG_SIZE || value.offset % REG_SIZE)
return false;
bool has_source_modifiers = value.negate || value.abs;
/* gfx6 math and gfx7+ SENDs from GRFs ignore source modifiers on
* instructions.
*/
if (has_source_modifiers && !inst->can_do_source_mods(devinfo))
return false;
/* Reject cases that would violate register regioning restrictions. */
if ((value.file == UNIFORM || value.swizzle != BRW_SWIZZLE_XYZW) &&
((devinfo->ver == 6 && inst->is_math()) ||
inst->is_send_from_grf() ||
inst->uses_indirect_addressing())) {
return false;
}
if (has_source_modifiers &&
value.type != inst->src[arg].type &&
!inst->can_change_types())
return false;
if (has_source_modifiers &&
(inst->opcode == SHADER_OPCODE_GFX4_SCRATCH_WRITE ||
inst->opcode == VEC4_OPCODE_PICK_HIGH_32BIT))
return false;
unsigned composed_swizzle = brw_compose_swizzle(inst->src[arg].swizzle,
value.swizzle);
/* Instructions that operate on vectors in ALIGN1 mode will ignore swizzles
* so copy-propagation won't be safe if the composed swizzle is anything
* other than the identity.
*/
if (is_align1_opcode(inst->opcode) && composed_swizzle != BRW_SWIZZLE_XYZW)
return false;
if (inst->is_3src(compiler) &&
(value.file == UNIFORM ||
(value.file == ATTR && attributes_per_reg != 1)) &&
!brw_is_single_value_swizzle(composed_swizzle))
return false;
if (inst->is_send_from_grf())
return false;
/* we can't generally copy-propagate UD negations because we
* end up accessing the resulting values as signed integers
* instead. See also resolve_ud_negate().
*/
if (value.negate &&
value.type == BRW_REGISTER_TYPE_UD)
return false;
/* Don't report progress if this is a noop. */
if (value.equals(inst->src[arg]))
return false;
const unsigned dst_saturate_mask = inst->dst.writemask &
brw_apply_swizzle_to_mask(inst->src[arg].swizzle, entry->saturatemask);
if (dst_saturate_mask) {
/* We either saturate all or nothing. */
if (dst_saturate_mask != inst->dst.writemask)
return false;
/* Limit saturate propagation only to SEL with src1 bounded within 0.0
* and 1.0, otherwise skip copy propagate altogether.
*/
switch(inst->opcode) {
case BRW_OPCODE_SEL:
if (arg != 0 ||
inst->src[0].type != BRW_REGISTER_TYPE_F ||
inst->src[1].file != IMM ||
inst->src[1].type != BRW_REGISTER_TYPE_F ||
inst->src[1].f < 0.0 ||
inst->src[1].f > 1.0) {
return false;
}
if (!inst->saturate)
inst->saturate = true;
break;
default:
return false;
}
}
/* Build the final value */
if (inst->src[arg].abs) {
value.negate = false;
value.abs = true;
}
if (inst->src[arg].negate)
value.negate = !value.negate;
value.swizzle = composed_swizzle;
if (has_source_modifiers &&
value.type != inst->src[arg].type) {
assert(inst->can_change_types());
for (int i = 0; i < 3; i++) {
inst->src[i].type = value.type;
}
inst->dst.type = value.type;
} else {
value.type = inst->src[arg].type;
}
inst->src[arg] = value;
return true;
}
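/* A hypothetical example of the swizzle composition above (registers
* invented): given the copy
*
*    mov(8) g6<1>F g3<4>.wzyxF
*
* a later read of g6<4>.yyyyF is rewritten to read g3<4>.zzzzF, because
* composing .yyyy with .wzyx selects component z of g3 for every channel.
*/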
bool
vec4_visitor::opt_copy_propagation(bool do_constant_prop)
{
/* If we are in dual instanced or single mode, then attributes are going
* to be interleaved, so one register contains two attribute slots.
*/
const int attributes_per_reg =
prog_data->dispatch_mode == INTEL_DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
bool progress = false;
struct copy_entry entries[alloc.total_size];
memset(&entries, 0, sizeof(entries));
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
/* This pass only works on basic blocks. If there's flow
* control, throw out all our information and start from
* scratch.
*
* This should really be fixed by using a structure like in
* src/glsl/opt_copy_propagation.cpp to track available copies.
*/
if (!is_dominated_by_previous_instruction(inst)) {
memset(&entries, 0, sizeof(entries));
continue;
}
/* For each source arg, see if each component comes from a copy
* from the same type file (IMM, VGRF, UNIFORM), and try
* optimizing out access to the copy result
*/
for (int i = 2; i >= 0; i--) {
/* Copied values end up in GRFs, and we don't track reladdr
* accesses.
*/
if (inst->src[i].file != VGRF ||
inst->src[i].reladdr)
continue;
/* We only handle register-aligned single GRF copies. */
if (inst->size_read(i) != REG_SIZE ||
inst->src[i].offset % REG_SIZE)
continue;
const unsigned reg = (alloc.offsets[inst->src[i].nr] +
inst->src[i].offset / REG_SIZE);
const copy_entry &entry = entries[reg];
if (do_constant_prop && try_constant_propagate(inst, i, &entry))
progress = true;
else if (try_copy_propagate(compiler, inst, i, &entry, attributes_per_reg))
progress = true;
}
/* Track available source registers. */
if (inst->dst.file == VGRF) {
const int reg =
alloc.offsets[inst->dst.nr] + inst->dst.offset / REG_SIZE;
/* Update our destination's current channel values. For a direct copy,
* the value is the newly propagated source. Otherwise, we don't know
* the new value, so clear it.
*/
bool direct_copy = is_direct_copy(inst);
entries[reg].saturatemask &= ~inst->dst.writemask;
for (int i = 0; i < 4; i++) {
if (inst->dst.writemask & (1 << i)) {
entries[reg].value[i] = direct_copy ? &inst->src[0] : NULL;
entries[reg].saturatemask |=
inst->saturate && direct_copy ? 1 << i : 0;
}
}
/* Clear the records for any registers whose current value came from
* our destination's updated channels, as the two are no longer equal.
*/
if (inst->dst.reladdr)
memset(&entries, 0, sizeof(entries));
else {
for (unsigned i = 0; i < alloc.total_size; i++) {
for (int j = 0; j < 4; j++) {
if (is_channel_updated(inst, entries[i].value, j)) {
entries[i].value[j] = NULL;
entries[i].saturatemask &= ~(1 << j);
}
}
}
}
}
}
if (progress)
invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW |
DEPENDENCY_INSTRUCTION_DETAIL);
return progress;
}
} /* namespace brw */

View file

@ -1,322 +0,0 @@
/*
* Copyright © 2012, 2013, 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_vec4.h"
#include "brw_vec4_live_variables.h"
#include "brw_cfg.h"
using namespace brw;
/** @file brw_vec4_cse.cpp
*
* Support for local common subexpression elimination.
*
* See Muchnick's Advanced Compiler Design and Implementation, section
* 13.1 (p378).
*/
namespace {
struct aeb_entry : public exec_node {
/** The instruction that generates the expression value. */
vec4_instruction *generator;
/** The temporary where the value is stored. */
src_reg tmp;
};
}
static bool
is_expression(const vec4_instruction *const inst)
{
switch (inst->opcode) {
case BRW_OPCODE_MOV:
case BRW_OPCODE_SEL:
case BRW_OPCODE_NOT:
case BRW_OPCODE_AND:
case BRW_OPCODE_OR:
case BRW_OPCODE_XOR:
case BRW_OPCODE_SHR:
case BRW_OPCODE_SHL:
case BRW_OPCODE_ASR:
case BRW_OPCODE_CMP:
case BRW_OPCODE_CMPN:
case BRW_OPCODE_ADD:
case BRW_OPCODE_MUL:
case SHADER_OPCODE_MULH:
case BRW_OPCODE_FRC:
case BRW_OPCODE_RNDU:
case BRW_OPCODE_RNDD:
case BRW_OPCODE_RNDE:
case BRW_OPCODE_RNDZ:
case BRW_OPCODE_LINE:
case BRW_OPCODE_PLN:
case BRW_OPCODE_MAD:
case BRW_OPCODE_LRP:
case VEC4_OPCODE_UNPACK_UNIFORM:
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
case SHADER_OPCODE_BROADCAST:
case VEC4_TCS_OPCODE_SET_INPUT_URB_OFFSETS:
case VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
return true;
case SHADER_OPCODE_RCP:
case SHADER_OPCODE_RSQ:
case SHADER_OPCODE_SQRT:
case SHADER_OPCODE_EXP2:
case SHADER_OPCODE_LOG2:
case SHADER_OPCODE_POW:
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
return inst->mlen == 0;
default:
return false;
}
}
static bool
operands_match(const vec4_instruction *a, const vec4_instruction *b)
{
const src_reg *xs = a->src;
const src_reg *ys = b->src;
if (a->opcode == BRW_OPCODE_MAD) {
return xs[0].equals(ys[0]) &&
((xs[1].equals(ys[1]) && xs[2].equals(ys[2])) ||
(xs[2].equals(ys[1]) && xs[1].equals(ys[2])));
} else if (a->opcode == BRW_OPCODE_MOV &&
xs[0].file == IMM &&
xs[0].type == BRW_REGISTER_TYPE_VF) {
src_reg tmp_x = xs[0];
src_reg tmp_y = ys[0];
/* Smash out the values that are not part of the writemask. Otherwise
* the equals operator will fail due to mismatches in unused components.
*/
const unsigned ab_writemask = a->dst.writemask & b->dst.writemask;
const uint32_t mask = ((ab_writemask & WRITEMASK_X) ? 0x000000ff : 0) |
((ab_writemask & WRITEMASK_Y) ? 0x0000ff00 : 0) |
((ab_writemask & WRITEMASK_Z) ? 0x00ff0000 : 0) |
((ab_writemask & WRITEMASK_W) ? 0xff000000 : 0);
tmp_x.ud &= mask;
tmp_y.ud &= mask;
return tmp_x.equals(tmp_y);
} else if (!a->is_commutative()) {
return xs[0].equals(ys[0]) && xs[1].equals(ys[1]) && xs[2].equals(ys[2]);
} else {
return (xs[0].equals(ys[0]) && xs[1].equals(ys[1])) ||
(xs[1].equals(ys[0]) && xs[0].equals(ys[1]));
}
}
/**
* Checks if instructions match, exactly for sources, but loosely for
* destination writemasks.
*
* \param 'a' is the generating expression from the AEB entry.
* \param 'b' is the second occurrence of the expression that we're
* considering eliminating.
*/
static bool
instructions_match(vec4_instruction *a, vec4_instruction *b)
{
return a->opcode == b->opcode &&
a->saturate == b->saturate &&
a->predicate == b->predicate &&
a->predicate_inverse == b->predicate_inverse &&
a->conditional_mod == b->conditional_mod &&
a->flag_subreg == b->flag_subreg &&
a->dst.type == b->dst.type &&
a->offset == b->offset &&
a->mlen == b->mlen &&
a->base_mrf == b->base_mrf &&
a->header_size == b->header_size &&
a->shadow_compare == b->shadow_compare &&
((a->dst.writemask & b->dst.writemask) == a->dst.writemask) &&
a->force_writemask_all == b->force_writemask_all &&
a->size_written == b->size_written &&
a->exec_size == b->exec_size &&
a->group == b->group &&
operands_match(a, b);
}
bool
vec4_visitor::opt_cse_local(bblock_t *block, const vec4_live_variables &live)
{
bool progress = false;
exec_list aeb;
void *cse_ctx = ralloc_context(NULL);
int ip = block->start_ip;
foreach_inst_in_block (vec4_instruction, inst, block) {
/* Skip some cases. */
if (is_expression(inst) && !inst->predicate && inst->mlen == 0 &&
((inst->dst.file != ARF && inst->dst.file != FIXED_GRF) ||
inst->dst.is_null()))
{
bool found = false;
foreach_in_list_use_after(aeb_entry, entry, &aeb) {
/* Match current instruction's expression against those in AEB. */
if (!(entry->generator->dst.is_null() && !inst->dst.is_null()) &&
instructions_match(inst, entry->generator)) {
found = true;
progress = true;
break;
}
}
if (!found) {
if (inst->opcode != BRW_OPCODE_MOV ||
(inst->opcode == BRW_OPCODE_MOV &&
inst->src[0].file == IMM &&
inst->src[0].type == BRW_REGISTER_TYPE_VF)) {
/* Our first sighting of this expression. Create an entry. */
aeb_entry *entry = ralloc(cse_ctx, aeb_entry);
entry->tmp = src_reg(); /* file will be BAD_FILE */
entry->generator = inst;
aeb.push_tail(entry);
}
} else {
/* This is at least our second sighting of this expression.
* If we don't have a temporary already, make one.
*/
bool no_existing_temp = entry->tmp.file == BAD_FILE;
if (no_existing_temp && !entry->generator->dst.is_null()) {
entry->tmp = retype(src_reg(VGRF, alloc.allocate(
regs_written(entry->generator)),
NULL), inst->dst.type);
const unsigned width = entry->generator->exec_size;
unsigned component_size = width * type_sz(entry->tmp.type);
unsigned num_copy_movs =
DIV_ROUND_UP(entry->generator->size_written, component_size);
for (unsigned i = 0; i < num_copy_movs; ++i) {
vec4_instruction *copy =
MOV(offset(entry->generator->dst, width, i),
offset(entry->tmp, width, i));
copy->exec_size = width;
copy->group = entry->generator->group;
copy->force_writemask_all =
entry->generator->force_writemask_all;
entry->generator->insert_after(block, copy);
}
entry->generator->dst = dst_reg(entry->tmp);
}
/* dest <- temp */
if (!inst->dst.is_null()) {
assert(inst->dst.type == entry->tmp.type);
const unsigned width = inst->exec_size;
unsigned component_size = width * type_sz(inst->dst.type);
unsigned num_copy_movs =
DIV_ROUND_UP(inst->size_written, component_size);
for (unsigned i = 0; i < num_copy_movs; ++i) {
vec4_instruction *copy =
MOV(offset(inst->dst, width, i),
offset(entry->tmp, width, i));
copy->exec_size = inst->exec_size;
copy->group = inst->group;
copy->force_writemask_all = inst->force_writemask_all;
inst->insert_before(block, copy);
}
}
/* Set our iterator so that next time through the loop inst->next
* will get the instruction in the basic block after the one we've
* removed.
*/
vec4_instruction *prev = (vec4_instruction *)inst->prev;
inst->remove(block);
inst = prev;
}
}
foreach_in_list_safe(aeb_entry, entry, &aeb) {
/* Kill all AEB entries that write a different value to or read from
* the flag register if we just wrote it.
*/
if (inst->writes_flag(devinfo)) {
if (entry->generator->reads_flag() ||
(entry->generator->writes_flag(devinfo) &&
!instructions_match(inst, entry->generator))) {
entry->remove();
ralloc_free(entry);
continue;
}
}
for (int i = 0; i < 3; i++) {
src_reg *src = &entry->generator->src[i];
/* Kill all AEB entries that use the destination we just
* overwrote.
*/
if (inst->dst.file == entry->generator->src[i].file &&
inst->dst.nr == entry->generator->src[i].nr) {
entry->remove();
ralloc_free(entry);
break;
}
/* Kill any AEB entries using registers that don't get reused any
* more -- a sure sign they'll fail operands_match().
*/
if (src->file == VGRF) {
if (live.var_range_end(var_from_reg(alloc, dst_reg(*src)), 8) < ip) {
entry->remove();
ralloc_free(entry);
break;
}
}
}
}
ip++;
}
ralloc_free(cse_ctx);
return progress;
}
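/* A sketch of the rewrite performed above (registers invented): for two
* matching expressions
*
*    mul(8) g10<1>F g2<4>F g3<4>F
*    ...
*    mul(8) g12<1>F g2<4>F g3<4>F
*
* the generator is retargeted to a fresh temporary g14, a copy
* "mov(8) g10<1>F g14<4>F" is inserted right after it, and the second MUL
* is replaced by "mov(8) g12<1>F g14<4>F".
*/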
bool
vec4_visitor::opt_cse()
{
bool progress = false;
const vec4_live_variables &live = live_analysis.require();
foreach_block (block, cfg) {
progress = opt_cse_local(block, live) || progress;
}
if (progress)
invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
return progress;
}

View file

@ -1,188 +0,0 @@
/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_vec4.h"
#include "brw_vec4_live_variables.h"
#include "brw_cfg.h"
/** @file brw_vec4_dead_code_eliminate.cpp
*
* Dataflow-aware dead code elimination.
*
* Walks the instruction list from the bottom, removing instructions that
* have results that both aren't used in later blocks and haven't been read
* yet in the tail end of this block.
*/
using namespace brw;
bool
vec4_visitor::dead_code_eliminate()
{
bool progress = false;
const vec4_live_variables &live_vars = live_analysis.require();
int num_vars = live_vars.num_vars;
BITSET_WORD *live = rzalloc_array(NULL, BITSET_WORD, BITSET_WORDS(num_vars));
BITSET_WORD *flag_live = rzalloc_array(NULL, BITSET_WORD, 1);
foreach_block_reverse_safe(block, cfg) {
memcpy(live, live_vars.block_data[block->num].liveout,
sizeof(BITSET_WORD) * BITSET_WORDS(num_vars));
memcpy(flag_live, live_vars.block_data[block->num].flag_liveout,
sizeof(BITSET_WORD));
foreach_inst_in_block_reverse_safe(vec4_instruction, inst, block) {
if ((inst->dst.file == VGRF && !inst->has_side_effects()) ||
(inst->dst.is_null() && inst->writes_flag(devinfo))){
bool result_live[4] = { false };
if (inst->dst.file == VGRF) {
for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) {
for (int c = 0; c < 4; c++) {
const unsigned v = var_from_reg(alloc, inst->dst, c, i);
result_live[c] |= BITSET_TEST(live, v);
}
}
} else {
for (unsigned c = 0; c < 4; c++)
result_live[c] = BITSET_TEST(flag_live, c);
}
/* If the instruction can't do writemasking, then it's all or
* nothing.
*/
if (!inst->can_do_writemask(devinfo)) {
bool result = result_live[0] | result_live[1] |
result_live[2] | result_live[3];
result_live[0] = result;
result_live[1] = result;
result_live[2] = result;
result_live[3] = result;
}
if (inst->writes_flag(devinfo)) {
/* Independently calculate the usage of the flag components and
* the destination value components.
*/
uint8_t flag_mask = inst->dst.writemask;
uint8_t dest_mask = inst->dst.writemask;
for (int c = 0; c < 4; c++) {
if (!result_live[c] && dest_mask & (1 << c))
dest_mask &= ~(1 << c);
if (!BITSET_TEST(flag_live, c))
flag_mask &= ~(1 << c);
}
if (inst->dst.writemask != (flag_mask | dest_mask)) {
progress = true;
inst->dst.writemask = flag_mask | dest_mask;
}
/* If none of the destination components are read, replace the
* destination register with the NULL register.
*/
if (dest_mask == 0) {
progress = true;
inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
}
} else {
for (int c = 0; c < 4; c++) {
if (!result_live[c] && inst->dst.writemask & (1 << c)) {
inst->dst.writemask &= ~(1 << c);
progress = true;
if (inst->dst.writemask == 0) {
if (inst->writes_accumulator) {
inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
} else {
inst->opcode = BRW_OPCODE_NOP;
break;
}
}
}
}
}
}
if (inst->dst.is_null() && inst->writes_flag(devinfo)) {
bool combined_live = false;
for (unsigned c = 0; c < 4; c++)
combined_live |= BITSET_TEST(flag_live, c);
if (!combined_live) {
inst->opcode = BRW_OPCODE_NOP;
progress = true;
}
}
if (inst->dst.file == VGRF && !inst->predicate &&
!inst->is_align1_partial_write()) {
for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) {
for (int c = 0; c < 4; c++) {
if (inst->dst.writemask & (1 << c)) {
const unsigned v = var_from_reg(alloc, inst->dst, c, i);
BITSET_CLEAR(live, v);
}
}
}
}
if (inst->writes_flag(devinfo) && !inst->predicate && inst->exec_size == 8) {
for (unsigned c = 0; c < 4; c++)
BITSET_CLEAR(flag_live, c);
}
if (inst->opcode == BRW_OPCODE_NOP) {
inst->remove(block);
continue;
}
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == VGRF) {
for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), 16); j++) {
for (int c = 0; c < 4; c++) {
const unsigned v = var_from_reg(alloc, inst->src[i], c, j);
BITSET_SET(live, v);
}
}
}
}
for (unsigned c = 0; c < 4; c++) {
if (inst->reads_flag(c)) {
BITSET_SET(flag_live, c);
}
}
}
}
ralloc_free(live);
ralloc_free(flag_live);
if (progress)
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
return progress;
}
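/* A sketch of the writemask narrowing above (registers invented): if only
* the .xy components of g10 are live downstream,
*
*    add(8) g10<1>.xyzwF g2<4>F g3<4>F
*
* is narrowed to write only .xy, and an instruction whose writemask drops
* to zero (and that writes neither the flag nor the accumulator) is turned
* into a NOP and removed.
*/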

File diff suppressed because it is too large

View file

@ -1,98 +0,0 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_vec4_gs_visitor.h"
namespace brw {
void
vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
dst_reg dest;
src_reg src;
switch (instr->intrinsic) {
case nir_intrinsic_load_per_vertex_input: {
assert(instr->def.bit_size == 32);
/* The EmitNoIndirectInput flag guarantees our vertex index will
* be constant. We should handle indirects someday.
*/
const unsigned vertex = nir_src_as_uint(instr->src[0]);
const unsigned offset_reg = nir_src_as_uint(instr->src[1]);
const unsigned input_array_stride = prog_data->urb_read_length * 2;
/* Make up a type...we have no way of knowing... */
const glsl_type *const type = glsl_ivec_type(instr->num_components);
src = src_reg(ATTR, input_array_stride * vertex +
nir_intrinsic_base(instr) + offset_reg,
type);
src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr));
dest = get_nir_def(instr->def, src.type);
dest.writemask = brw_writemask_for_size(instr->num_components);
emit(MOV(dest, src));
break;
}
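/* A worked example (values invented): with urb_read_length == 2 the
* input_array_stride is 4, so vertex == 1, base == 2 and offset_reg == 0
* read from ATTR slot 4 * 1 + 2 + 0 == 6.
*/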
case nir_intrinsic_load_input:
unreachable("nir_lower_io should have produced per_vertex intrinsics");
case nir_intrinsic_emit_vertex_with_counter:
this->vertex_count =
retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
gs_emit_vertex(nir_intrinsic_stream_id(instr));
break;
case nir_intrinsic_end_primitive_with_counter:
this->vertex_count =
retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
gs_end_primitive();
break;
case nir_intrinsic_set_vertex_and_primitive_count:
this->vertex_count =
retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
break;
case nir_intrinsic_load_primitive_id:
assert(gs_prog_data->include_primitive_id);
dest = get_nir_def(instr->def, BRW_REGISTER_TYPE_D);
emit(MOV(dest, retype(brw_vec4_grf(1, 0), BRW_REGISTER_TYPE_D)));
break;
case nir_intrinsic_load_invocation_id: {
dest = get_nir_def(instr->def, BRW_REGISTER_TYPE_D);
if (gs_prog_data->invocations > 1)
emit(GS_OPCODE_GET_INSTANCE_ID, dest);
else
emit(MOV(dest, brw_imm_ud(0)));
break;
}
default:
vec4_visitor::nir_emit_intrinsic(instr);
}
}
}

View file

@ -1,560 +0,0 @@
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* \file brw_vec4_gs_visitor.cpp
*
* Geometry-shader-specific code derived from the vec4_visitor class.
*/
#include "brw_vec4_gs_visitor.h"
#include "brw_cfg.h"
#include "brw_fs.h"
namespace brw {
vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
const struct brw_compile_params *params,
struct brw_gs_compile *c,
struct brw_gs_prog_data *prog_data,
const nir_shader *shader,
bool no_spills,
bool debug_enabled)
: vec4_visitor(compiler, params, &c->key.base.tex,
&prog_data->base, shader,
no_spills, debug_enabled),
c(c),
gs_prog_data(prog_data)
{
}
static inline struct brw_reg
attribute_to_hw_reg(int attr, brw_reg_type type, bool interleaved)
{
struct brw_reg reg;
unsigned width = REG_SIZE / 2 / MAX2(4, type_sz(type));
if (interleaved) {
reg = stride(brw_vecn_grf(width, attr / 2, (attr % 2) * 4), 0, width, 1);
} else {
reg = brw_vecn_grf(width, attr, 0);
}
reg.type = type;
return reg;
}
/**
* Replace each register of type ATTR in this->instructions with a reference
* to a fixed HW register.
*
* If interleaved is true, then each attribute takes up half a register, with
* register N containing attribute 2*N in its first half and attribute 2*N+1
* in its second half (this corresponds to the payload setup used by geometry
* shaders in "single" or "dual instanced" dispatch mode). If interleaved is
* false, then each attribute takes up a whole register, with register N
* containing attribute N (this corresponds to the payload setup used by
* vertex shaders, and by geometry shaders in "dual object" dispatch mode).
*/
int
vec4_gs_visitor::setup_varying_inputs(int payload_reg,
int attributes_per_reg)
{
/* For geometry shaders there are N copies of the input attributes, where N
* is the number of input vertices. attribute_map[BRW_VARYING_SLOT_COUNT *
* i + j] represents attribute j for vertex i.
*
* Note that GS inputs are read from the VUE 256 bits (2 vec4's) at a time,
* so the total number of input slots that will be delivered to the GS (and
* thus the stride of the input arrays) is urb_read_length * 2.
*/
const unsigned num_input_vertices = nir->info.gs.vertices_in;
assert(num_input_vertices <= MAX_GS_INPUT_VERTICES);
unsigned input_array_stride = prog_data->urb_read_length * 2;
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
for (int i = 0; i < 3; i++) {
if (inst->src[i].file != ATTR)
continue;
assert(inst->src[i].offset % REG_SIZE == 0);
int grf = payload_reg * attributes_per_reg +
inst->src[i].nr + inst->src[i].offset / REG_SIZE;
struct brw_reg reg =
attribute_to_hw_reg(grf, inst->src[i].type, attributes_per_reg > 1);
reg.swizzle = inst->src[i].swizzle;
if (inst->src[i].abs)
reg = brw_abs(reg);
if (inst->src[i].negate)
reg = negate(reg);
inst->src[i] = reg;
}
}
int regs_used = ALIGN(input_array_stride * num_input_vertices,
attributes_per_reg) / attributes_per_reg;
return payload_reg + regs_used;
}
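/* A worked example (values invented): in dual-instanced mode
* (attributes_per_reg == 2), payload_reg == 2 and inst->src[i].nr == 5
* give grf == 2 * 2 + 5 == 9, which attribute_to_hw_reg() maps to a
* four-wide region in the second (upper) half of g4.
*/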
void
vec4_gs_visitor::setup_payload()
{
/* If we are in dual instanced or single mode, then attributes are going
* to be interleaved, so one register contains two attribute slots.
*/
int attributes_per_reg =
prog_data->dispatch_mode == INTEL_DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
int reg = 0;
/* The payload always contains important data in r0, which contains
* the URB handles that are passed on to the URB write at the end
* of the thread.
*/
reg++;
/* If the shader uses gl_PrimitiveIDIn, that goes in r1. */
if (gs_prog_data->include_primitive_id)
reg++;
reg = setup_uniforms(reg);
reg = setup_varying_inputs(reg, attributes_per_reg);
this->first_non_payload_grf = reg;
}
void
vec4_gs_visitor::emit_prolog()
{
/* In vertex shaders, r0.2 is guaranteed to be initialized to zero. In
* geometry shaders, it isn't (it contains a bunch of information we don't
* need, like the input primitive type). We need r0.2 to be zero in order
* to build scratch read/write messages correctly (otherwise this value
* will be interpreted as a global offset, causing us to do our scratch
* reads/writes to garbage memory). So just set it to zero at the top of
* the shader.
*/
this->current_annotation = "clear r0.2";
dst_reg r0(retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD));
vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2, r0, brw_imm_ud(0u));
inst->force_writemask_all = true;
/* Create a virtual register to hold the vertex count */
this->vertex_count = src_reg(this, glsl_uint_type());
/* Initialize the vertex_count register to 0 */
this->current_annotation = "initialize vertex_count";
inst = emit(MOV(dst_reg(this->vertex_count), brw_imm_ud(0u)));
inst->force_writemask_all = true;
if (c->control_data_header_size_bits > 0) {
/* Create a virtual register to hold the current set of control data
* bits.
*/
this->control_data_bits = src_reg(this, glsl_uint_type());
/* If we're outputting more than 32 control data bits, then EmitVertex()
* will set control_data_bits to 0 after emitting the first vertex.
* Otherwise, we need to initialize it to 0 here.
*/
if (c->control_data_header_size_bits <= 32) {
this->current_annotation = "initialize control data bits";
inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u)));
inst->force_writemask_all = true;
}
}
this->current_annotation = NULL;
}
void
vec4_gs_visitor::emit_thread_end()
{
if (c->control_data_header_size_bits > 0) {
/* During shader execution, we only ever call emit_control_data_bits()
* just prior to outputting a vertex. Therefore, the control data bits
* corresponding to the most recently output vertex still need to be
* emitted.
*/
current_annotation = "thread end: emit control data bits";
emit_control_data_bits();
}
/* MRF 0 is reserved for the debugger, so start with message header
* in MRF 1.
*/
int base_mrf = 1;
current_annotation = "thread end";
dst_reg mrf_reg(MRF, base_mrf);
src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
vec4_instruction *inst = emit(MOV(mrf_reg, r0));
inst->force_writemask_all = true;
emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
inst = emit(GS_OPCODE_THREAD_END);
inst->base_mrf = base_mrf;
inst->mlen = 1;
}
void
vec4_gs_visitor::emit_urb_write_header(int mrf)
{
/* The SEND instruction that writes the vertex data to the VUE will use
* per_slot_offset=true, which means that DWORDs 3 and 4 of the message
* header specify an offset (in multiples of 256 bits) into the URB entry
* at which the write should take place.
*
* So we have to prepare a message header with the appropriate offset
* values.
*/
dst_reg mrf_reg(MRF, mrf);
src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
this->current_annotation = "URB write header";
vec4_instruction *inst = emit(MOV(mrf_reg, r0));
inst->force_writemask_all = true;
emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, this->vertex_count,
brw_imm_ud(gs_prog_data->output_vertex_size_hwords));
}
vec4_instruction *
vec4_gs_visitor::emit_urb_write_opcode(bool complete)
{
/* We don't care whether the vertex is complete, because in general
* geometry shaders output multiple vertices, and we don't terminate the
* thread until all vertices are complete.
*/
(void) complete;
vec4_instruction *inst = emit(VEC4_GS_OPCODE_URB_WRITE);
inst->offset = gs_prog_data->control_data_header_size_hwords;
inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
return inst;
}
/**
* Write out a batch of 32 control data bits from the control_data_bits
* register to the URB.
*
* The current value of the vertex_count register determines which DWORD in
* the URB receives the control data bits. The control_data_bits register is
* assumed to contain the correct data for the vertex that was most recently
* output, and all previous vertices that share the same DWORD.
*
* This function takes care of ensuring that if no vertices have been output
* yet, no control bits are emitted.
*/
void
vec4_gs_visitor::emit_control_data_bits()
{
assert(c->control_data_bits_per_vertex != 0);
/* Since the URB_WRITE_OWORD message operates with 128-bit (vec4 sized)
* granularity, we need to use two tricks to ensure that the batch of 32
* control data bits is written to the appropriate DWORD in the URB. To
* select which vec4 we are writing to, we use the "slot {0,1} offset"
* fields of the message header. To select which DWORD in the vec4 we are
* writing to, we use the channel mask fields of the message header. To
* avoid penalizing geometry shaders that emit a small number of vertices
* with extra bookkeeping, we only do each of these tricks when
* c->prog_data.control_data_header_size_bits is large enough to make it
* necessary.
*
* Note: this means that if we're outputting just a single DWORD of control
* data bits, we'll actually replicate it four times since we won't do any
* channel masking. But that's not a problem since in this case the
* hardware only pays attention to the first DWORD.
*/
enum brw_urb_write_flags urb_write_flags = BRW_URB_WRITE_OWORD;
if (c->control_data_header_size_bits > 32)
urb_write_flags = urb_write_flags | BRW_URB_WRITE_USE_CHANNEL_MASKS;
if (c->control_data_header_size_bits > 128)
urb_write_flags = urb_write_flags | BRW_URB_WRITE_PER_SLOT_OFFSET;
/* If we are using either channel masks or a per-slot offset, then we
* need to figure out which DWORD we are trying to write to, using the
* formula:
*
* dword_index = (vertex_count - 1) * bits_per_vertex / 32
*
* Since bits_per_vertex is a power of two, and is known at compile
* time, this can be optimized to:
*
* dword_index = (vertex_count - 1) >> (6 - log2(bits_per_vertex))
*/
src_reg dword_index(this, glsl_uint_type());
if (urb_write_flags) {
src_reg prev_count(this, glsl_uint_type());
emit(ADD(dst_reg(prev_count), this->vertex_count,
brw_imm_ud(0xffffffffu)));
unsigned log2_bits_per_vertex =
util_last_bit(c->control_data_bits_per_vertex);
emit(SHR(dst_reg(dword_index), prev_count,
brw_imm_ud(6 - log2_bits_per_vertex)));
}
/* Start building the URB write message. The first MRF gets a copy of
* R0.
*/
int base_mrf = 1;
dst_reg mrf_reg(MRF, base_mrf);
src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
vec4_instruction *inst = emit(MOV(mrf_reg, r0));
inst->force_writemask_all = true;
if (urb_write_flags & BRW_URB_WRITE_PER_SLOT_OFFSET) {
/* Set the per-slot offset to dword_index / 4, so that we'll write to
* the appropriate OWORD within the control data header.
*/
src_reg per_slot_offset(this, glsl_uint_type());
emit(SHR(dst_reg(per_slot_offset), dword_index, brw_imm_ud(2u)));
emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset,
brw_imm_ud(1u));
}
if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) {
/* Set the channel masks to 1 << (dword_index % 4), so that we'll
* write to the appropriate DWORD within the OWORD. We need to do
* this computation with force_writemask_all, otherwise garbage data
* from invocation 0 might clobber the mask for invocation 1 when
* GS_OPCODE_PREPARE_CHANNEL_MASKS tries to OR the two masks
* together.
*/
src_reg channel(this, glsl_uint_type());
inst = emit(AND(dst_reg(channel), dword_index, brw_imm_ud(3u)));
inst->force_writemask_all = true;
src_reg one(this, glsl_uint_type());
inst = emit(MOV(dst_reg(one), brw_imm_ud(1u)));
inst->force_writemask_all = true;
src_reg channel_mask(this, glsl_uint_type());
inst = emit(SHL(dst_reg(channel_mask), one, channel));
inst->force_writemask_all = true;
emit(GS_OPCODE_PREPARE_CHANNEL_MASKS, dst_reg(channel_mask),
channel_mask);
emit(GS_OPCODE_SET_CHANNEL_MASKS, mrf_reg, channel_mask);
}
/* Store the control data bits in the message payload and send it. */
dst_reg mrf_reg2(MRF, base_mrf + 1);
inst = emit(MOV(mrf_reg2, this->control_data_bits));
inst->force_writemask_all = true;
inst = emit(VEC4_GS_OPCODE_URB_WRITE);
inst->urb_write_flags = urb_write_flags;
inst->base_mrf = base_mrf;
inst->mlen = 2;
}
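/* A worked example (values invented): with control_data_bits_per_vertex
* == 2 and vertex_count == 40, prev_count == 39 and dword_index == 39 >> 4
* == 2, matching (40 - 1) * 2 / 32.  When both tricks are in use, the
* per-slot offset is 2 >> 2 == 0 and the channel mask is 1 << (2 & 3) ==
* 0x4, so DWORD 2 of the control data header receives this batch of bits.
*/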
void
vec4_gs_visitor::set_stream_control_data_bits(unsigned stream_id)
{
/* control_data_bits |= stream_id << ((2 * (vertex_count - 1)) % 32) */
/* Note: we are calling this *before* increasing vertex_count, so
* this->vertex_count == vertex_count - 1 in the formula above.
*/
/* Stream mode uses 2 bits per vertex */
assert(c->control_data_bits_per_vertex == 2);
/* Must be a valid stream */
assert(stream_id < 4); /* MAX_VERTEX_STREAMS */
/* Control data bits are initialized to 0 so we don't have to set any
* bits when sending vertices to stream 0.
*/
if (stream_id == 0)
return;
/* reg::sid = stream_id */
src_reg sid(this, glsl_uint_type());
emit(MOV(dst_reg(sid), brw_imm_ud(stream_id)));
/* reg:shift_count = 2 * (vertex_count - 1) */
src_reg shift_count(this, glsl_uint_type());
emit(SHL(dst_reg(shift_count), this->vertex_count, brw_imm_ud(1u)));
/* Note: we're relying on the fact that the GEN SHL instruction only pays
* attention to the lower 5 bits of its second source argument, so on this
* architecture, stream_id << 2 * (vertex_count - 1) is equivalent to
* stream_id << ((2 * (vertex_count - 1)) % 32).
*/
src_reg mask(this, glsl_uint_type());
emit(SHL(dst_reg(mask), sid, shift_count));
emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask));
}
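/* A worked example (values invented): emitting the fourth vertex
* (this->vertex_count == 3) to stream 2 gives shift_count == 6 and
* mask == 2 << 6 == 0x80, so bits 7:6 of control_data_bits record the
* stream for that vertex.
*/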
void
vec4_gs_visitor::gs_emit_vertex(int stream_id)
{
this->current_annotation = "emit vertex: safety check";
/* Haswell and later hardware ignores the "Render Stream Select" bits
* from the 3DSTATE_STREAMOUT packet when the SOL stage is disabled,
* and instead sends all primitives down the pipeline for rasterization.
* If the SOL stage is enabled, "Render Stream Select" is honored and
* primitives bound to non-zero streams are discarded after stream output.
*
* Since the only purpose of primitives sent to non-zero streams is to
* be recorded by transform feedback, we can simply discard all geometry
* bound to these streams when transform feedback is disabled.
*/
if (stream_id > 0 && !nir->info.has_transform_feedback_varyings)
return;
/* If we're outputting 32 control data bits or less, then we can wait
* until the shader is over to output them all. Otherwise we need to
* output them as we go. Now is the time to do it, since we're about to
* output the vertex_count'th vertex, so it's guaranteed that the
* control data bits associated with the (vertex_count - 1)th vertex are
* correct.
*/
if (c->control_data_header_size_bits > 32) {
this->current_annotation = "emit vertex: emit control data bits";
/* Only emit control data bits if we've finished accumulating a batch
* of 32 bits. This is the case when:
*
* (vertex_count * bits_per_vertex) % 32 == 0
*
* (in other words, when the last 5 bits of vertex_count *
* bits_per_vertex are 0). Assuming bits_per_vertex == 2^n for some
* integer n (which is always the case, since bits_per_vertex is
* always 1 or 2), this is equivalent to requiring that the last 5-n
* bits of vertex_count are 0:
*
* vertex_count & (2^(5-n) - 1) == 0
*
* 2^(5-n) == 2^5 / 2^n == 32 / bits_per_vertex, so this is
* equivalent to:
*
* vertex_count & (32 / bits_per_vertex - 1) == 0
*/
vec4_instruction *inst =
emit(AND(dst_null_ud(), this->vertex_count,
brw_imm_ud(32 / c->control_data_bits_per_vertex - 1)));
inst->conditional_mod = BRW_CONDITIONAL_Z;
emit(IF(BRW_PREDICATE_NORMAL));
{
/* If vertex_count is 0, then no control data bits have been
* accumulated yet, so we skip emitting them.
*/
emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u),
BRW_CONDITIONAL_NEQ));
emit(IF(BRW_PREDICATE_NORMAL));
emit_control_data_bits();
emit(BRW_OPCODE_ENDIF);
/* Reset control_data_bits to 0 so we can start accumulating a new
* batch.
*
* Note: in the case where vertex_count == 0, this neutralizes the
* effect of any call to EndPrimitive() that the shader may have
* made before outputting its first vertex.
*/
inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u)));
inst->force_writemask_all = true;
}
emit(BRW_OPCODE_ENDIF);
}
this->current_annotation = "emit vertex: vertex data";
emit_vertex();
/* In stream mode we have to set control data bits for all vertices
* unless we have disabled control data bits completely (which we do
* do for MESA_PRIM_POINTS outputs that don't use streams).
*/
if (c->control_data_header_size_bits > 0 &&
gs_prog_data->control_data_format ==
GFX7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
this->current_annotation = "emit vertex: Stream control data bits";
set_stream_control_data_bits(stream_id);
}
this->current_annotation = NULL;
}
void
vec4_gs_visitor::gs_end_primitive()
{
/* We can only do EndPrimitive() functionality when the control data
* consists of cut bits. Fortunately, the only time it isn't is when the
* output type is points, in which case EndPrimitive() is a no-op.
*/
if (gs_prog_data->control_data_format !=
GFX7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT) {
return;
}
if (c->control_data_header_size_bits == 0)
return;
/* Cut bits use one bit per vertex. */
assert(c->control_data_bits_per_vertex == 1);
/* Cut bit n should be set to 1 if EndPrimitive() was called after emitting
* vertex n, 0 otherwise. So all we need to do here is mark bit
* (vertex_count - 1) % 32 in the cut_bits register to indicate that
* EndPrimitive() was called after emitting vertex (vertex_count - 1);
* vec4_gs_visitor::emit_control_data_bits() will take care of the rest.
*
* Note that if EndPrimitive() is called before emitting any vertices, this
* will cause us to set bit 31 of the control_data_bits register to 1.
* That's fine because:
*
* - If max_vertices < 32, then vertex number 31 (zero-based) will never be
* output, so the hardware will ignore cut bit 31.
*
* - If max_vertices == 32, then vertex number 31 is guaranteed to be the
* last vertex, so setting cut bit 31 has no effect (since the primitive
* is automatically ended when the GS terminates).
*
* - If max_vertices > 32, then the ir_emit_vertex visitor will reset the
* control_data_bits register to 0 when the first vertex is emitted.
*/
/* control_data_bits |= 1 << ((vertex_count - 1) % 32) */
src_reg one(this, glsl_uint_type());
emit(MOV(dst_reg(one), brw_imm_ud(1u)));
src_reg prev_count(this, glsl_uint_type());
emit(ADD(dst_reg(prev_count), this->vertex_count, brw_imm_ud(0xffffffffu)));
src_reg mask(this, glsl_uint_type());
/* Note: we're relying on the fact that the GEN SHL instruction only pays
* attention to the lower 5 bits of its second source argument, so on this
* architecture, 1 << (vertex_count - 1) is equivalent to 1 <<
* ((vertex_count - 1) % 32).
*/
emit(SHL(dst_reg(mask), one, prev_count));
emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask));
}
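/* A worked example (values invented): calling EndPrimitive() right after
* emitting the fifth vertex (vertex_count == 5) computes prev_count == 4
* and mask == 1 << 4, so cut bit 4 is set for that vertex.
*/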
} /* namespace brw */

View file

@ -1,75 +0,0 @@
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* \file brw_vec4_gs_visitor.h
*
* Geometry-shader-specific code derived from the vec4_visitor class.
*/
#ifndef BRW_VEC4_GS_VISITOR_H
#define BRW_VEC4_GS_VISITOR_H
#include "brw_vec4.h"
#define MAX_GS_INPUT_VERTICES 6
#ifdef __cplusplus
namespace brw {
class vec4_gs_visitor : public vec4_visitor
{
public:
vec4_gs_visitor(const struct brw_compiler *compiler,
const struct brw_compile_params *params,
struct brw_gs_compile *c,
struct brw_gs_prog_data *prog_data,
const nir_shader *shader,
bool no_spills,
bool debug_enabled);
protected:
virtual void setup_payload();
virtual void emit_prolog();
virtual void emit_thread_end();
virtual void emit_urb_write_header(int mrf);
virtual vec4_instruction *emit_urb_write_opcode(bool complete);
virtual void gs_emit_vertex(int stream_id);
virtual void gs_end_primitive();
virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
protected:
int setup_varying_inputs(int payload_reg, int attributes_per_reg);
void emit_control_data_bits();
void set_stream_control_data_bits(unsigned stream_id);
src_reg vertex_count;
src_reg control_data_bits;
const struct brw_gs_compile * const c;
struct brw_gs_prog_data * const gs_prog_data;
};
} /* namespace brw */
#endif /* __cplusplus */
#endif /* BRW_VEC4_GS_VISITOR_H */

View file

@ -1,331 +0,0 @@
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
#include "brw_vec4.h"
#include "brw_vec4_live_variables.h"
using namespace brw;
#define MAX_INSTRUCTION (1 << 30)
/** @file brw_vec4_live_variables.cpp
*
* Support for computing at the basic block level which variables
* (virtual GRFs in our case) are live at entry and exit.
*
* See Muchnick's Advanced Compiler Design and Implementation, section
* 14.1 (p444).
*/
/**
* Sets up the use/def arrays and block-local approximation of the live ranges.
*
* The basic-block-level live variable analysis needs to know which
* variables get used before they're completely defined, and which
* variables are completely defined before they're used.
*
* We independently track each channel of a vec4. This is because we need to
* be able to recognize a sequence like:
*
* ...
* DP4 tmp.x a b;
* DP4 tmp.y c d;
* MUL result.xy tmp.xy e.xy
* ...
*
* as having tmp live only across that sequence (assuming it's used nowhere
* else), because it's a common pattern. A more conservative approach that
* doesn't get tmp marked as defined in this block will tend to result in
* spilling.
*/
void
vec4_live_variables::setup_def_use()
{
int ip = 0;
foreach_block (block, cfg) {
assert(ip == block->start_ip);
if (block->num > 0)
assert(cfg->blocks[block->num - 1]->end_ip == ip - 1);
foreach_inst_in_block(vec4_instruction, inst, block) {
struct block_data *bd = &block_data[block->num];
/* Set up the instruction uses. */
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == VGRF) {
for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), 16); j++) {
for (int c = 0; c < 4; c++) {
const unsigned v = var_from_reg(alloc, inst->src[i], c, j);
start[v] = MIN2(start[v], ip);
end[v] = ip;
if (!BITSET_TEST(bd->def, v))
BITSET_SET(bd->use, v);
}
}
}
}
for (unsigned c = 0; c < 4; c++) {
if (inst->reads_flag(c) &&
!BITSET_TEST(bd->flag_def, c)) {
BITSET_SET(bd->flag_use, c);
}
}
/* Set up the instruction defs. */
if (inst->dst.file == VGRF) {
for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) {
for (int c = 0; c < 4; c++) {
if (inst->dst.writemask & (1 << c)) {
const unsigned v = var_from_reg(alloc, inst->dst, c, i);
start[v] = MIN2(start[v], ip);
end[v] = ip;
/* Check for unconditional register writes, these are the
* things that screen off preceding definitions of a
* variable, and thus qualify for being in def[].
*/
if ((!inst->predicate || inst->opcode == BRW_OPCODE_SEL) &&
!BITSET_TEST(bd->use, v))
BITSET_SET(bd->def, v);
}
}
}
}
if (inst->writes_flag(devinfo)) {
for (unsigned c = 0; c < 4; c++) {
if ((inst->dst.writemask & (1 << c)) &&
!BITSET_TEST(bd->flag_use, c)) {
BITSET_SET(bd->flag_def, c);
}
}
}
ip++;
}
}
}
/**
* The algorithm incrementally sets bits in liveout and livein,
* propagating it through control flow. It will eventually terminate
* because it only ever adds bits, and stops when no bits are added in
* a pass.
*/
void
vec4_live_variables::compute_live_variables()
{
bool cont = true;
while (cont) {
cont = false;
foreach_block_reverse (block, cfg) {
struct block_data *bd = &block_data[block->num];
/* Update liveout */
foreach_list_typed(bblock_link, child_link, link, &block->children) {
struct block_data *child_bd = &block_data[child_link->block->num];
for (int i = 0; i < bitset_words; i++) {
BITSET_WORD new_liveout = (child_bd->livein[i] &
~bd->liveout[i]);
if (new_liveout) {
bd->liveout[i] |= new_liveout;
cont = true;
}
}
BITSET_WORD new_liveout = (child_bd->flag_livein[0] &
~bd->flag_liveout[0]);
if (new_liveout) {
bd->flag_liveout[0] |= new_liveout;
cont = true;
}
}
/* Update livein */
for (int i = 0; i < bitset_words; i++) {
BITSET_WORD new_livein = (bd->use[i] |
(bd->liveout[i] &
~bd->def[i]));
if (new_livein & ~bd->livein[i]) {
bd->livein[i] |= new_livein;
cont = true;
}
}
BITSET_WORD new_livein = (bd->flag_use[0] |
(bd->flag_liveout[0] &
~bd->flag_def[0]));
if (new_livein & ~bd->flag_livein[0]) {
bd->flag_livein[0] |= new_livein;
cont = true;
}
}
}
}
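/* Written as dataflow equations, the fixed point computed above is, roughly:
 *
 *    liveout[b] = union of livein[s] over all successors s of b
 *    livein[b]  = use[b] | (liveout[b] & ~def[b])
 *
 * with the same pair of equations applied independently to the four flag
 * channels.
 */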
/**
* Extend the start/end ranges for each variable to account for the
* new information calculated from control flow.
*/
void
vec4_live_variables::compute_start_end()
{
foreach_block (block, cfg) {
const struct block_data &bd = block_data[block->num];
for (int i = 0; i < num_vars; i++) {
if (BITSET_TEST(bd.livein, i)) {
start[i] = MIN2(start[i], block->start_ip);
end[i] = MAX2(end[i], block->start_ip);
}
if (BITSET_TEST(bd.liveout, i)) {
start[i] = MIN2(start[i], block->end_ip);
end[i] = MAX2(end[i], block->end_ip);
}
}
}
}
vec4_live_variables::vec4_live_variables(const backend_shader *s)
: alloc(s->alloc), cfg(s->cfg)
{
mem_ctx = ralloc_context(NULL);
num_vars = alloc.total_size * 8;
start = ralloc_array(mem_ctx, int, num_vars);
end = ralloc_array(mem_ctx, int, num_vars);
for (int i = 0; i < num_vars; i++) {
start[i] = MAX_INSTRUCTION;
end[i] = -1;
}
devinfo = s->compiler->devinfo;
block_data = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
bitset_words = BITSET_WORDS(num_vars);
for (int i = 0; i < cfg->num_blocks; i++) {
block_data[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
block_data[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
block_data[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
block_data[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
block_data[i].flag_def[0] = 0;
block_data[i].flag_use[0] = 0;
block_data[i].flag_livein[0] = 0;
block_data[i].flag_liveout[0] = 0;
}
setup_def_use();
compute_live_variables();
compute_start_end();
}
vec4_live_variables::~vec4_live_variables()
{
ralloc_free(mem_ctx);
}
static bool
check_register_live_range(const vec4_live_variables *live, int ip,
unsigned var, unsigned n)
{
for (unsigned j = 0; j < n; j += 4) {
if (var + j >= unsigned(live->num_vars) ||
live->start[var + j] > ip || live->end[var + j] < ip)
return false;
}
return true;
}
bool
vec4_live_variables::validate(const backend_shader *s) const
{
unsigned ip = 0;
foreach_block_and_inst(block, vec4_instruction, inst, s->cfg) {
for (unsigned c = 0; c < 4; c++) {
if (inst->dst.writemask & (1 << c)) {
for (unsigned i = 0; i < 3; i++) {
if (inst->src[i].file == VGRF &&
!check_register_live_range(this, ip,
var_from_reg(alloc, inst->src[i], c),
regs_read(inst, i)))
return false;
}
if (inst->dst.file == VGRF &&
!check_register_live_range(this, ip,
var_from_reg(alloc, inst->dst, c),
regs_written(inst)))
return false;
}
}
ip++;
}
return true;
}
int
vec4_live_variables::var_range_start(unsigned v, unsigned n) const
{
int ip = INT_MAX;
for (unsigned i = 0; i < n; i++)
ip = MIN2(ip, start[v + i]);
return ip;
}
int
vec4_live_variables::var_range_end(unsigned v, unsigned n) const
{
int ip = INT_MIN;
for (unsigned i = 0; i < n; i++)
ip = MAX2(ip, end[v + i]);
return ip;
}
bool
vec4_live_variables::vgrfs_interfere(int a, int b) const
{
return !((var_range_end(8 * alloc.offsets[a], 8 * alloc.sizes[a]) <=
var_range_start(8 * alloc.offsets[b], 8 * alloc.sizes[b])) ||
(var_range_end(8 * alloc.offsets[b], 8 * alloc.sizes[b]) <=
var_range_start(8 * alloc.offsets[a], 8 * alloc.sizes[a])));
}
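/* In other words, two VGRFs interfere unless the entire live range of one
 * (taken over all 8 * size per-channel variables) ends at or before the
 * point where the other's begins.
 */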

View file

@ -1,143 +0,0 @@
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
#ifndef BRW_VEC4_LIVE_VARIABLES_H
#define BRW_VEC4_LIVE_VARIABLES_H
#include "brw_ir_vec4.h"
#include "brw_ir_analysis.h"
#include "util/bitset.h"
struct backend_shader;
namespace brw {
class vec4_live_variables {
public:
struct block_data {
/**
* Which variables are defined before being used in the block.
*
* Note that for our purposes, "defined" means unconditionally, completely
* defined.
*/
BITSET_WORD *def;
/**
* Which variables are used before being defined in the block.
*/
BITSET_WORD *use;
/** Which defs reach the entry point of the block. */
BITSET_WORD *livein;
/** Which defs reach the exit point of the block. */
BITSET_WORD *liveout;
BITSET_WORD flag_def[1];
BITSET_WORD flag_use[1];
BITSET_WORD flag_livein[1];
BITSET_WORD flag_liveout[1];
};
vec4_live_variables(const backend_shader *s);
~vec4_live_variables();
bool
validate(const backend_shader *s) const;
analysis_dependency_class
dependency_class() const
{
return (DEPENDENCY_INSTRUCTION_IDENTITY |
DEPENDENCY_INSTRUCTION_DATA_FLOW |
DEPENDENCY_VARIABLES);
}
int num_vars;
int bitset_words;
const struct intel_device_info *devinfo;
/** Per-basic-block information on live variables */
struct block_data *block_data;
/** @{
* Final computed live ranges for each variable.
*/
int *start;
int *end;
/** @} */
int var_range_start(unsigned v, unsigned n) const;
int var_range_end(unsigned v, unsigned n) const;
bool vgrfs_interfere(int a, int b) const;
protected:
void setup_def_use();
void compute_live_variables();
void compute_start_end();
const simple_allocator &alloc;
cfg_t *cfg;
void *mem_ctx;
};
/* Returns the variable index for the k-th dword of the c-th component of
* register reg.
*/
inline unsigned
var_from_reg(const simple_allocator &alloc, const src_reg &reg,
unsigned c = 0, unsigned k = 0)
{
assert(reg.file == VGRF && reg.nr < alloc.count && c < 4);
const unsigned csize = DIV_ROUND_UP(type_sz(reg.type), 4);
unsigned result =
8 * alloc.offsets[reg.nr] + reg.offset / 4 +
(BRW_GET_SWZ(reg.swizzle, c) + k / csize * 4) * csize + k % csize;
/* Do not exceed the limit for this register */
assert(result < 8 * (alloc.offsets[reg.nr] + alloc.sizes[reg.nr]));
return result;
}
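/* Worked example for the src_reg variant above (float VGRF, so csize == 1):
 * for a register allocated at alloc.offsets[nr] == 3 with reg.offset == 0,
 * reading component c == 0 through a .yyyy swizzle gives
 * BRW_GET_SWZ(...) == 1 and therefore variable index 8 * 3 + 1 == 25.
 */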
inline unsigned
var_from_reg(const simple_allocator &alloc, const dst_reg &reg,
unsigned c = 0, unsigned k = 0)
{
assert(reg.file == VGRF && reg.nr < alloc.count && c < 4);
const unsigned csize = DIV_ROUND_UP(type_sz(reg.type), 4);
unsigned result =
8 * alloc.offsets[reg.nr] + reg.offset / 4 +
(c + k / csize * 4) * csize + k % csize;
/* Do not exceed the limit for this register */
assert(result < 8 * (alloc.offsets[reg.nr] + alloc.sizes[reg.nr]));
return result;
}
} /* namespace brw */
#endif /* BRW_VEC4_LIVE_VARIABLES_H */

File diff suppressed because it is too large

View file

@ -1,512 +0,0 @@
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "util/register_allocate.h"
#include "brw_vec4.h"
#include "brw_cfg.h"
using namespace brw;
#define REG_CLASS_COUNT 20
namespace brw {
static void
assign(unsigned int *reg_hw_locations, backend_reg *reg)
{
if (reg->file == VGRF) {
reg->nr = reg_hw_locations[reg->nr] + reg->offset / REG_SIZE;
reg->offset %= REG_SIZE;
}
}
bool
vec4_visitor::reg_allocate_trivial()
{
unsigned int hw_reg_mapping[this->alloc.count];
bool virtual_grf_used[this->alloc.count];
int next;
/* Calculate which virtual GRFs are actually in use after whatever
* optimization passes have occurred.
*/
for (unsigned i = 0; i < this->alloc.count; i++) {
virtual_grf_used[i] = false;
}
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
if (inst->dst.file == VGRF)
virtual_grf_used[inst->dst.nr] = true;
for (unsigned i = 0; i < 3; i++) {
if (inst->src[i].file == VGRF)
virtual_grf_used[inst->src[i].nr] = true;
}
}
hw_reg_mapping[0] = this->first_non_payload_grf;
next = hw_reg_mapping[0] + this->alloc.sizes[0];
for (unsigned i = 1; i < this->alloc.count; i++) {
if (virtual_grf_used[i]) {
hw_reg_mapping[i] = next;
next += this->alloc.sizes[i];
}
}
prog_data->total_grf = next;
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
assign(hw_reg_mapping, &inst->dst);
assign(hw_reg_mapping, &inst->src[0]);
assign(hw_reg_mapping, &inst->src[1]);
assign(hw_reg_mapping, &inst->src[2]);
}
if (prog_data->total_grf > max_grf) {
fail("Ran out of regs on trivial allocator (%d/%d)\n",
prog_data->total_grf, max_grf);
return false;
}
return true;
}
extern "C" void
brw_vec4_alloc_reg_set(struct brw_compiler *compiler)
{
int base_reg_count =
compiler->devinfo->ver >= 7 ? GFX7_MRF_HACK_START : BRW_MAX_GRF;
assert(compiler->devinfo->ver < 8);
/* After running split_virtual_grfs(), almost all VGRFs will be of size 1.
* SEND-from-GRF sources cannot be split, so we also need classes for each
* potential message length.
*/
assert(REG_CLASS_COUNT == MAX_VGRF_SIZE(compiler->devinfo));
int class_sizes[REG_CLASS_COUNT];
for (int i = 0; i < REG_CLASS_COUNT; i++)
class_sizes[i] = i + 1;
ralloc_free(compiler->vec4_reg_set.regs);
compiler->vec4_reg_set.regs = ra_alloc_reg_set(compiler, base_reg_count, false);
if (compiler->devinfo->ver >= 6)
ra_set_allocate_round_robin(compiler->vec4_reg_set.regs);
ralloc_free(compiler->vec4_reg_set.classes);
compiler->vec4_reg_set.classes = ralloc_array(compiler, struct ra_class *, REG_CLASS_COUNT);
/* Now, add the registers to their classes, and add the conflicts
* between them and the base GRF registers (and also each other).
*/
for (int i = 0; i < REG_CLASS_COUNT; i++) {
int class_reg_count = base_reg_count - (class_sizes[i] - 1);
compiler->vec4_reg_set.classes[i] =
ra_alloc_contig_reg_class(compiler->vec4_reg_set.regs, class_sizes[i]);
for (int j = 0; j < class_reg_count; j++)
ra_class_add_reg(compiler->vec4_reg_set.classes[i], j);
}
ra_set_finalize(compiler->vec4_reg_set.regs, NULL);
}
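/* As a concrete example of the classes built above: classes[3] covers
 * contiguous blocks of four GRFs, i.e. base registers j in
 * [0, base_reg_count - 4], and reg_allocate() below assigns any VGRF of
 * size 4 to that class.
 */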
void
vec4_visitor::setup_payload_interference(struct ra_graph *g,
int first_payload_node,
int reg_node_count)
{
int payload_node_count = this->first_non_payload_grf;
for (int i = 0; i < payload_node_count; i++) {
/* Mark each payload reg node as being allocated to its physical register.
*
* The alternative would be to have per-physical register classes, which
* would just be silly.
*/
ra_set_node_reg(g, first_payload_node + i, i);
/* For now, just mark each payload node as interfering with every other
* node to be allocated.
*/
for (int j = 0; j < reg_node_count; j++) {
ra_add_node_interference(g, first_payload_node + i, j);
}
}
}
bool
vec4_visitor::reg_allocate()
{
unsigned int hw_reg_mapping[alloc.count];
int payload_reg_count = this->first_non_payload_grf;
/* Using the trivial allocator can be useful in debugging undefined
* register access as a result of broken optimization passes.
*/
if (0)
return reg_allocate_trivial();
assert(devinfo->ver < 8);
const vec4_live_variables &live = live_analysis.require();
int node_count = alloc.count;
int first_payload_node = node_count;
node_count += payload_reg_count;
struct ra_graph *g =
ra_alloc_interference_graph(compiler->vec4_reg_set.regs, node_count);
for (unsigned i = 0; i < alloc.count; i++) {
int size = this->alloc.sizes[i];
assert(size >= 1 && size <= MAX_VGRF_SIZE(devinfo));
ra_set_node_class(g, i, compiler->vec4_reg_set.classes[size - 1]);
for (unsigned j = 0; j < i; j++) {
if (live.vgrfs_interfere(i, j)) {
ra_add_node_interference(g, i, j);
}
}
}
/* Certain instructions can't safely use the same register for their
* sources and destination. Add interference.
*/
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
if (inst->dst.file == VGRF && inst->has_source_and_destination_hazard()) {
for (unsigned i = 0; i < 3; i++) {
if (inst->src[i].file == VGRF) {
ra_add_node_interference(g, inst->dst.nr, inst->src[i].nr);
}
}
}
}
setup_payload_interference(g, first_payload_node, node_count);
if (!ra_allocate(g)) {
/* Failed to allocate registers. Spill a reg, and the caller will
* loop back into here to try again.
*/
int reg = choose_spill_reg(g);
if (this->no_spills) {
fail("Failure to register allocate. Reduce number of live "
"values to avoid this.");
} else if (reg == -1) {
fail("no register to spill\n");
} else {
spill_reg(reg);
}
ralloc_free(g);
return false;
}
/* Get the chosen virtual registers for each node, and map virtual
* regs in the register classes back down to real hardware reg
* numbers.
*/
prog_data->total_grf = payload_reg_count;
for (unsigned i = 0; i < alloc.count; i++) {
hw_reg_mapping[i] = ra_get_node_reg(g, i);
prog_data->total_grf = MAX2(prog_data->total_grf,
hw_reg_mapping[i] + alloc.sizes[i]);
}
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
assign(hw_reg_mapping, &inst->dst);
assign(hw_reg_mapping, &inst->src[0]);
assign(hw_reg_mapping, &inst->src[1]);
assign(hw_reg_mapping, &inst->src[2]);
}
ralloc_free(g);
return true;
}
/**
* When we decide to spill a register, instead of blindly spilling every use,
* save unspills when the spill register is used (read) in consecutive
* instructions. This can potentially save a bunch of unspills that would
* have very little impact in register allocation anyway.
*
* Notice that we need to account for this behavior when spilling a register
* and when evaluating spilling costs. This function is designed so it can
* be called from both places and avoid repeating the logic.
*
* - When we call this function from spill_reg(), we pass in scratch_reg the
* actual unspill/spill register that we want to reuse in the current
* instruction.
*
* - When we call this from evaluate_spill_costs(), we pass the register for
* which we are evaluating spilling costs.
*
* In either case, we check if the previous instructions read scratch_reg until
* we find one that writes to it with a compatible mask or does not read/write
* scratch_reg at all.
*/
static bool
can_use_scratch_for_source(const vec4_instruction *inst, unsigned i,
unsigned scratch_reg)
{
assert(inst->src[i].file == VGRF);
bool prev_inst_read_scratch_reg = false;
/* See if any previous source in the same instruction reads scratch_reg */
for (unsigned n = 0; n < i; n++) {
if (inst->src[n].file == VGRF && inst->src[n].nr == scratch_reg)
prev_inst_read_scratch_reg = true;
}
/* Now check if previous instructions read/write scratch_reg */
for (vec4_instruction *prev_inst = (vec4_instruction *) inst->prev;
!prev_inst->is_head_sentinel();
prev_inst = (vec4_instruction *) prev_inst->prev) {
/* If the previous instruction writes to scratch_reg then we can reuse
* it if the write is not conditional and the channels we write are
* compatible with our read mask
*/
if (prev_inst->dst.file == VGRF && prev_inst->dst.nr == scratch_reg) {
return (!prev_inst->predicate || prev_inst->opcode == BRW_OPCODE_SEL) &&
(brw_mask_for_swizzle(inst->src[i].swizzle) &
~prev_inst->dst.writemask) == 0;
}
/* Skip scratch read/writes so that instructions generated by spilling
* other registers (that won't read/write scratch_reg) do not stop us from
* reusing scratch_reg for this instruction.
*/
if (prev_inst->opcode == SHADER_OPCODE_GFX4_SCRATCH_WRITE ||
prev_inst->opcode == SHADER_OPCODE_GFX4_SCRATCH_READ)
continue;
/* If the previous instruction does not write to scratch_reg, then check
* if it reads it
*/
int n;
for (n = 0; n < 3; n++) {
if (prev_inst->src[n].file == VGRF &&
prev_inst->src[n].nr == scratch_reg) {
prev_inst_read_scratch_reg = true;
break;
}
}
if (n == 3) {
/* The previous instruction does not read scratch_reg. At this point,
* if no previous instruction has read scratch_reg it means that we
* will need to unspill it here and we can't reuse it (so we return
* false). Otherwise, if we found at least one consecutive instruction
* that read scratch_reg, then we know that we got here from
* evaluate_spill_costs (since for the spill_reg path any block of
* consecutive instructions using scratch_reg must start with a write
* to that register, so we would've exited the loop in the check for
* the write that we have at the start of this loop), and in that case
* it means that we found the point at which the scratch_reg would be
* unspilled. Since we always unspill a full vec4, it means that we
* have all the channels available and we can just return true to
* signal that we can reuse the register in the current instruction
* too.
*/
return prev_inst_read_scratch_reg;
}
}
return prev_inst_read_scratch_reg;
}
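/* For example, if the closest preceding non-scratch instruction is an
 * unconditional MOV that writes scratch_reg.xyzw, a later read of
 * scratch_reg.yz can reuse the cached value and the unspill is skipped;
 * a predicated (non-SEL) write forces a fresh unspill instead.
 */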
static inline float
spill_cost_for_type(enum brw_reg_type type)
{
/* Spilling of a 64-bit register involves emitting 2 32-bit scratch
* messages plus the 64b/32b shuffling code.
*/
return type_sz(type) == 8 ? 2.25f : 1.0f;
}
void
vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
{
float loop_scale = 1.0;
unsigned *reg_type_size = (unsigned *)
ralloc_size(NULL, this->alloc.count * sizeof(unsigned));
for (unsigned i = 0; i < this->alloc.count; i++) {
spill_costs[i] = 0.0;
no_spill[i] = alloc.sizes[i] != 1 && alloc.sizes[i] != 2;
reg_type_size[i] = 0;
}
/* Calculate costs for spilling nodes. Call it a cost of 1 per
* spill/unspill we'll have to do, and guess that the insides of
* loops run 10 times.
*/
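/* With that guess, a read of a register inside a doubly nested loop
 * contributes roughly 100x the base cost, since loop_scale is multiplied by
 * 10 at each DO and divided back down at the matching WHILE (see the switch
 * at the end of this loop).
 */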
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == VGRF && !no_spill[inst->src[i].nr]) {
/* We will only unspill src[i] if it wasn't unspilled for the
* previous instruction, in which case we'll just reuse the scratch
* reg for this instruction.
*/
if (!can_use_scratch_for_source(inst, i, inst->src[i].nr)) {
spill_costs[inst->src[i].nr] +=
loop_scale * spill_cost_for_type(inst->src[i].type);
if (inst->src[i].reladdr ||
inst->src[i].offset >= REG_SIZE)
no_spill[inst->src[i].nr] = true;
/* We don't support unspills of partial DF reads.
*
* Our 64-bit unspills are implemented with two 32-bit scratch
* messages, each one reading data for both SIMD4x2 threads, which we
* then need to shuffle into correct 64-bit data. Ensure that we
* are reading data for both threads.
*/
if (type_sz(inst->src[i].type) == 8 && inst->exec_size != 8)
no_spill[inst->src[i].nr] = true;
}
/* We can't spill registers that mix 32-bit and 64-bit access (that
* contain 64-bit data that is operated on via 32-bit instructions)
*/
unsigned type_size = type_sz(inst->src[i].type);
if (reg_type_size[inst->src[i].nr] == 0)
reg_type_size[inst->src[i].nr] = type_size;
else if (reg_type_size[inst->src[i].nr] != type_size)
no_spill[inst->src[i].nr] = true;
}
}
if (inst->dst.file == VGRF && !no_spill[inst->dst.nr]) {
spill_costs[inst->dst.nr] +=
loop_scale * spill_cost_for_type(inst->dst.type);
if (inst->dst.reladdr || inst->dst.offset >= REG_SIZE)
no_spill[inst->dst.nr] = true;
/* We don't support spills of partial DF writes.
*
* Our 64-bit spills are implemented with two 32-bit scratch messages,
* each one writing data for both SIMD4x2 threads. Ensure that we
* are writing data for both threads.
*/
if (type_sz(inst->dst.type) == 8 && inst->exec_size != 8)
no_spill[inst->dst.nr] = true;
/* We can't spill registers that mix 32-bit and 64-bit access (that
* contain 64-bit data that is operated on via 32-bit instructions)
*/
unsigned type_size = type_sz(inst->dst.type);
if (reg_type_size[inst->dst.nr] == 0)
reg_type_size[inst->dst.nr] = type_size;
else if (reg_type_size[inst->dst.nr] != type_size)
no_spill[inst->dst.nr] = true;
}
switch (inst->opcode) {
case BRW_OPCODE_DO:
loop_scale *= 10;
break;
case BRW_OPCODE_WHILE:
loop_scale /= 10;
break;
case SHADER_OPCODE_GFX4_SCRATCH_READ:
case SHADER_OPCODE_GFX4_SCRATCH_WRITE:
case VEC4_OPCODE_MOV_FOR_SCRATCH:
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == VGRF)
no_spill[inst->src[i].nr] = true;
}
if (inst->dst.file == VGRF)
no_spill[inst->dst.nr] = true;
break;
default:
break;
}
}
ralloc_free(reg_type_size);
}
int
vec4_visitor::choose_spill_reg(struct ra_graph *g)
{
float spill_costs[this->alloc.count];
bool no_spill[this->alloc.count];
evaluate_spill_costs(spill_costs, no_spill);
for (unsigned i = 0; i < this->alloc.count; i++) {
if (!no_spill[i])
ra_set_node_spill_cost(g, i, spill_costs[i]);
}
return ra_get_best_spill_node(g);
}
void
vec4_visitor::spill_reg(unsigned spill_reg_nr)
{
assert(alloc.sizes[spill_reg_nr] == 1 || alloc.sizes[spill_reg_nr] == 2);
unsigned spill_offset = last_scratch;
last_scratch += alloc.sizes[spill_reg_nr];
/* Generate spill/unspill instructions for the objects being spilled. */
unsigned scratch_reg = ~0u;
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
for (unsigned i = 0; i < 3; i++) {
if (inst->src[i].file == VGRF && inst->src[i].nr == spill_reg_nr) {
if (scratch_reg == ~0u ||
!can_use_scratch_for_source(inst, i, scratch_reg)) {
/* We need to unspill anyway so make sure we read the full vec4
* in any case. This way, the cached register can be reused
* for consecutive instructions that read different channels of
* the same vec4.
*/
scratch_reg = alloc.allocate(alloc.sizes[spill_reg_nr]);
src_reg temp = inst->src[i];
temp.nr = scratch_reg;
temp.offset = 0;
temp.swizzle = BRW_SWIZZLE_XYZW;
emit_scratch_read(block, inst,
dst_reg(temp), inst->src[i], spill_offset);
temp.offset = inst->src[i].offset;
}
assert(scratch_reg != ~0u);
inst->src[i].nr = scratch_reg;
}
}
if (inst->dst.file == VGRF && inst->dst.nr == spill_reg_nr) {
emit_scratch_write(block, inst, spill_offset);
scratch_reg = inst->dst.nr;
}
}
invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
}
} /* namespace brw */

View file

@ -1,213 +0,0 @@
/*
* Copyright © 2013-2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_vec4_surface_builder.h"
using namespace brw;
namespace {
namespace array_utils {
/**
* Copy one out of every \p src_stride logical components of the argument
* into one out of every \p dst_stride logical components of the result.
*/
static src_reg
emit_stride(const vec4_builder &bld, const src_reg &src, unsigned size,
unsigned dst_stride, unsigned src_stride)
{
if (src_stride == 1 && dst_stride == 1) {
return src;
} else {
const dst_reg dst = bld.vgrf(src.type,
DIV_ROUND_UP(size * dst_stride, 4));
for (unsigned i = 0; i < size; ++i)
bld.MOV(writemask(offset(dst, 8, i * dst_stride / 4),
1 << (i * dst_stride % 4)),
swizzle(offset(src, 8, i * src_stride / 4),
brw_swizzle_for_mask(1 << (i * src_stride % 4))));
return src_reg(dst);
}
}
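/* For example, emit_insert() below calls this with dst_stride == 4 and
 * src_stride == 1 when SIMD4x2 is not available, scattering each logical
 * component of the vec4 into the .x channel of a consecutive register so
 * the payload ends up in SIMD8 layout.
 */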
/**
* Convert a VEC4 into an array of registers with the layout expected by
* the recipient shared unit. If \p has_simd4x2 is true the argument is
* left unmodified in SIMD4x2 form, otherwise it will be rearranged into
* a SIMD8 vector.
*/
static src_reg
emit_insert(const vec4_builder &bld, const src_reg &src,
unsigned n, bool has_simd4x2)
{
if (src.file == BAD_FILE || n == 0) {
return src_reg();
} else {
/* Pad unused components with zeroes. */
const unsigned mask = (1 << n) - 1;
const dst_reg tmp = bld.vgrf(src.type);
bld.MOV(writemask(tmp, mask), src);
if (n < 4)
bld.MOV(writemask(tmp, ~mask), brw_imm_d(0));
return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1);
}
}
}
}
namespace brw {
namespace surface_access {
namespace {
using namespace array_utils;
/**
* Generate a send opcode for a surface message and return the
* result.
*/
src_reg
emit_send(const vec4_builder &bld, enum opcode op,
const src_reg &header,
const src_reg &addr, unsigned addr_sz,
const src_reg &src, unsigned src_sz,
const src_reg &surface,
unsigned arg, unsigned ret_sz,
brw_predicate pred = BRW_PREDICATE_NONE)
{
/* Calculate the total number of components of the payload. */
const unsigned header_sz = (header.file == BAD_FILE ? 0 : 1);
const unsigned sz = header_sz + addr_sz + src_sz;
/* Construct the payload. */
const dst_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
unsigned n = 0;
if (header_sz)
bld.exec_all().MOV(offset(payload, 8, n++),
retype(header, BRW_REGISTER_TYPE_UD));
for (unsigned i = 0; i < addr_sz; i++)
bld.MOV(offset(payload, 8, n++),
offset(retype(addr, BRW_REGISTER_TYPE_UD), 8, i));
for (unsigned i = 0; i < src_sz; i++)
bld.MOV(offset(payload, 8, n++),
offset(retype(src, BRW_REGISTER_TYPE_UD), 8, i));
/* Reduce the dynamically uniform surface index to a single
* scalar.
*/
const src_reg usurface = bld.emit_uniformize(surface);
/* Emit the message send instruction. */
const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz);
vec4_instruction *inst =
bld.emit(op, dst, src_reg(payload), usurface, brw_imm_ud(arg));
inst->mlen = sz;
inst->size_written = ret_sz * REG_SIZE;
inst->header_size = header_sz;
inst->predicate = pred;
return src_reg(dst);
}
}
/**
* Emit an untyped surface read opcode. \p dims determines the number
* of components of the address and \p size the number of components of
* the returned value.
*/
src_reg
emit_untyped_read(const vec4_builder &bld,
const src_reg &surface, const src_reg &addr,
unsigned dims, unsigned size,
brw_predicate pred)
{
return emit_send(bld, VEC4_OPCODE_UNTYPED_SURFACE_READ, src_reg(),
emit_insert(bld, addr, dims, true), 1,
src_reg(), 0,
surface, size, 1, pred);
}
/**
* Emit an untyped surface write opcode. \p dims determines the number
* of components of the address and \p size the number of components of
* the argument.
*/
void
emit_untyped_write(const vec4_builder &bld, const src_reg &surface,
const src_reg &addr, const src_reg &src,
unsigned dims, unsigned size,
brw_predicate pred)
{
const bool has_simd4x2 = bld.shader->devinfo->verx10 == 75;
emit_send(bld, VEC4_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(),
emit_insert(bld, addr, dims, has_simd4x2),
has_simd4x2 ? 1 : dims,
emit_insert(bld, src, size, has_simd4x2),
has_simd4x2 ? 1 : size,
surface, size, 0, pred);
}
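/* has_simd4x2 is only true on Haswell (verx10 == 75); elsewhere the address
 * and data payloads are first rearranged into SIMD8 form by emit_insert().
 */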
/**
* Emit an untyped surface atomic opcode. \p dims determines the number
* of components of the address and \p rsize the number of components of
* the returned value (either zero or one).
*/
src_reg
emit_untyped_atomic(const vec4_builder &bld,
const src_reg &surface, const src_reg &addr,
const src_reg &src0, const src_reg &src1,
unsigned dims, unsigned rsize, unsigned op,
brw_predicate pred)
{
const bool has_simd4x2 = bld.shader->devinfo->verx10 == 75;
/* Zip the components of both sources, they are represented as the X
* and Y components of the same vector.
*/
const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);
if (size >= 1) {
bld.MOV(writemask(srcs, WRITEMASK_X),
swizzle(src0, BRW_SWIZZLE_XXXX));
}
if (size >= 2) {
bld.MOV(writemask(srcs, WRITEMASK_Y),
swizzle(src1, BRW_SWIZZLE_XXXX));
}
return emit_send(bld, VEC4_OPCODE_UNTYPED_ATOMIC, src_reg(),
emit_insert(bld, addr, dims, has_simd4x2),
has_simd4x2 ? 1 : dims,
emit_insert(bld, src_reg(srcs), size, has_simd4x2),
has_simd4x2 && size ? 1 : size,
surface, op, rsize, pred);
}
}
}

View file

@ -1,53 +0,0 @@
/* -*- c++ -*- */
/*
* Copyright © 2013-2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef BRW_VEC4_SURFACE_BUILDER_H
#define BRW_VEC4_SURFACE_BUILDER_H
#include "brw_vec4_builder.h"
namespace brw {
namespace surface_access {
src_reg
emit_untyped_read(const vec4_builder &bld,
const src_reg &surface, const src_reg &addr,
unsigned dims, unsigned size,
brw_predicate pred = BRW_PREDICATE_NONE);
void
emit_untyped_write(const vec4_builder &bld, const src_reg &surface,
const src_reg &addr, const src_reg &src,
unsigned dims, unsigned size,
brw_predicate pred = BRW_PREDICATE_NONE);
src_reg
emit_untyped_atomic(const vec4_builder &bld,
const src_reg &surface, const src_reg &addr,
const src_reg &src0, const src_reg &src1,
unsigned dims, unsigned rsize, unsigned op,
brw_predicate pred = BRW_PREDICATE_NONE);
}
}
#endif

View file

@ -1,320 +0,0 @@
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* \file brw_vec4_tcs.cpp
*
* Tessellation control shader specific code derived from the vec4_visitor class.
*/
#include "intel_nir.h"
#include "brw_vec4_tcs.h"
namespace brw {
vec4_tcs_visitor::vec4_tcs_visitor(const struct brw_compiler *compiler,
const struct brw_compile_params *params,
const struct brw_tcs_prog_key *key,
struct brw_tcs_prog_data *prog_data,
const nir_shader *nir,
bool debug_enabled)
: vec4_visitor(compiler, params, &key->base.tex, &prog_data->base,
nir, false, debug_enabled),
key(key)
{
}
void
vec4_tcs_visitor::setup_payload()
{
int reg = 0;
/* The payload always contains important data in r0, which contains
* the URB handles that are passed on to the URB write at the end
* of the thread.
*/
reg++;
/* r1.0 - r4.7 may contain the input control point URB handles,
* which we use to pull vertex data.
*/
reg += 4;
/* Push constants may start at r5.0 */
reg = setup_uniforms(reg);
this->first_non_payload_grf = reg;
}
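/* Summarizing the layout set up above: r0 holds the URB handles, r1.0-r4.7
 * may hold the input control point handles, push constants may start at
 * r5.0, and first_non_payload_grf points just past whatever setup_uniforms()
 * consumed.
 */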
void
vec4_tcs_visitor::emit_prolog()
{
invocation_id = src_reg(this, glsl_uint_type());
emit(TCS_OPCODE_GET_INSTANCE_ID, dst_reg(invocation_id));
/* HS threads are dispatched with the dispatch mask set to 0xFF.
* If there are an odd number of output vertices, then the final
* HS instance dispatched will only have its bottom half doing real
* work, and so we need to disable the upper half:
*/
if (nir->info.tess.tcs_vertices_out % 2) {
emit(CMP(dst_null_d(), invocation_id,
brw_imm_ud(nir->info.tess.tcs_vertices_out),
BRW_CONDITIONAL_L));
/* Matching ENDIF is in emit_thread_end() */
emit(IF(BRW_PREDICATE_NORMAL));
}
}
void
vec4_tcs_visitor::emit_thread_end()
{
vec4_instruction *inst;
current_annotation = "thread end";
if (nir->info.tess.tcs_vertices_out % 2) {
emit(BRW_OPCODE_ENDIF);
}
if (devinfo->ver == 7) {
struct brw_tcs_prog_data *tcs_prog_data =
(struct brw_tcs_prog_data *) prog_data;
current_annotation = "release input vertices";
/* Synchronize all threads, so we know that no one is still
* using the input URB handles.
*/
if (tcs_prog_data->instances > 1) {
dst_reg header = dst_reg(this, glsl_uvec4_type());
emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header);
emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header));
}
/* Make thread 0 (invocations <1, 0>) release pairs of ICP handles.
* We want to compare the bottom half of invocation_id with 0, but
* use that truth value for the top half as well. Unfortunately,
* we don't have stride in the vec4 world, nor UV immediates in
* align16, so we need an opcode to get invocation_id<0,4,0>.
*/
set_condmod(BRW_CONDITIONAL_Z,
emit(TCS_OPCODE_SRC0_010_IS_ZERO, dst_null_d(),
invocation_id));
emit(IF(BRW_PREDICATE_NORMAL));
for (unsigned i = 0; i < key->input_vertices; i += 2) {
/* If we have an odd number of input vertices, the last will be
* unpaired. We don't want to use an interleaved URB write in
* that case.
*/
const bool is_unpaired = i == key->input_vertices - 1;
dst_reg header(this, glsl_uvec4_type());
emit(TCS_OPCODE_RELEASE_INPUT, header, brw_imm_ud(i),
brw_imm_ud(is_unpaired));
}
emit(BRW_OPCODE_ENDIF);
}
inst = emit(TCS_OPCODE_THREAD_END);
inst->base_mrf = 14;
inst->mlen = 2;
}
void
vec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst,
const src_reg &vertex_index,
unsigned base_offset,
unsigned first_component,
const src_reg &indirect_offset)
{
vec4_instruction *inst;
dst_reg temp(this, glsl_ivec4_type());
temp.type = dst.type;
/* Set up the message header to reference the proper parts of the URB */
dst_reg header = dst_reg(this, glsl_uvec4_type());
inst = emit(VEC4_TCS_OPCODE_SET_INPUT_URB_OFFSETS, header, vertex_index,
indirect_offset);
inst->force_writemask_all = true;
/* Read into a temporary, ignoring writemasking. */
inst = emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
inst->offset = base_offset;
inst->mlen = 1;
inst->base_mrf = -1;
/* Copy the temporary to the destination to deal with writemasking.
*
* Also attempt to deal with gl_PointSize being in the .w component.
*/
if (inst->offset == 0 && indirect_offset.file == BAD_FILE) {
emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WWWW)));
} else {
src_reg src = src_reg(temp);
src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
emit(MOV(dst, src));
}
}
void
vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst,
unsigned base_offset,
unsigned first_component,
const src_reg &indirect_offset)
{
vec4_instruction *inst;
/* Set up the message header to reference the proper parts of the URB */
dst_reg header = dst_reg(this, glsl_uvec4_type());
inst = emit(VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, header,
brw_imm_ud(dst.writemask << first_component), indirect_offset);
inst->force_writemask_all = true;
vec4_instruction *read = emit(VEC4_OPCODE_URB_READ, dst, src_reg(header));
read->offset = base_offset;
read->mlen = 1;
read->base_mrf = -1;
if (first_component) {
/* Read into a temporary and copy with a swizzle and writemask. */
read->dst = retype(dst_reg(this, glsl_ivec4_type()), dst.type);
emit(MOV(dst, swizzle(src_reg(read->dst),
BRW_SWZ_COMP_INPUT(first_component))));
}
}
void
vec4_tcs_visitor::emit_urb_write(const src_reg &value,
unsigned writemask,
unsigned base_offset,
const src_reg &indirect_offset)
{
if (writemask == 0)
return;
src_reg message(this, glsl_uvec4_type(), 2);
vec4_instruction *inst;
inst = emit(VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, dst_reg(message),
brw_imm_ud(writemask), indirect_offset);
inst->force_writemask_all = true;
inst = emit(MOV(byte_offset(dst_reg(retype(message, value.type)), REG_SIZE),
value));
inst->force_writemask_all = true;
inst = emit(VEC4_TCS_OPCODE_URB_WRITE, dst_null_f(), message);
inst->offset = base_offset;
inst->mlen = 2;
inst->base_mrf = -1;
}
void
vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
switch (instr->intrinsic) {
case nir_intrinsic_load_invocation_id:
emit(MOV(get_nir_def(instr->def, BRW_REGISTER_TYPE_UD),
invocation_id));
break;
case nir_intrinsic_load_primitive_id:
emit(TCS_OPCODE_GET_PRIMITIVE_ID,
get_nir_def(instr->def, BRW_REGISTER_TYPE_UD));
break;
case nir_intrinsic_load_patch_vertices_in:
emit(MOV(get_nir_def(instr->def, BRW_REGISTER_TYPE_D),
brw_imm_d(key->input_vertices)));
break;
case nir_intrinsic_load_per_vertex_input: {
assert(instr->def.bit_size == 32);
src_reg indirect_offset = get_indirect_offset(instr);
unsigned imm_offset = nir_intrinsic_base(instr);
src_reg vertex_index = retype(get_nir_src_imm(instr->src[0]),
BRW_REGISTER_TYPE_UD);
unsigned first_component = nir_intrinsic_component(instr);
dst_reg dst = get_nir_def(instr->def, BRW_REGISTER_TYPE_D);
dst.writemask = brw_writemask_for_size(instr->num_components);
emit_input_urb_read(dst, vertex_index, imm_offset,
first_component, indirect_offset);
break;
}
case nir_intrinsic_load_input:
unreachable("nir_lower_io should use load_per_vertex_input intrinsics");
break;
case nir_intrinsic_load_output:
case nir_intrinsic_load_per_vertex_output: {
src_reg indirect_offset = get_indirect_offset(instr);
unsigned imm_offset = nir_intrinsic_base(instr);
dst_reg dst = get_nir_def(instr->def, BRW_REGISTER_TYPE_D);
dst.writemask = brw_writemask_for_size(instr->num_components);
emit_output_urb_read(dst, imm_offset, nir_intrinsic_component(instr),
indirect_offset);
break;
}
case nir_intrinsic_store_output:
case nir_intrinsic_store_per_vertex_output: {
assert(nir_src_bit_size(instr->src[0]) == 32);
src_reg value = get_nir_src(instr->src[0]);
unsigned mask = nir_intrinsic_write_mask(instr);
unsigned swiz = BRW_SWIZZLE_XYZW;
src_reg indirect_offset = get_indirect_offset(instr);
unsigned imm_offset = nir_intrinsic_base(instr);
unsigned first_component = nir_intrinsic_component(instr);
if (first_component) {
assert(swiz == BRW_SWIZZLE_XYZW);
swiz = BRW_SWZ_COMP_OUTPUT(first_component);
mask = mask << first_component;
}
emit_urb_write(swizzle(value, swiz), mask,
imm_offset, indirect_offset);
break;
}
case nir_intrinsic_barrier:
if (nir_intrinsic_memory_scope(instr) != SCOPE_NONE)
vec4_visitor::nir_emit_intrinsic(instr);
if (nir_intrinsic_execution_scope(instr) == SCOPE_WORKGROUP) {
dst_reg header = dst_reg(this, glsl_uvec4_type());
emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header);
emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header));
}
break;
default:
vec4_visitor::nir_emit_intrinsic(instr);
}
}
} /* namespace brw */

View file

@ -1,83 +0,0 @@
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* \file brw_vec4_tcs.h
*
* The vec4-mode tessellation control shader compiler backend.
*/
#ifndef BRW_VEC4_TCS_H
#define BRW_VEC4_TCS_H
#include "brw_compiler.h"
#include "brw_eu.h"
#include "brw_vec4.h"
#ifdef __cplusplus
namespace brw {
class vec4_tcs_visitor : public vec4_visitor
{
public:
vec4_tcs_visitor(const struct brw_compiler *compiler,
const struct brw_compile_params *params,
const struct brw_tcs_prog_key *key,
struct brw_tcs_prog_data *prog_data,
const nir_shader *nir,
bool debug_enabled);
protected:
virtual void setup_payload();
virtual void emit_prolog();
virtual void emit_thread_end();
virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
void emit_input_urb_read(const dst_reg &dst,
const src_reg &vertex_index,
unsigned base_offset,
unsigned first_component,
const src_reg &indirect_offset);
void emit_output_urb_read(const dst_reg &dst,
unsigned base_offset,
unsigned first_component,
const src_reg &indirect_offset);
void emit_urb_write(const src_reg &value, unsigned writemask,
unsigned base_offset, const src_reg &indirect_offset);
/* we do not use the normal end-of-shader URB write mechanism -- but every
* vec4 stage must provide implementations of these:
*/
virtual void emit_urb_write_header(int /* mrf */) {}
virtual vec4_instruction *emit_urb_write_opcode(bool /* complete */) { return NULL; }
const struct brw_tcs_prog_key *key;
src_reg invocation_id;
};
} /* namespace brw */
#endif /* __cplusplus */
#endif /* BRW_VEC4_TCS_H */

View file

@ -1,223 +0,0 @@
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* \file brw_vec4_tes.cpp
*
* Tessellation evaluation shader specific code derived from the vec4_visitor class.
*/
#include "brw_vec4_tes.h"
#include "brw_cfg.h"
#include "dev/intel_debug.h"
namespace brw {
vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler,
const struct brw_compile_params *params,
const struct brw_tes_prog_key *key,
struct brw_tes_prog_data *prog_data,
const nir_shader *shader,
bool debug_enabled)
: vec4_visitor(compiler, params, &key->base.tex, &prog_data->base,
shader, false, debug_enabled)
{
}
void
vec4_tes_visitor::setup_payload()
{
int reg = 0;
/* The payload always contains important data in r0 and r1, which contain
* the URB handles that are passed on to the URB write at the end
* of the thread.
*/
reg += 2;
reg = setup_uniforms(reg);
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
for (int i = 0; i < 3; i++) {
if (inst->src[i].file != ATTR)
continue;
unsigned slot = inst->src[i].nr + inst->src[i].offset / 16;
struct brw_reg grf = brw_vec4_grf(reg + slot / 2, 4 * (slot % 2));
grf = stride(grf, 0, 4, 1);
grf.swizzle = inst->src[i].swizzle;
grf.type = inst->src[i].type;
grf.abs = inst->src[i].abs;
grf.negate = inst->src[i].negate;
inst->src[i] = grf;
}
}
reg += 8 * prog_data->urb_read_length;
this->first_non_payload_grf = reg;
}
void
vec4_tes_visitor::emit_prolog()
{
input_read_header = src_reg(this, glsl_uvec4_type());
emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header));
this->current_annotation = NULL;
}
void
vec4_tes_visitor::emit_urb_write_header(int mrf)
{
/* No need to do anything for DS; an implied write to this MRF will be
* performed by VEC4_VS_OPCODE_URB_WRITE.
*/
(void) mrf;
}
vec4_instruction *
vec4_tes_visitor::emit_urb_write_opcode(bool complete)
{
vec4_instruction *inst = emit(VEC4_VS_OPCODE_URB_WRITE);
inst->urb_write_flags = complete ?
BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;
return inst;
}
void
vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
const struct brw_tes_prog_data *tes_prog_data =
(const struct brw_tes_prog_data *) prog_data;
switch (instr->intrinsic) {
case nir_intrinsic_load_tess_coord:
/* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
emit(MOV(get_nir_def(instr->def, BRW_REGISTER_TYPE_F),
src_reg(brw_vec8_grf(1, 0))));
break;
case nir_intrinsic_load_tess_level_outer:
if (tes_prog_data->domain == INTEL_TESS_DOMAIN_ISOLINE) {
emit(MOV(get_nir_def(instr->def, BRW_REGISTER_TYPE_F),
swizzle(src_reg(ATTR, 1, glsl_vec4_type()),
BRW_SWIZZLE_ZWZW)));
} else {
emit(MOV(get_nir_def(instr->def, BRW_REGISTER_TYPE_F),
swizzle(src_reg(ATTR, 1, glsl_vec4_type()),
BRW_SWIZZLE_WZYX)));
}
break;
case nir_intrinsic_load_tess_level_inner:
if (tes_prog_data->domain == INTEL_TESS_DOMAIN_QUAD) {
emit(MOV(get_nir_def(instr->def, BRW_REGISTER_TYPE_F),
swizzle(src_reg(ATTR, 0, glsl_vec4_type()),
BRW_SWIZZLE_WZYX)));
} else {
emit(MOV(get_nir_def(instr->def, BRW_REGISTER_TYPE_F),
src_reg(ATTR, 1, glsl_float_type())));
}
break;
case nir_intrinsic_load_primitive_id:
emit(TES_OPCODE_GET_PRIMITIVE_ID,
get_nir_def(instr->def, BRW_REGISTER_TYPE_UD));
break;
case nir_intrinsic_load_input:
case nir_intrinsic_load_per_vertex_input: {
assert(instr->def.bit_size == 32);
src_reg indirect_offset = get_indirect_offset(instr);
unsigned imm_offset = instr->const_index[0];
src_reg header = input_read_header;
unsigned first_component = nir_intrinsic_component(instr);
if (indirect_offset.file != BAD_FILE) {
src_reg clamped_indirect_offset = src_reg(this, glsl_uvec4_type());
/* Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)" says the
* valid range of the offset is [0, 0FFFFFFFh].
*/
emit_minmax(BRW_CONDITIONAL_L,
dst_reg(clamped_indirect_offset),
retype(indirect_offset, BRW_REGISTER_TYPE_UD),
brw_imm_ud(0x0fffffffu));
header = src_reg(this, glsl_uvec4_type());
emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
input_read_header, clamped_indirect_offset);
} else {
/* Arbitrarily only push up to 24 vec4 slots worth of data,
* which is 12 registers (since each holds 2 vec4 slots).
*/
const unsigned max_push_slots = 24;
if (imm_offset < max_push_slots) {
src_reg src = src_reg(ATTR, imm_offset, glsl_ivec4_type());
src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
emit(MOV(get_nir_def(instr->def, BRW_REGISTER_TYPE_D), src));
prog_data->urb_read_length =
MAX2(prog_data->urb_read_length,
DIV_ROUND_UP(imm_offset + 1, 2));
break;
}
}
dst_reg temp(this, glsl_ivec4_type());
vec4_instruction *read =
emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
read->offset = imm_offset;
read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
src_reg src = src_reg(temp);
src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
/* Copy to target. We might end up with some funky writemasks landing
* in here, but we really don't want them in the above pseudo-ops.
*/
dst_reg dst = get_nir_def(instr->def, BRW_REGISTER_TYPE_D);
dst.writemask = brw_writemask_for_size(instr->num_components);
emit(MOV(dst, src));
break;
}
default:
vec4_visitor::nir_emit_intrinsic(instr);
}
}
void
vec4_tes_visitor::emit_thread_end()
{
/* For DS, we always end the thread by emitting a single vertex.
* emit_urb_write_opcode() will take care of setting the eot flag on the
* SEND instruction.
*/
emit_vertex();
}
} /* namespace brw */

View file

@ -1,65 +0,0 @@
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* \file brw_vec4_tes.h
*
* The vec4 mode tessellation evaluation shader compiler backend.
*/
#ifndef BRW_VEC4_TES_H
#define BRW_VEC4_TES_H
#include "brw_vec4.h"
#ifdef __cplusplus
namespace brw {
class vec4_tes_visitor : public vec4_visitor
{
public:
vec4_tes_visitor(const struct brw_compiler *compiler,
const struct brw_compile_params *params,
const struct brw_tes_prog_key *key,
struct brw_tes_prog_data *prog_data,
const nir_shader *nir,
bool debug_enabled);
protected:
virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
virtual void setup_payload();
virtual void emit_prolog();
virtual void emit_thread_end();
virtual void emit_urb_write_header(int mrf);
virtual vec4_instruction *emit_urb_write_opcode(bool complete);
private:
src_reg input_read_header;
};
} /* namespace brw */
#endif /* __cplusplus */
#endif /* BRW_VEC4_TES_H */

File diff suppressed because it is too large.

View file

@ -1,58 +0,0 @@
/*
* Copyright © 2006 - 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef BRW_VEC4_VS_VISITOR_H
#define BRW_VEC4_VS_VISITOR_H
#include "brw_vec4.h"
namespace brw {
class vec4_vs_visitor : public vec4_visitor
{
public:
vec4_vs_visitor(const struct brw_compiler *compiler,
const struct brw_compile_params *params,
const struct brw_vs_prog_key *key,
struct brw_vs_prog_data *vs_prog_data,
const nir_shader *shader,
bool debug_enabled);
protected:
virtual void setup_payload();
virtual void emit_prolog();
virtual void emit_thread_end();
virtual void emit_urb_write_header(int mrf);
virtual void emit_urb_slot(dst_reg reg, int varying);
virtual vec4_instruction *emit_urb_write_opcode(bool complete);
private:
int setup_attributes(int payload_reg);
const struct brw_vs_prog_key *const key;
struct brw_vs_prog_data * const vs_prog_data;
};
} /* namespace brw */
#endif /* BRW_VEC4_VS_VISITOR_H */

View file

@ -1,108 +0,0 @@
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "brw_vec4_vs.h"
#include "dev/intel_debug.h"
namespace brw {
void
vec4_vs_visitor::emit_prolog()
{
}
void
vec4_vs_visitor::emit_urb_write_header(int mrf)
{
/* No need to do anything for VS; an implied write to this MRF will be
* performed by VEC4_VS_OPCODE_URB_WRITE.
*/
(void) mrf;
}
vec4_instruction *
vec4_vs_visitor::emit_urb_write_opcode(bool complete)
{
vec4_instruction *inst = emit(VEC4_VS_OPCODE_URB_WRITE);
inst->urb_write_flags = complete ?
BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;
return inst;
}
void
vec4_vs_visitor::emit_urb_slot(dst_reg reg, int varying)
{
reg.type = BRW_REGISTER_TYPE_F;
output_reg[varying][0].type = reg.type;
switch (varying) {
case VARYING_SLOT_COL0:
case VARYING_SLOT_COL1:
case VARYING_SLOT_BFC0:
case VARYING_SLOT_BFC1: {
/* These built-in varyings are only supported in compatibility mode,
* and we only support GS in core profile. So, this must be a vertex
* shader.
*/
vec4_instruction *inst = emit_generic_urb_slot(reg, varying, 0);
if (inst && key->clamp_vertex_color)
inst->saturate = true;
break;
}
default:
return vec4_visitor::emit_urb_slot(reg, varying);
}
}
void
vec4_vs_visitor::emit_thread_end()
{
/* For VS, we always end the thread by emitting a single vertex.
* emit_urb_write_opcode() will take care of setting the eot flag on the
* SEND instruction.
*/
emit_vertex();
}
vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler,
const struct brw_compile_params *params,
const struct brw_vs_prog_key *key,
struct brw_vs_prog_data *vs_prog_data,
const nir_shader *shader,
bool debug_enabled)
: vec4_visitor(compiler, params, &key->base.tex, &vs_prog_data->base,
shader, false /* no_spills */, debug_enabled),
key(key),
vs_prog_data(vs_prog_data)
{
}
} /* namespace brw */

View file

@ -1,702 +0,0 @@
/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* This code is based on original work by Ilia Mirkin.
*/
/**
* \file gfx6_gs_visitor.cpp
*
* Gfx6 geometry shader implementation
*/
#include "gfx6_gs_visitor.h"
#include "brw_eu.h"
#include "brw_prim.h"
namespace brw {
void
gfx6_gs_visitor::emit_prolog()
{
vec4_gs_visitor::emit_prolog();
/* Gfx6 geometry shaders require allocating an initial VUE handle via an
* FF_SYNC message. However, the documentation remarks that only one thread
* can write to the URB simultaneously and the FF_SYNC message provides the
* synchronization mechanism for this, so using this message effectively
* stalls the thread until it is its turn to write to the URB. Because of
* this, the best way to implement geometry shader algorithms in gfx6 is to
* execute the algorithm before the FF_SYNC message to maximize parallelism.
*
* To achieve this we buffer the geometry shader outputs for each emitted
* vertex in vertex_output during operation. Then, when we have processed
* the last vertex (that is, at thread end time), we send the FF_SYNC
* message to allocate the initial VUE handle and write all buffered vertex
* data to the URB in one go.
*
* For each emitted vertex, vertex_output will hold vue_map.num_slots
* data items plus one additional item to hold required flags
* (PrimType, PrimStart, PrimEnd, as expected by the URB_WRITE message)
* which come right after the data items for that vertex. Vertex data and
* flags for the next vertex come right after the data items and flags for
* the previous vertex.
*/
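/* For example, with vue_map.num_slots == 4 and vertices_out == 3 the buffer
* allocated below holds (4 + 1) * 3 = 15 entries, laid out as
* v0 slot0..slot3, v0 flags, v1 slot0..slot3, v1 flags, v2 slot0..slot3,
* v2 flags.
*/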
this->current_annotation = "gfx6 prolog";
this->vertex_output = src_reg(this,
glsl_uint_type(),
(prog_data->vue_map.num_slots + 1) *
nir->info.gs.vertices_out);
this->vertex_output_offset = src_reg(this, glsl_uint_type());
emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_ud(0u)));
/* MRF 1 will be the header for all messages (FF_SYNC and URB_WRITES),
* so initialize it once to R0.
*/
vec4_instruction *inst = emit(MOV(dst_reg(MRF, 1),
retype(brw_vec8_grf(0, 0),
BRW_REGISTER_TYPE_UD)));
inst->force_writemask_all = true;
/* This will be used as a temporary to store writeback data of FF_SYNC
* and URB_WRITE messages.
*/
this->temp = src_reg(this, glsl_uint_type());
/* This will be used to know when we are processing the first vertex of
* a primitive. We will set this to URB_WRITE_PRIM_START only when we know
* that we are processing the first vertex in the primitive and to zero
* otherwise. This way we can use its value directly in the URB write
* headers.
*/
this->first_vertex = src_reg(this, glsl_uint_type());
emit(MOV(dst_reg(this->first_vertex), brw_imm_ud(URB_WRITE_PRIM_START)));
/* The FF_SYNC message needs to know the number of primitives generated,
* so keep a counter for this.
*/
this->prim_count = src_reg(this, glsl_uint_type());
emit(MOV(dst_reg(this->prim_count), brw_imm_ud(0u)));
if (gs_prog_data->num_transform_feedback_bindings) {
/* Create a virtual register to hold destination indices in SOL */
this->destination_indices = src_reg(this, glsl_uvec4_type());
/* Create a virtual register to hold number of written primitives */
this->sol_prim_written = src_reg(this, glsl_uint_type());
/* Create a virtual register to hold Streamed Vertex Buffer Indices */
this->svbi = src_reg(this, glsl_uvec4_type());
/* Create a virtual register to hold max values of SVBI */
this->max_svbi = src_reg(this, glsl_uvec4_type());
emit(MOV(dst_reg(this->max_svbi),
src_reg(retype(brw_vec1_grf(1, 4), BRW_REGISTER_TYPE_UD))));
}
/* PrimitiveID is delivered in r0.1 of the thread payload. If the program
* needs it we have to move it to a separate register where we can map
* the attribute.
*
* Notice that we cannot use a virtual register for this, because we need to
* map all input attributes to hardware registers in setup_payload(),
* which happens before virtual registers are mapped to hardware registers.
* We could work around that issue if we were able to compute the first
* non-payload register here and move the PrimitiveID information to that
* register, but we can't because at this point we don't know the final
* number of uniforms that will be included in the payload.
*
* So, what we do is to place PrimitiveID information in r1, which is always
* delivered as part of the payload, but it's only populated with data
* relevant for transform feedback when we set GFX6_GS_SVBI_PAYLOAD_ENABLE
* in the 3DSTATE_GS state packet. That information can be obtained by other
* means though, so we can safely use r1 for this purpose.
*/
if (gs_prog_data->include_primitive_id) {
this->primitive_id =
src_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
emit(GS_OPCODE_SET_PRIMITIVE_ID, dst_reg(this->primitive_id));
}
}
void
gfx6_gs_visitor::gs_emit_vertex(int stream_id)
{
this->current_annotation = "gfx6 emit vertex";
/* Buffer all output slots for this vertex in vertex_output */
for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
int varying = prog_data->vue_map.slot_to_varying[slot];
if (varying != VARYING_SLOT_PSIZ) {
dst_reg dst(this->vertex_output);
dst.reladdr = ralloc(mem_ctx, src_reg);
memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
emit_urb_slot(dst, varying);
} else {
/* The PSIZ slot can pack multiple varyings in different channels
* and emit_urb_slot() will produce a MOV instruction for each of
* them. Since we are writing to an array, that will translate to
* possibly multiple MOV instructions with an array destination and
* each will generate a scratch write with the same offset into
* scratch space (thus, each one overwriting the previous). This is
* not what we want. What we will do instead is emit PSIZ to a
* regular temporary register, then move that register into the
* array. This way we only have one instruction with an array
* destination and we only produce a single scratch write.
*/
dst_reg tmp = dst_reg(src_reg(this, glsl_uvec4_type()));
emit_urb_slot(tmp, varying);
dst_reg dst(this->vertex_output);
dst.reladdr = ralloc(mem_ctx, src_reg);
memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
vec4_instruction *inst = emit(MOV(dst, src_reg(tmp)));
inst->force_writemask_all = true;
}
emit(ADD(dst_reg(this->vertex_output_offset),
this->vertex_output_offset, brw_imm_ud(1u)));
}
/* Now buffer flags for this vertex */
dst_reg dst(this->vertex_output);
dst.reladdr = ralloc(mem_ctx, src_reg);
memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
if (nir->info.gs.output_primitive == MESA_PRIM_POINTS) {
/* If we are outputting points, then every vertex has PrimStart and
* PrimEnd set.
*/
emit(MOV(dst, brw_imm_d((_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) |
URB_WRITE_PRIM_START | URB_WRITE_PRIM_END)));
emit(ADD(dst_reg(this->prim_count), this->prim_count, brw_imm_ud(1u)));
} else {
/* Otherwise, we can only set the PrimStart flag, which we have stored
* in the first_vertex register. We will have to wait until we execute
* EndPrimitive() or we end the thread to set the PrimEnd flag on a
* vertex.
*/
emit(OR(dst, this->first_vertex,
brw_imm_ud(gs_prog_data->output_topology <<
URB_WRITE_PRIM_TYPE_SHIFT)));
emit(MOV(dst_reg(this->first_vertex), brw_imm_ud(0u)));
}
emit(ADD(dst_reg(this->vertex_output_offset),
this->vertex_output_offset, brw_imm_ud(1u)));
}
void
gfx6_gs_visitor::gs_end_primitive()
{
this->current_annotation = "gfx6 end primitive";
/* Calling EndPrimitive() is optional for point output. In this case we set
* the PrimEnd flag when we process EmitVertex().
*/
if (nir->info.gs.output_primitive == MESA_PRIM_POINTS)
return;
/* Otherwise we know that the last vertex we have processed was the last
* vertex in the primitive and we need to set its PrimEnd flag, so do this
* as long as we have actually emitted that vertex (vertex_count != 0).
*
* Notice that we have already incremented vertex_count when we processed
* the last emit_vertex, so we need to take that into account in the
* comparison below (hence the num_output_vertices + 1).
*/
unsigned num_output_vertices = nir->info.gs.vertices_out;
emit(CMP(dst_null_ud(), this->vertex_count,
brw_imm_ud(num_output_vertices + 1), BRW_CONDITIONAL_L));
vec4_instruction *inst = emit(CMP(dst_null_ud(),
this->vertex_count, brw_imm_ud(0u),
BRW_CONDITIONAL_NEQ));
inst->predicate = BRW_PREDICATE_NORMAL;
emit(IF(BRW_PREDICATE_NORMAL));
{
/* vertex_output_offset is already pointing at the first entry of the
* next vertex. So subtract 1 to modify the flags for the previous
* vertex.
*/
src_reg offset(this, glsl_uint_type());
emit(ADD(dst_reg(offset), this->vertex_output_offset, brw_imm_d(-1)));
src_reg dst(this->vertex_output);
dst.reladdr = ralloc(mem_ctx, src_reg);
memcpy(dst.reladdr, &offset, sizeof(src_reg));
emit(OR(dst_reg(dst), dst, brw_imm_d(URB_WRITE_PRIM_END)));
emit(ADD(dst_reg(this->prim_count), this->prim_count, brw_imm_ud(1u)));
/* Set the first vertex flag to indicate that the next vertex will start
* a primitive.
*/
emit(MOV(dst_reg(this->first_vertex), brw_imm_d(URB_WRITE_PRIM_START)));
}
emit(BRW_OPCODE_ENDIF);
}
void
gfx6_gs_visitor::emit_urb_write_header(int mrf)
{
this->current_annotation = "gfx6 urb header";
/* Compute offset of the flags for the current vertex in vertex_output and
* write them in dw2 of the message header.
*
* Notice that by the time that emit_thread_end() calls here
* vertex_output_offset should point to the first data item of the current
* vertex in vertex_output, thus we only need to add the number of output
* slots per vertex to that offset to obtain the flags data offset.
*/
src_reg flags_offset(this, glsl_uint_type());
emit(ADD(dst_reg(flags_offset),
this->vertex_output_offset,
brw_imm_d(prog_data->vue_map.num_slots)));
src_reg flags_data(this->vertex_output);
flags_data.reladdr = ralloc(mem_ctx, src_reg);
memcpy(flags_data.reladdr, &flags_offset, sizeof(src_reg));
emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, mrf), flags_data);
}
static unsigned
align_interleaved_urb_mlen(unsigned mlen)
{
/* URB data written (does not include the message header reg) must
* be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
* section 5.4.3.2.2: URB_INTERLEAVED.
*/
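/* For illustration: mlen here counts the header register plus the data, so
* forcing it to be odd keeps the data portion (mlen - 1) a multiple of two
* registers, e.g. mlen 3 stays 3, mlen 4 becomes 5, mlen 6 becomes 7.
*/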
if ((mlen % 2) != 1)
mlen++;
return mlen;
}
void
gfx6_gs_visitor::emit_snb_gs_urb_write_opcode(bool complete, int base_mrf,
int last_mrf, int urb_offset)
{
vec4_instruction *inst = NULL;
if (!complete) {
/* If the vertex is not complete we don't have to do anything special */
inst = emit(VEC4_GS_OPCODE_URB_WRITE);
inst->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
} else {
/* Otherwise we always request to allocate a new VUE handle. If this is
* the last write before the EOT message and the new handle never gets
* used it will be dereferenced when we send the EOT message. This is
* necessary to avoid different setups for the EOT message (one for the
* case when there is no output and another for the case when there is)
* which would require ending the program with an IF/ELSE/ENDIF block,
* something we do not want.
*/
inst = emit(VEC4_GS_OPCODE_URB_WRITE_ALLOCATE);
inst->urb_write_flags = BRW_URB_WRITE_COMPLETE;
inst->dst = dst_reg(MRF, base_mrf);
inst->src[0] = this->temp;
}
inst->base_mrf = base_mrf;
inst->mlen = align_interleaved_urb_mlen(last_mrf - base_mrf);
inst->offset = urb_offset;
}
void
gfx6_gs_visitor::emit_thread_end()
{
/* Make sure the current primitive is ended: we know it is not ended when
* first_vertex is not zero. This is only relevant for outputs other than
* points because in the point case we set PrimEnd on all vertices.
*/
if (nir->info.gs.output_primitive != MESA_PRIM_POINTS) {
emit(CMP(dst_null_ud(), this->first_vertex, brw_imm_ud(0u), BRW_CONDITIONAL_Z));
emit(IF(BRW_PREDICATE_NORMAL));
gs_end_primitive();
emit(BRW_OPCODE_ENDIF);
}
/* Here we have to:
* 1) Emit an FF_SYNC message to obtain an initial VUE handle.
* 2) Loop over all buffered vertex data and write it to corresponding
* URB entries.
* 3) Allocate new VUE handles for all vertices other than the first.
* 4) Send a final EOT message.
*/
/* MRF 0 is reserved for the debugger, so start with message header
* in MRF 1.
*/
int base_mrf = 1;
/* In the process of generating our URB write message contents, we
* may need to unspill a register or load from an array. Those
* reads would use MRFs 21..23
*/
int max_usable_mrf = FIRST_SPILL_MRF(devinfo->ver);
/* Issue the FF_SYNC message and obtain the initial VUE handle. */
this->current_annotation = "gfx6 thread end: ff_sync";
vec4_instruction *inst = NULL;
if (gs_prog_data->num_transform_feedback_bindings) {
src_reg sol_temp(this, glsl_uvec4_type());
emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
dst_reg(this->svbi),
this->vertex_count,
this->prim_count,
sol_temp);
inst = emit(GS_OPCODE_FF_SYNC,
dst_reg(this->temp), this->prim_count, this->svbi);
} else {
inst = emit(GS_OPCODE_FF_SYNC,
dst_reg(this->temp), this->prim_count, brw_imm_ud(0u));
}
inst->base_mrf = base_mrf;
emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u), BRW_CONDITIONAL_G));
emit(IF(BRW_PREDICATE_NORMAL));
{
/* Loop over all buffered vertices and emit URB write messages */
this->current_annotation = "gfx6 thread end: urb writes init";
src_reg vertex(this, glsl_uint_type());
emit(MOV(dst_reg(vertex), brw_imm_ud(0u)));
emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_ud(0u)));
this->current_annotation = "gfx6 thread end: urb writes";
emit(BRW_OPCODE_DO);
{
emit(CMP(dst_null_d(), vertex, this->vertex_count, BRW_CONDITIONAL_GE));
inst = emit(BRW_OPCODE_BREAK);
inst->predicate = BRW_PREDICATE_NORMAL;
/* First we prepare the message header */
emit_urb_write_header(base_mrf);
/* Then add vertex data to the message in interleaved fashion */
int slot = 0;
bool complete = false;
do {
int mrf = base_mrf + 1;
/* URB offset is in URB row increments, and each of our MRFs is half
* of one of those, since we're doing interleaved writes.
*/
int urb_offset = slot / 2;
for (; slot < prog_data->vue_map.num_slots; ++slot) {
int varying = prog_data->vue_map.slot_to_varying[slot];
current_annotation = output_reg_annotation[varying];
/* Compute offset of this slot for the current vertex
* in vertex_output
*/
src_reg data(this->vertex_output);
data.reladdr = ralloc(mem_ctx, src_reg);
memcpy(data.reladdr, &this->vertex_output_offset,
sizeof(src_reg));
/* Copy this slot to the appropriate message register */
dst_reg reg = dst_reg(MRF, mrf);
reg.type = output_reg[varying][0].type;
data.type = reg.type;
inst = emit(MOV(reg, data));
inst->force_writemask_all = true;
mrf++;
emit(ADD(dst_reg(this->vertex_output_offset),
this->vertex_output_offset, brw_imm_ud(1u)));
/* If this was max_usable_mrf, we can't fit anything more into
* this URB WRITE. Same if we reached the max. message length.
*/
if (mrf > max_usable_mrf ||
align_interleaved_urb_mlen(mrf - base_mrf + 1) > BRW_MAX_MSG_LENGTH) {
slot++;
break;
}
}
complete = slot >= prog_data->vue_map.num_slots;
emit_snb_gs_urb_write_opcode(complete, base_mrf, mrf, urb_offset);
} while (!complete);
/* Skip over the flags data item so that vertex_output_offset points
* to the first data item of the next vertex, so that we can start
* writing the next vertex.
*/
emit(ADD(dst_reg(this->vertex_output_offset),
this->vertex_output_offset, brw_imm_ud(1u)));
emit(ADD(dst_reg(vertex), vertex, brw_imm_ud(1u)));
}
emit(BRW_OPCODE_WHILE);
if (gs_prog_data->num_transform_feedback_bindings)
xfb_write();
}
emit(BRW_OPCODE_ENDIF);
/* Finally, emit EOT message.
*
* In gfx6 we need to end the thread differently depending on whether we have
* emitted at least one vertex or not. In case we did, the EOT message must
* always include the COMPLETE flag or else the GPU hangs. If we have not
* produced any output we can't use the COMPLETE flag.
*
* However, checking this at run time would lead us to end the program with
* an IF/ELSE/ENDIF block, which we want to avoid. So what we do instead is
* always request a new VUE handle, even if the GS produces no output. With
* this we make sure that, whether we have emitted at least one vertex or
* none at all, we can finish the thread without writing to the URB, which
* works for both cases by setting the COMPLETE and UNUSED flags in the EOT
* message.
*/
this->current_annotation = "gfx6 thread end: EOT";
if (gs_prog_data->num_transform_feedback_bindings) {
/* When emitting EOT, set SONumPrimsWritten Increment Value. */
src_reg data(this, glsl_uint_type());
emit(AND(dst_reg(data), this->sol_prim_written, brw_imm_ud(0xffffu)));
emit(SHL(dst_reg(data), data, brw_imm_ud(16u)));
emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, base_mrf), data);
}
inst = emit(GS_OPCODE_THREAD_END);
inst->urb_write_flags = BRW_URB_WRITE_COMPLETE | BRW_URB_WRITE_UNUSED;
inst->base_mrf = base_mrf;
inst->mlen = 1;
}
void
gfx6_gs_visitor::setup_payload()
{
int attribute_map[BRW_VARYING_SLOT_COUNT * MAX_GS_INPUT_VERTICES];
/* Attributes are going to be interleaved, so one register contains two
* attribute slots.
*/
int attributes_per_reg = 2;
/* If a geometry shader tries to read from an input that wasn't written by
* the vertex shader, that produces undefined results, but it shouldn't
* crash anything. So initialize attribute_map to zeros--that ensures that
* these undefined results are read from r0.
*/
memset(attribute_map, 0, sizeof(attribute_map));
int reg = 0;
/* The payload always contains important data in r0. */
reg++;
/* r1 is always part of the payload and it holds information relevant
* for transform feedback when we set the GFX6_GS_SVBI_PAYLOAD_ENABLE bit in
* the 3DSTATE_GS packet. We will overwrite it with the PrimitiveID
* information (and move the original value to a virtual register if
* necessary).
*/
if (gs_prog_data->include_primitive_id)
attribute_map[VARYING_SLOT_PRIMITIVE_ID] = attributes_per_reg * reg;
reg++;
reg = setup_uniforms(reg);
reg = setup_varying_inputs(reg, attributes_per_reg);
this->first_non_payload_grf = reg;
}
void
gfx6_gs_visitor::xfb_write()
{
unsigned num_verts;
switch (gs_prog_data->output_topology) {
case _3DPRIM_POINTLIST:
num_verts = 1;
break;
case _3DPRIM_LINELIST:
case _3DPRIM_LINESTRIP:
case _3DPRIM_LINELOOP:
num_verts = 2;
break;
case _3DPRIM_TRILIST:
case _3DPRIM_TRIFAN:
case _3DPRIM_TRISTRIP:
case _3DPRIM_RECTLIST:
num_verts = 3;
break;
case _3DPRIM_QUADLIST:
case _3DPRIM_QUADSTRIP:
case _3DPRIM_POLYGON:
num_verts = 3;
break;
default:
unreachable("Unexpected primitive type in Gfx6 SOL program.");
}
this->current_annotation = "gfx6 thread end: svb writes init";
emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_ud(0u)));
emit(MOV(dst_reg(this->sol_prim_written), brw_imm_ud(0u)));
/* Check that at least one primitive can be written
*
* Note: since we use the binding table to keep track of buffer offsets
* and stride, the GS doesn't need to keep track of a separate pointer
* into each buffer; it uses a single pointer which increments by 1 for
* each vertex. So we use SVBI0 for this pointer, regardless of whether
* transform feedback is in interleaved or separate attribs mode.
*/
src_reg sol_temp(this, glsl_uvec4_type());
emit(ADD(dst_reg(sol_temp), this->svbi, brw_imm_ud(num_verts)));
/* Compare the calculated SVBI number with the maximum value, which is
* in R1.4 (previously saved in this->max_svbi) for gfx6.
*/
emit(CMP(dst_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
emit(IF(BRW_PREDICATE_NORMAL));
{
vec4_instruction *inst = emit(MOV(dst_reg(destination_indices),
brw_imm_vf4(brw_float_to_vf(0.0),
brw_float_to_vf(1.0),
brw_float_to_vf(2.0),
brw_float_to_vf(0.0))));
inst->force_writemask_all = true;
emit(ADD(dst_reg(this->destination_indices),
this->destination_indices,
this->svbi));
}
emit(BRW_OPCODE_ENDIF);
/* Write transform feedback data for all processed vertices. */
for (int i = 0; i < (int)nir->info.gs.vertices_out; i++) {
emit(MOV(dst_reg(sol_temp), brw_imm_d(i)));
emit(CMP(dst_null_d(), sol_temp, this->vertex_count,
BRW_CONDITIONAL_L));
emit(IF(BRW_PREDICATE_NORMAL));
{
xfb_program(i, num_verts);
}
emit(BRW_OPCODE_ENDIF);
}
}
void
gfx6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
{
unsigned binding;
unsigned num_bindings = gs_prog_data->num_transform_feedback_bindings;
src_reg sol_temp(this, glsl_uvec4_type());
/* Check for buffer overflow: we need room to write the complete primitive
* (all vertices). Otherwise, avoid writing any vertices for it
*/
emit(ADD(dst_reg(sol_temp), this->sol_prim_written, brw_imm_ud(1u)));
emit(MUL(dst_reg(sol_temp), sol_temp, brw_imm_ud(num_verts)));
emit(ADD(dst_reg(sol_temp), sol_temp, this->svbi));
emit(CMP(dst_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
emit(IF(BRW_PREDICATE_NORMAL));
{
/* Avoid overwriting MRF 1 as it is used as URB write message header */
dst_reg mrf_reg(MRF, 2);
this->current_annotation = "gfx6: emit SOL vertex data";
/* For each vertex, generate code to output each varying using the
* appropriate binding table entry.
*/
for (binding = 0; binding < num_bindings; ++binding) {
unsigned char varying =
gs_prog_data->transform_feedback_bindings[binding];
/* Set up the correct destination index for this vertex */
vec4_instruction *inst = emit(GS_OPCODE_SVB_SET_DST_INDEX,
mrf_reg,
this->destination_indices);
inst->sol_vertex = vertex % num_verts;
/* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
*
* "Prior to End of Thread with a URB_WRITE, the kernel must
* ensure that all writes are complete by sending the final
* write as a committed write."
*/
bool final_write = binding == (unsigned) num_bindings - 1 &&
inst->sol_vertex == num_verts - 1;
/* Compute offset of this varying for the current vertex
* in vertex_output
*/
this->current_annotation = output_reg_annotation[varying];
src_reg data(this->vertex_output);
data.reladdr = ralloc(mem_ctx, src_reg);
int offset = get_vertex_output_offset_for_varying(vertex, varying);
emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_d(offset)));
memcpy(data.reladdr, &this->vertex_output_offset, sizeof(src_reg));
data.type = output_reg[varying][0].type;
data.swizzle = gs_prog_data->transform_feedback_swizzles[binding];
/* Write data */
inst = emit(GS_OPCODE_SVB_WRITE, mrf_reg, data, sol_temp);
inst->sol_binding = binding;
inst->sol_final_write = final_write;
if (final_write) {
/* This is the last vertex of the primitive, so increment the
* SO primitive counter and the destination indices.
*/
emit(ADD(dst_reg(this->destination_indices),
this->destination_indices,
brw_imm_ud(num_verts)));
emit(ADD(dst_reg(this->sol_prim_written),
this->sol_prim_written, brw_imm_ud(1u)));
}
}
this->current_annotation = NULL;
}
emit(BRW_OPCODE_ENDIF);
}
int
gfx6_gs_visitor::get_vertex_output_offset_for_varying(int vertex, int varying)
{
/* Find the output slot assigned to this varying.
*
* VARYING_SLOT_LAYER and VARYING_SLOT_VIEWPORT are packed in the same slot
* as VARYING_SLOT_PSIZ.
*/
if (varying == VARYING_SLOT_LAYER || varying == VARYING_SLOT_VIEWPORT)
varying = VARYING_SLOT_PSIZ;
int slot = prog_data->vue_map.varying_to_slot[varying];
if (slot < 0) {
/* This varying does not exist in the VUE so we are not writing to it
* and its value is undefined. We still want to return a valid offset
* into vertex_output though, to prevent any out-of-bound accesses into
* the vertex_output array. Since the value for this varying is undefined
* we don't really care for the value we assign to it, so any offset
* within the limits of vertex_output will do.
*/
slot = 0;
}
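/* Each vertex occupies (vue_map.num_slots + 1) consecutive entries in
* vertex_output (its output slots followed by one flags entry), matching the
* layout buffered in gs_emit_vertex(), so index the vertex block and then
* the slot within it.
*/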
return vertex * (prog_data->vue_map.num_slots + 1) + slot;
}
} /* namespace brw */

View file

@ -1,84 +0,0 @@
/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#ifndef GFX6_GS_VISITOR_H
#define GFX6_GS_VISITOR_H
#include "brw_vec4.h"
#include "brw_vec4_gs_visitor.h"
#ifdef __cplusplus
namespace brw {
class gfx6_gs_visitor : public vec4_gs_visitor
{
public:
gfx6_gs_visitor(const struct brw_compiler *comp,
const struct brw_compile_params *params,
struct brw_gs_compile *c,
struct brw_gs_prog_data *prog_data,
const nir_shader *shader,
bool no_spills,
bool debug_enabled) :
vec4_gs_visitor(comp, params, c, prog_data, shader, no_spills, debug_enabled)
{
}
protected:
virtual void emit_prolog();
virtual void emit_thread_end();
virtual void gs_emit_vertex(int stream_id);
virtual void gs_end_primitive();
virtual void emit_urb_write_header(int mrf);
virtual void setup_payload();
private:
void xfb_write();
void xfb_program(unsigned vertex, unsigned num_verts);
int get_vertex_output_offset_for_varying(int vertex, int varying);
void emit_snb_gs_urb_write_opcode(bool complete,
int base_mrf,
int last_mrf,
int urb_offset);
src_reg vertex_output;
src_reg vertex_output_offset;
src_reg temp;
src_reg first_vertex;
src_reg prim_count;
src_reg primitive_id;
/* Transform Feedback members */
src_reg sol_prim_written;
src_reg svbi;
src_reg max_svbi;
src_reg destination_indices;
};
} /* namespace brw */
#endif /* __cplusplus */
#endif /* GFX6_GS_VISITOR_H */

View file

@ -105,7 +105,6 @@ libintel_compiler_brw_files = files(
'brw_ir_fs.h',
'brw_ir_performance.h',
'brw_ir_performance.cpp',
'brw_ir_vec4.h',
'brw_isa_info.h',
'brw_lower_logical_sends.cpp',
'brw_mesh.cpp',
@ -137,33 +136,7 @@ libintel_compiler_brw_files = files(
'brw_shader.cpp',
'brw_shader.h',
'brw_simd_selection.cpp',
'brw_vec4_builder.h',
'brw_vec4_cmod_propagation.cpp',
'brw_vec4_copy_propagation.cpp',
'brw_vec4.cpp',
'brw_vec4_cse.cpp',
'brw_vec4_dead_code_eliminate.cpp',
'brw_vec4_generator.cpp',
'brw_vec4_gs_visitor.cpp',
'brw_vec4_gs_visitor.h',
'brw_vec4.h',
'brw_vec4_live_variables.cpp',
'brw_vec4_live_variables.h',
'brw_vec4_nir.cpp',
'brw_vec4_gs_nir.cpp',
'brw_vec4_reg_allocate.cpp',
'brw_vec4_surface_builder.cpp',
'brw_vec4_surface_builder.h',
'brw_vec4_tcs.cpp',
'brw_vec4_tcs.h',
'brw_vec4_tes.cpp',
'brw_vec4_tes.h',
'brw_vec4_visitor.cpp',
'brw_vec4_vs_visitor.cpp',
'brw_vec4_vs.h',
'brw_vue_map.c',
'gfx6_gs_visitor.cpp',
'gfx6_gs_visitor.h',
)
brw_device_sha1_gen_src = custom_target('brw_device_sha1_gen.c',
@ -236,10 +209,6 @@ if with_tests
'test_fs_saturate_propagation.cpp',
'test_fs_scoreboard.cpp',
'test_simd_selection.cpp',
'test_vec4_cmod_propagation.cpp',
'test_vec4_copy_propagation.cpp',
'test_vec4_dead_code_eliminate.cpp',
'test_vec4_register_coalesce.cpp',
'test_vf_float_conversions.cpp',
),
ir_expression_operation_h,

File diff suppressed because it is too large.

View file

@ -1,195 +0,0 @@
/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <gtest/gtest.h>
#include "brw_vec4.h"
using namespace brw;
class copy_propagation_vec4_test : public ::testing::Test {
virtual void SetUp();
virtual void TearDown();
public:
struct brw_compiler *compiler;
struct brw_compile_params params;
struct intel_device_info *devinfo;
void *ctx;
struct gl_shader_program *shader_prog;
struct brw_vue_prog_data *prog_data;
vec4_visitor *v;
};
class copy_propagation_vec4_visitor : public vec4_visitor
{
public:
copy_propagation_vec4_visitor(struct brw_compiler *compiler,
struct brw_compile_params *params,
nir_shader *shader,
struct brw_vue_prog_data *prog_data)
: vec4_visitor(compiler, params, NULL, prog_data, shader,
false /* no_spills */, false)
{
prog_data->dispatch_mode = INTEL_DISPATCH_MODE_4X2_DUAL_OBJECT;
}
protected:
virtual dst_reg *make_reg_for_system_value(int /* location */)
{
unreachable("Not reached");
}
virtual void setup_payload()
{
unreachable("Not reached");
}
virtual void emit_prolog()
{
unreachable("Not reached");
}
virtual void emit_thread_end()
{
unreachable("Not reached");
}
virtual void emit_urb_write_header(int /* mrf */)
{
unreachable("Not reached");
}
virtual vec4_instruction *emit_urb_write_opcode(bool /* complete */)
{
unreachable("Not reached");
}
};
void copy_propagation_vec4_test::SetUp()
{
ctx = ralloc_context(NULL);
compiler = rzalloc(ctx, struct brw_compiler);
devinfo = rzalloc(ctx, struct intel_device_info);
compiler->devinfo = devinfo;
params = {};
params.mem_ctx = ctx;
prog_data = ralloc(ctx, struct brw_vue_prog_data);
nir_shader *shader =
nir_shader_create(ctx, MESA_SHADER_VERTEX, NULL, NULL);
v = new copy_propagation_vec4_visitor(compiler, &params, shader, prog_data);
devinfo->ver = 4;
devinfo->verx10 = devinfo->ver * 10;
}
void copy_propagation_vec4_test::TearDown()
{
delete v;
v = NULL;
ralloc_free(ctx);
ctx = NULL;
}
static void
copy_propagation(vec4_visitor *v)
{
const bool print = getenv("TEST_DEBUG");
if (print) {
fprintf(stderr, "instructions before:\n");
v->dump_instructions();
}
v->calculate_cfg();
v->opt_copy_propagation();
if (print) {
fprintf(stderr, "instructions after:\n");
v->dump_instructions();
}
}
TEST_F(copy_propagation_vec4_test, test_swizzle_swizzle)
{
dst_reg a = dst_reg(v, glsl_vec4_type());
dst_reg b = dst_reg(v, glsl_vec4_type());
dst_reg c = dst_reg(v, glsl_vec4_type());
v->emit(v->ADD(a, src_reg(a), src_reg(a)));
v->emit(v->MOV(b, swizzle(src_reg(a), BRW_SWIZZLE4(BRW_SWIZZLE_Y,
BRW_SWIZZLE_Z,
BRW_SWIZZLE_W,
BRW_SWIZZLE_X))));
vec4_instruction *test_mov =
v->MOV(c, swizzle(src_reg(b), BRW_SWIZZLE4(BRW_SWIZZLE_Y,
BRW_SWIZZLE_Z,
BRW_SWIZZLE_W,
BRW_SWIZZLE_X)));
v->emit(test_mov);
copy_propagation(v);
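/* Composing the two swizzles: b = a.yzwx and c reads b.yzwx, which is a.zwxy,
* so after propagation the MOV should read a directly with that swizzle.
*/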
EXPECT_EQ(test_mov->src[0].nr, a.nr);
EXPECT_EQ(test_mov->src[0].swizzle, BRW_SWIZZLE4(BRW_SWIZZLE_Z,
BRW_SWIZZLE_W,
BRW_SWIZZLE_X,
BRW_SWIZZLE_Y));
}
TEST_F(copy_propagation_vec4_test, test_swizzle_writemask)
{
dst_reg a = dst_reg(v, glsl_vec4_type());
dst_reg b = dst_reg(v, glsl_vec4_type());
dst_reg c = dst_reg(v, glsl_vec4_type());
v->emit(v->MOV(b, swizzle(src_reg(a), BRW_SWIZZLE4(BRW_SWIZZLE_X,
BRW_SWIZZLE_Y,
BRW_SWIZZLE_X,
BRW_SWIZZLE_Z))));
v->emit(v->MOV(writemask(a, WRITEMASK_XYZ), brw_imm_f(1.0f)));
vec4_instruction *test_mov =
v->MOV(c, swizzle(src_reg(b), BRW_SWIZZLE4(BRW_SWIZZLE_W,
BRW_SWIZZLE_W,
BRW_SWIZZLE_W,
BRW_SWIZZLE_W)));
v->emit(test_mov);
copy_propagation(v);
/* should not copy propagate */
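/* b.w was sourced from a.z, but a.xyz has since been overwritten, so
* propagating the copy would read the clobbered value.
*/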
EXPECT_EQ(test_mov->src[0].nr, b.nr);
EXPECT_EQ(test_mov->src[0].swizzle, BRW_SWIZZLE4(BRW_SWIZZLE_W,
BRW_SWIZZLE_W,
BRW_SWIZZLE_W,
BRW_SWIZZLE_W));
}

View file

@ -1,178 +0,0 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <gtest/gtest.h>
#include "brw_vec4.h"
using namespace brw;
class dead_code_eliminate_vec4_test : public ::testing::Test {
virtual void SetUp();
virtual void TearDown();
public:
struct brw_compiler *compiler;
struct brw_compile_params params;
struct intel_device_info *devinfo;
void *ctx;
struct gl_shader_program *shader_prog;
struct brw_vue_prog_data *prog_data;
vec4_visitor *v;
};
class dead_code_eliminate_vec4_visitor : public vec4_visitor
{
public:
dead_code_eliminate_vec4_visitor(struct brw_compiler *compiler,
struct brw_compile_params *params,
nir_shader *shader,
struct brw_vue_prog_data *prog_data)
: vec4_visitor(compiler, params, NULL, prog_data, shader,
false /* no_spills */, false)
{
prog_data->dispatch_mode = INTEL_DISPATCH_MODE_4X2_DUAL_OBJECT;
}
protected:
virtual dst_reg *make_reg_for_system_value(int /* location */)
{
unreachable("Not reached");
}
virtual void setup_payload()
{
unreachable("Not reached");
}
virtual void emit_prolog()
{
unreachable("Not reached");
}
virtual void emit_thread_end()
{
unreachable("Not reached");
}
virtual void emit_urb_write_header(int /* mrf */)
{
unreachable("Not reached");
}
virtual vec4_instruction *emit_urb_write_opcode(bool /* complete */)
{
unreachable("Not reached");
}
};
void dead_code_eliminate_vec4_test::SetUp()
{
ctx = ralloc_context(NULL);
compiler = rzalloc(ctx, struct brw_compiler);
devinfo = rzalloc(ctx, struct intel_device_info);
compiler->devinfo = devinfo;
params = {};
params.mem_ctx = ctx;
prog_data = ralloc(ctx, struct brw_vue_prog_data);
nir_shader *shader =
nir_shader_create(ctx, MESA_SHADER_VERTEX, NULL, NULL);
v = new dead_code_eliminate_vec4_visitor(compiler, &params, shader, prog_data);
devinfo->ver = 4;
devinfo->verx10 = devinfo->ver * 10;
}
void dead_code_eliminate_vec4_test::TearDown()
{
delete v;
v = NULL;
ralloc_free(ctx);
ctx = NULL;
}
static void
dead_code_eliminate(vec4_visitor *v)
{
const bool print = getenv("TEST_DEBUG");
if (print) {
fprintf(stderr, "instructions before:\n");
v->dump_instructions();
}
v->calculate_cfg();
v->dead_code_eliminate();
if (print) {
fprintf(stderr, "instructions after:\n");
v->dump_instructions();
}
}
TEST_F(dead_code_eliminate_vec4_test, some_dead_channels_all_flags_used)
{
const vec4_builder bld = vec4_builder(v).at_end();
src_reg r1 = src_reg(v, glsl_vec4_type());
src_reg r2 = src_reg(v, glsl_vec4_type());
src_reg r3 = src_reg(v, glsl_vec4_type());
src_reg r4 = src_reg(v, glsl_vec4_type());
src_reg r5 = src_reg(v, glsl_vec4_type());
src_reg r6 = src_reg(v, glsl_vec4_type());
/* Sequence like the following should not be modified by DCE.
*
* cmp.l.f0(8) g4<1>F g2<4,4,1>.wF g1<4,4,1>.xF
* mov(8) g5<1>.xF g4<4,4,1>.xF
* (+f0.x) sel(8) g6<1>UD g3<4>UD g6<4>UD
*/
vec4_instruction *test_cmp =
bld.CMP(dst_reg(r4), r2, r1, BRW_CONDITIONAL_L);
test_cmp->src[0].swizzle = BRW_SWIZZLE_WWWW;
test_cmp->src[1].swizzle = BRW_SWIZZLE_XXXX;
vec4_instruction *test_mov =
bld.MOV(dst_reg(r5), r4);
test_mov->dst.writemask = WRITEMASK_X;
test_mov->src[0].swizzle = BRW_SWIZZLE_XXXX;
vec4_instruction *test_sel =
bld.SEL(dst_reg(r6), r3, r6);
set_predicate(BRW_PREDICATE_NORMAL, test_sel);
/* The scratch write is here just to make r5 and r6 be live so that the
* whole program doesn't get eliminated by DCE.
*/
v->emit(v->SCRATCH_WRITE(dst_reg(r4), r6, r5));
dead_code_eliminate(v);
EXPECT_EQ(test_cmp->dst.writemask, WRITEMASK_XYZW);
}

View file

@ -1,256 +0,0 @@
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <gtest/gtest.h>
#include "brw_vec4.h"
using namespace brw;
#define register_coalesce(v) _register_coalesce(v, __func__)
class register_coalesce_vec4_test : public ::testing::Test {
virtual void SetUp();
virtual void TearDown();
public:
struct brw_compiler *compiler;
struct brw_compile_params params;
struct intel_device_info *devinfo;
void *ctx;
struct gl_shader_program *shader_prog;
struct brw_vue_prog_data *prog_data;
vec4_visitor *v;
};
class register_coalesce_vec4_visitor : public vec4_visitor
{
public:
register_coalesce_vec4_visitor(struct brw_compiler *compiler,
struct brw_compile_params *params,
nir_shader *shader,
struct brw_vue_prog_data *prog_data)
: vec4_visitor(compiler, params, NULL, prog_data, shader,
false /* no_spills */, false)
{
prog_data->dispatch_mode = INTEL_DISPATCH_MODE_4X2_DUAL_OBJECT;
}
protected:
virtual dst_reg *make_reg_for_system_value(int /* location */)
{
unreachable("Not reached");
}
virtual void setup_payload()
{
unreachable("Not reached");
}
virtual void emit_prolog()
{
unreachable("Not reached");
}
virtual void emit_thread_end()
{
unreachable("Not reached");
}
virtual void emit_urb_write_header(int /* mrf */)
{
unreachable("Not reached");
}
virtual vec4_instruction *emit_urb_write_opcode(bool /* complete */)
{
unreachable("Not reached");
}
};
void register_coalesce_vec4_test::SetUp()
{
ctx = ralloc_context(NULL);
compiler = rzalloc(ctx, struct brw_compiler);
devinfo = rzalloc(ctx, struct intel_device_info);
compiler->devinfo = devinfo;
prog_data = ralloc(ctx, struct brw_vue_prog_data);
params = {};
params.mem_ctx = ctx;
nir_shader *shader =
nir_shader_create(ctx, MESA_SHADER_VERTEX, NULL, NULL);
v = new register_coalesce_vec4_visitor(compiler, &params, shader, prog_data);
devinfo->ver = 4;
devinfo->verx10 = devinfo->ver * 10;
}
void register_coalesce_vec4_test::TearDown()
{
delete v;
v = NULL;
ralloc_free(ctx);
ctx = NULL;
}
static void
_register_coalesce(vec4_visitor *v, const char *func)
{
const bool print = getenv("TEST_DEBUG");
if (print) {
printf("%s: instructions before:\n", func);
v->dump_instructions();
}
v->calculate_cfg();
v->opt_register_coalesce();
if (print) {
printf("%s: instructions after:\n", func);
v->dump_instructions();
}
}
TEST_F(register_coalesce_vec4_test, test_compute_to_mrf)
{
src_reg something = src_reg(v, glsl_float_type());
dst_reg temp = dst_reg(v, glsl_float_type());
dst_reg init;
dst_reg m0 = dst_reg(MRF, 0);
m0.writemask = WRITEMASK_X;
m0.type = BRW_REGISTER_TYPE_F;
vec4_instruction *mul = v->emit(v->MUL(temp, something, brw_imm_f(1.0f)));
v->emit(v->MOV(m0, src_reg(temp)));
register_coalesce(v);
EXPECT_EQ(mul->dst.file, MRF);
}
TEST_F(register_coalesce_vec4_test, test_multiple_use)
{
src_reg something = src_reg(v, glsl_float_type());
dst_reg temp = dst_reg(v, glsl_vec4_type());
dst_reg init;
dst_reg m0 = dst_reg(MRF, 0);
m0.writemask = WRITEMASK_X;
m0.type = BRW_REGISTER_TYPE_F;
dst_reg m1 = dst_reg(MRF, 1);
m1.writemask = WRITEMASK_XYZW;
m1.type = BRW_REGISTER_TYPE_F;
src_reg src = src_reg(temp);
vec4_instruction *mul = v->emit(v->MUL(temp, something, brw_imm_f(1.0f)));
src.swizzle = BRW_SWIZZLE_XXXX;
v->emit(v->MOV(m0, src));
src.swizzle = BRW_SWIZZLE_XYZW;
v->emit(v->MOV(m1, src));
register_coalesce(v);
EXPECT_NE(mul->dst.file, MRF);
}
TEST_F(register_coalesce_vec4_test, test_dp4_mrf)
{
src_reg some_src_1 = src_reg(v, glsl_vec4_type());
src_reg some_src_2 = src_reg(v, glsl_vec4_type());
dst_reg init;
dst_reg m0 = dst_reg(MRF, 0);
m0.writemask = WRITEMASK_Y;
m0.type = BRW_REGISTER_TYPE_F;
dst_reg temp = dst_reg(v, glsl_float_type());
vec4_instruction *dp4 = v->emit(v->DP4(temp, some_src_1, some_src_2));
v->emit(v->MOV(m0, src_reg(temp)));
register_coalesce(v);
EXPECT_EQ(dp4->dst.file, MRF);
EXPECT_EQ(dp4->dst.writemask, WRITEMASK_Y);
}
TEST_F(register_coalesce_vec4_test, test_dp4_grf)
{
src_reg some_src_1 = src_reg(v, glsl_vec4_type());
src_reg some_src_2 = src_reg(v, glsl_vec4_type());
dst_reg init;
dst_reg to = dst_reg(v, glsl_vec4_type());
dst_reg temp = dst_reg(v, glsl_float_type());
vec4_instruction *dp4 = v->emit(v->DP4(temp, some_src_1, some_src_2));
to.writemask = WRITEMASK_Y;
v->emit(v->MOV(to, src_reg(temp)));
/* if we don't do something with the result, the automatic dead code
* elimination will remove all our instructions.
*/
src_reg src = src_reg(to);
src.negate = true;
v->emit(v->MOV(dst_reg(MRF, 0), src));
register_coalesce(v);
EXPECT_EQ(dp4->dst.nr, to.nr);
EXPECT_EQ(dp4->dst.writemask, WRITEMASK_Y);
}
TEST_F(register_coalesce_vec4_test, test_channel_mul_grf)
{
src_reg some_src_1 = src_reg(v, glsl_vec4_type());
src_reg some_src_2 = src_reg(v, glsl_vec4_type());
dst_reg init;
dst_reg to = dst_reg(v, glsl_vec4_type());
dst_reg temp = dst_reg(v, glsl_float_type());
vec4_instruction *mul = v->emit(v->MUL(temp, some_src_1, some_src_2));
to.writemask = WRITEMASK_Y;
v->emit(v->MOV(to, src_reg(temp)));
/* if we don't do something with the result, the automatic dead code
* elimination will remove all our instructions.
*/
src_reg src = src_reg(to);
src.negate = true;
v->emit(v->MOV(dst_reg(MRF, 0), src));
register_coalesce(v);
EXPECT_EQ(mul->dst.nr, to.nr);
}