mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-10 06:00:14 +01:00
intel/elk: Remove a bunch of files that don't apply for Gfx8-
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27563>
This commit is contained in:
parent
06b553f02c
commit
dcf29202d4
124 changed files with 0 additions and 17536 deletions
|
|
@ -3490,8 +3490,6 @@ fs_visitor::emit_repclear_shader()
|
|||
calculate_cfg();
|
||||
|
||||
this->first_non_payload_grf = payload().num_regs;
|
||||
|
||||
lower_scoreboard();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -6823,8 +6821,6 @@ fs_visitor::allocate_registers(bool allow_spilling)
|
|||
*/
|
||||
assert(prog_data->total_scratch < max_scratch_size);
|
||||
}
|
||||
|
||||
lower_scoreboard();
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,790 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2020 Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "brw_kernel.h"
|
||||
#include "brw_nir.h"
|
||||
#include "intel_nir.h"
|
||||
|
||||
#include "intel_nir.h"
|
||||
#include "nir_clc_helpers.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "compiler/spirv/nir_spirv.h"
|
||||
#include "dev/intel_debug.h"
|
||||
#include "util/u_atomic.h"
|
||||
#include "util/u_dynarray.h"
|
||||
|
||||
static const nir_shader *
|
||||
load_clc_shader(struct brw_compiler *compiler, struct disk_cache *disk_cache,
|
||||
const nir_shader_compiler_options *nir_options,
|
||||
const struct spirv_to_nir_options *spirv_options)
|
||||
{
|
||||
if (compiler->clc_shader)
|
||||
return compiler->clc_shader;
|
||||
|
||||
nir_shader *nir = nir_load_libclc_shader(64, disk_cache,
|
||||
spirv_options, nir_options,
|
||||
disk_cache != NULL);
|
||||
if (nir == NULL)
|
||||
return NULL;
|
||||
|
||||
const nir_shader *old_nir =
|
||||
p_atomic_cmpxchg(&compiler->clc_shader, NULL, nir);
|
||||
if (old_nir == NULL) {
|
||||
/* We won the race */
|
||||
ralloc_steal(compiler, nir);
|
||||
return nir;
|
||||
} else {
|
||||
/* Someone else built the shader first */
|
||||
ralloc_free(nir);
|
||||
return old_nir;
|
||||
}
|
||||
}
|
||||
|
||||
static nir_builder
|
||||
builder_init_new_impl(nir_function *func)
|
||||
{
|
||||
nir_function_impl *impl = nir_function_impl_create(func);
|
||||
return nir_builder_at(nir_before_impl(impl));
|
||||
}
|
||||
|
||||
static void
|
||||
implement_atomic_builtin(nir_function *func, nir_atomic_op atomic_op,
|
||||
enum glsl_base_type data_base_type,
|
||||
nir_variable_mode mode)
|
||||
{
|
||||
nir_builder b = builder_init_new_impl(func);
|
||||
const struct glsl_type *data_type = glsl_scalar_type(data_base_type);
|
||||
|
||||
unsigned p = 0;
|
||||
|
||||
nir_deref_instr *ret = NULL;
|
||||
ret = nir_build_deref_cast(&b, nir_load_param(&b, p++),
|
||||
nir_var_function_temp, data_type, 0);
|
||||
|
||||
nir_intrinsic_op op = nir_intrinsic_deref_atomic;
|
||||
nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b.shader, op);
|
||||
nir_intrinsic_set_atomic_op(atomic, atomic_op);
|
||||
|
||||
for (unsigned i = 0; i < nir_intrinsic_infos[op].num_srcs; i++) {
|
||||
nir_def *src = nir_load_param(&b, p++);
|
||||
if (i == 0) {
|
||||
/* The first source is our deref */
|
||||
assert(nir_intrinsic_infos[op].src_components[i] == -1);
|
||||
src = &nir_build_deref_cast(&b, src, mode, data_type, 0)->def;
|
||||
}
|
||||
atomic->src[i] = nir_src_for_ssa(src);
|
||||
}
|
||||
|
||||
nir_def_init_for_type(&atomic->instr, &atomic->def, data_type);
|
||||
|
||||
nir_builder_instr_insert(&b, &atomic->instr);
|
||||
nir_store_deref(&b, ret, &atomic->def, ~0);
|
||||
}
|
||||
|
||||
static void
|
||||
implement_sub_group_ballot_builtin(nir_function *func)
|
||||
{
|
||||
nir_builder b = builder_init_new_impl(func);
|
||||
nir_deref_instr *ret =
|
||||
nir_build_deref_cast(&b, nir_load_param(&b, 0),
|
||||
nir_var_function_temp, glsl_uint_type(), 0);
|
||||
nir_def *cond = nir_load_param(&b, 1);
|
||||
|
||||
nir_intrinsic_instr *ballot =
|
||||
nir_intrinsic_instr_create(b.shader, nir_intrinsic_ballot);
|
||||
ballot->src[0] = nir_src_for_ssa(cond);
|
||||
ballot->num_components = 1;
|
||||
nir_def_init(&ballot->instr, &ballot->def, 1, 32);
|
||||
nir_builder_instr_insert(&b, &ballot->instr);
|
||||
|
||||
nir_store_deref(&b, ret, &ballot->def, ~0);
|
||||
}
|
||||
|
||||
static bool
|
||||
implement_intel_builtins(nir_shader *nir)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
nir_foreach_function(func, nir) {
|
||||
if (strcmp(func->name, "_Z10atomic_minPU3AS1Vff") == 0) {
|
||||
/* float atom_min(__global float volatile *p, float val) */
|
||||
implement_atomic_builtin(func, nir_atomic_op_fmin,
|
||||
GLSL_TYPE_FLOAT, nir_var_mem_global);
|
||||
progress = true;
|
||||
} else if (strcmp(func->name, "_Z10atomic_maxPU3AS1Vff") == 0) {
|
||||
/* float atom_max(__global float volatile *p, float val) */
|
||||
implement_atomic_builtin(func, nir_atomic_op_fmax,
|
||||
GLSL_TYPE_FLOAT, nir_var_mem_global);
|
||||
progress = true;
|
||||
} else if (strcmp(func->name, "_Z10atomic_minPU3AS3Vff") == 0) {
|
||||
/* float atomic_min(__shared float volatile *, float) */
|
||||
implement_atomic_builtin(func, nir_atomic_op_fmin,
|
||||
GLSL_TYPE_FLOAT, nir_var_mem_shared);
|
||||
progress = true;
|
||||
} else if (strcmp(func->name, "_Z10atomic_maxPU3AS3Vff") == 0) {
|
||||
/* float atomic_max(__shared float volatile *, float) */
|
||||
implement_atomic_builtin(func, nir_atomic_op_fmax,
|
||||
GLSL_TYPE_FLOAT, nir_var_mem_shared);
|
||||
progress = true;
|
||||
} else if (strcmp(func->name, "intel_sub_group_ballot") == 0) {
|
||||
implement_sub_group_ballot_builtin(func);
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
nir_shader_preserve_all_metadata(nir);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_kernel_intrinsics(nir_shader *nir)
|
||||
{
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
|
||||
|
||||
bool progress = false;
|
||||
|
||||
unsigned kernel_sysvals_start = 0;
|
||||
unsigned kernel_arg_start = sizeof(struct brw_kernel_sysvals);
|
||||
nir->num_uniforms += kernel_arg_start;
|
||||
|
||||
nir_builder b = nir_builder_create(impl);
|
||||
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_kernel_input: {
|
||||
b.cursor = nir_instr_remove(&intrin->instr);
|
||||
|
||||
nir_intrinsic_instr *load =
|
||||
nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
|
||||
load->num_components = intrin->num_components;
|
||||
load->src[0] = nir_src_for_ssa(nir_u2u32(&b, intrin->src[0].ssa));
|
||||
nir_intrinsic_set_base(load, kernel_arg_start);
|
||||
nir_intrinsic_set_range(load, nir->num_uniforms);
|
||||
nir_def_init(&load->instr, &load->def,
|
||||
intrin->def.num_components,
|
||||
intrin->def.bit_size);
|
||||
nir_builder_instr_insert(&b, &load->instr);
|
||||
|
||||
nir_def_rewrite_uses(&intrin->def, &load->def);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_constant_base_ptr: {
|
||||
b.cursor = nir_instr_remove(&intrin->instr);
|
||||
nir_def *const_data_base_addr = nir_pack_64_2x32_split(&b,
|
||||
nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW),
|
||||
nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
|
||||
nir_def_rewrite_uses(&intrin->def, const_data_base_addr);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_num_workgroups: {
|
||||
b.cursor = nir_instr_remove(&intrin->instr);
|
||||
|
||||
nir_intrinsic_instr *load =
|
||||
nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
|
||||
load->num_components = 3;
|
||||
load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
|
||||
nir_intrinsic_set_base(load, kernel_sysvals_start +
|
||||
offsetof(struct brw_kernel_sysvals, num_work_groups));
|
||||
nir_intrinsic_set_range(load, 3 * 4);
|
||||
nir_def_init(&load->instr, &load->def, 3, 32);
|
||||
nir_builder_instr_insert(&b, &load->instr);
|
||||
nir_def_rewrite_uses(&intrin->def, &load->def);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (progress) {
|
||||
nir_metadata_preserve(impl, nir_metadata_block_index |
|
||||
nir_metadata_dominance);
|
||||
} else {
|
||||
nir_metadata_preserve(impl, nir_metadata_all);
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_kernel_from_spirv(struct brw_compiler *compiler,
|
||||
struct disk_cache *disk_cache,
|
||||
struct brw_kernel *kernel,
|
||||
void *log_data, void *mem_ctx,
|
||||
const uint32_t *spirv, size_t spirv_size,
|
||||
const char *entrypoint_name,
|
||||
char **error_str)
|
||||
{
|
||||
const struct intel_device_info *devinfo = compiler->devinfo;
|
||||
const nir_shader_compiler_options *nir_options =
|
||||
compiler->nir_options[MESA_SHADER_KERNEL];
|
||||
|
||||
struct spirv_to_nir_options spirv_options = {
|
||||
.environment = NIR_SPIRV_OPENCL,
|
||||
.caps = {
|
||||
.address = true,
|
||||
.float16 = devinfo->ver >= 8,
|
||||
.float64 = devinfo->ver >= 8,
|
||||
.groups = true,
|
||||
.image_write_without_format = true,
|
||||
.int8 = devinfo->ver >= 8,
|
||||
.int16 = devinfo->ver >= 8,
|
||||
.int64 = devinfo->ver >= 8,
|
||||
.int64_atomics = devinfo->ver >= 9,
|
||||
.kernel = true,
|
||||
.linkage = true, /* We receive linked kernel from clc */
|
||||
.float_controls = devinfo->ver >= 8,
|
||||
.generic_pointers = true,
|
||||
.storage_8bit = devinfo->ver >= 8,
|
||||
.storage_16bit = devinfo->ver >= 8,
|
||||
.subgroup_arithmetic = true,
|
||||
.subgroup_basic = true,
|
||||
.subgroup_ballot = true,
|
||||
.subgroup_dispatch = true,
|
||||
.subgroup_quad = true,
|
||||
.subgroup_shuffle = true,
|
||||
.subgroup_vote = true,
|
||||
|
||||
.intel_subgroup_shuffle = true,
|
||||
.intel_subgroup_buffer_block_io = true,
|
||||
},
|
||||
.shared_addr_format = nir_address_format_62bit_generic,
|
||||
.global_addr_format = nir_address_format_62bit_generic,
|
||||
.temp_addr_format = nir_address_format_62bit_generic,
|
||||
.constant_addr_format = nir_address_format_64bit_global,
|
||||
};
|
||||
|
||||
spirv_options.clc_shader = load_clc_shader(compiler, disk_cache,
|
||||
nir_options, &spirv_options);
|
||||
if (spirv_options.clc_shader == NULL) {
|
||||
fprintf(stderr, "ERROR: libclc shader missing."
|
||||
" Consider installing the libclc package\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
assert(spirv_size % 4 == 0);
|
||||
nir_shader *nir =
|
||||
spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
|
||||
entrypoint_name, &spirv_options, nir_options);
|
||||
nir_validate_shader(nir, "after spirv_to_nir");
|
||||
nir_validate_ssa_dominance(nir, "after spirv_to_nir");
|
||||
ralloc_steal(mem_ctx, nir);
|
||||
nir->info.name = ralloc_strdup(nir, entrypoint_name);
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_CS)) {
|
||||
/* Re-index SSA defs so we print more sensible numbers. */
|
||||
nir_foreach_function_impl(impl, nir) {
|
||||
nir_index_ssa_defs(impl);
|
||||
}
|
||||
|
||||
fprintf(stderr, "NIR (from SPIR-V) for kernel\n");
|
||||
nir_print_shader(nir, stderr);
|
||||
}
|
||||
|
||||
NIR_PASS_V(nir, implement_intel_builtins);
|
||||
NIR_PASS_V(nir, nir_link_shader_functions, spirv_options.clc_shader);
|
||||
|
||||
/* We have to lower away local constant initializers right before we
|
||||
* inline functions. That way they get properly initialized at the top
|
||||
* of the function and not at the top of its caller.
|
||||
*/
|
||||
NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
|
||||
NIR_PASS_V(nir, nir_lower_returns);
|
||||
NIR_PASS_V(nir, nir_inline_functions);
|
||||
NIR_PASS_V(nir, nir_copy_prop);
|
||||
NIR_PASS_V(nir, nir_opt_deref);
|
||||
|
||||
/* Pick off the single entrypoint that we want */
|
||||
nir_remove_non_entrypoints(nir);
|
||||
|
||||
/* Now that we've deleted all but the main function, we can go ahead and
|
||||
* lower the rest of the constant initializers. We do this here so that
|
||||
* nir_remove_dead_variables and split_per_member_structs below see the
|
||||
* corresponding stores.
|
||||
*/
|
||||
NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
|
||||
|
||||
/* LLVM loves take advantage of the fact that vec3s in OpenCL are 16B
|
||||
* aligned and so it can just read/write them as vec4s. This results in a
|
||||
* LOT of vec4->vec3 casts on loads and stores. One solution to this
|
||||
* problem is to get rid of all vec3 variables.
|
||||
*/
|
||||
NIR_PASS_V(nir, nir_lower_vec3_to_vec4,
|
||||
nir_var_shader_temp | nir_var_function_temp |
|
||||
nir_var_mem_shared | nir_var_mem_global|
|
||||
nir_var_mem_constant);
|
||||
|
||||
/* We assign explicit types early so that the optimizer can take advantage
|
||||
* of that information and hopefully get rid of some of our memcpys.
|
||||
*/
|
||||
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
|
||||
nir_var_uniform |
|
||||
nir_var_shader_temp | nir_var_function_temp |
|
||||
nir_var_mem_shared | nir_var_mem_global,
|
||||
glsl_get_cl_type_size_align);
|
||||
|
||||
struct brw_nir_compiler_opts opts = {};
|
||||
brw_preprocess_nir(compiler, nir, &opts);
|
||||
|
||||
int max_arg_idx = -1;
|
||||
nir_foreach_uniform_variable(var, nir) {
|
||||
assert(var->data.location < 256);
|
||||
max_arg_idx = MAX2(max_arg_idx, var->data.location);
|
||||
}
|
||||
|
||||
kernel->args_size = nir->num_uniforms;
|
||||
kernel->arg_count = max_arg_idx + 1;
|
||||
|
||||
/* No bindings */
|
||||
struct brw_kernel_arg_desc *args =
|
||||
rzalloc_array(mem_ctx, struct brw_kernel_arg_desc, kernel->arg_count);
|
||||
kernel->args = args;
|
||||
|
||||
nir_foreach_uniform_variable(var, nir) {
|
||||
struct brw_kernel_arg_desc arg_desc = {
|
||||
.offset = var->data.driver_location,
|
||||
.size = glsl_get_explicit_size(var->type, false),
|
||||
};
|
||||
assert(arg_desc.offset + arg_desc.size <= nir->num_uniforms);
|
||||
|
||||
assert(var->data.location >= 0);
|
||||
args[var->data.location] = arg_desc;
|
||||
}
|
||||
|
||||
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_all, NULL);
|
||||
|
||||
/* Lower again, this time after dead-variables to get more compact variable
|
||||
* layouts.
|
||||
*/
|
||||
nir->global_mem_size = 0;
|
||||
nir->scratch_size = 0;
|
||||
nir->info.shared_size = 0;
|
||||
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
|
||||
nir_var_shader_temp | nir_var_function_temp |
|
||||
nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant,
|
||||
glsl_get_cl_type_size_align);
|
||||
if (nir->constant_data_size > 0) {
|
||||
assert(nir->constant_data == NULL);
|
||||
nir->constant_data = rzalloc_size(nir, nir->constant_data_size);
|
||||
nir_gather_explicit_io_initializers(nir, nir->constant_data,
|
||||
nir->constant_data_size,
|
||||
nir_var_mem_constant);
|
||||
}
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_CS)) {
|
||||
/* Re-index SSA defs so we print more sensible numbers. */
|
||||
nir_foreach_function_impl(impl, nir) {
|
||||
nir_index_ssa_defs(impl);
|
||||
}
|
||||
|
||||
fprintf(stderr, "NIR (before I/O lowering) for kernel\n");
|
||||
nir_print_shader(nir, stderr);
|
||||
}
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_memcpy);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,
|
||||
nir_address_format_64bit_global);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform,
|
||||
nir_address_format_32bit_offset_as_64bit);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_explicit_io,
|
||||
nir_var_shader_temp | nir_var_function_temp |
|
||||
nir_var_mem_shared | nir_var_mem_global,
|
||||
nir_address_format_62bit_generic);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_convert_alu_types, NULL);
|
||||
|
||||
NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, devinfo, NULL);
|
||||
NIR_PASS_V(nir, lower_kernel_intrinsics);
|
||||
|
||||
struct brw_cs_prog_key key = { };
|
||||
|
||||
memset(&kernel->prog_data, 0, sizeof(kernel->prog_data));
|
||||
kernel->prog_data.base.nr_params = DIV_ROUND_UP(nir->num_uniforms, 4);
|
||||
|
||||
struct brw_compile_cs_params params = {
|
||||
.base = {
|
||||
.nir = nir,
|
||||
.stats = kernel->stats,
|
||||
.log_data = log_data,
|
||||
.mem_ctx = mem_ctx,
|
||||
},
|
||||
.key = &key,
|
||||
.prog_data = &kernel->prog_data,
|
||||
};
|
||||
|
||||
kernel->code = brw_compile_cs(compiler, ¶ms);
|
||||
|
||||
if (error_str)
|
||||
*error_str = params.base.error_str;
|
||||
|
||||
return kernel->code != NULL;
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
rebuild_value_from_store(struct util_dynarray *stores,
|
||||
nir_def *value, unsigned read_offset)
|
||||
{
|
||||
unsigned read_size = value->num_components * value->bit_size / 8;
|
||||
|
||||
util_dynarray_foreach(stores, nir_intrinsic_instr *, _store) {
|
||||
nir_intrinsic_instr *store = *_store;
|
||||
|
||||
unsigned write_offset = nir_src_as_uint(store->src[1]);
|
||||
unsigned write_size = nir_src_num_components(store->src[0]) *
|
||||
nir_src_bit_size(store->src[0]) / 8;
|
||||
if (write_offset <= read_offset &&
|
||||
(write_offset + write_size) >= (read_offset + read_size)) {
|
||||
assert(nir_block_dominates(store->instr.block, value->parent_instr->block));
|
||||
assert(write_size == read_size);
|
||||
return store->src[0].ssa;
|
||||
}
|
||||
}
|
||||
unreachable("Matching scratch store not found");
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove temporary variables stored to scratch to be then reloaded
|
||||
* immediately. Remap the load to the store SSA value.
|
||||
*
|
||||
* This workaround is only meant to be applied to shaders in src/intel/shaders
|
||||
* were we know there should be no issue. More complex cases might not work
|
||||
* with this approach.
|
||||
*/
|
||||
static bool
|
||||
nir_remove_llvm17_scratch(nir_shader *nir)
|
||||
{
|
||||
struct util_dynarray scratch_stores;
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
|
||||
util_dynarray_init(&scratch_stores, mem_ctx);
|
||||
|
||||
nir_foreach_function_impl(func, nir) {
|
||||
nir_foreach_block(block, func) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
|
||||
if (intrin->intrinsic != nir_intrinsic_store_scratch)
|
||||
continue;
|
||||
|
||||
nir_const_value *offset = nir_src_as_const_value(intrin->src[1]);
|
||||
if (offset != NULL) {
|
||||
util_dynarray_append(&scratch_stores, nir_intrinsic_instr *, intrin);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool progress = false;
|
||||
if (util_dynarray_num_elements(&scratch_stores, nir_intrinsic_instr *) > 0) {
|
||||
nir_foreach_function_impl(func, nir) {
|
||||
nir_foreach_block(block, func) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
|
||||
if (intrin->intrinsic != nir_intrinsic_load_scratch)
|
||||
continue;
|
||||
|
||||
nir_const_value *offset = nir_src_as_const_value(intrin->src[0]);
|
||||
if (offset == NULL)
|
||||
continue;
|
||||
|
||||
nir_def_rewrite_uses(&intrin->def,
|
||||
rebuild_value_from_store(
|
||||
&scratch_stores, &intrin->def,
|
||||
nir_src_as_uint(intrin->src[0])));
|
||||
nir_instr_remove(instr);
|
||||
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
util_dynarray_foreach(&scratch_stores, nir_intrinsic_instr *, _store) {
|
||||
nir_intrinsic_instr *store = *_store;
|
||||
nir_instr_remove(&store->instr);
|
||||
}
|
||||
|
||||
/* Quick sanity check */
|
||||
assert(util_dynarray_num_elements(&scratch_stores, nir_intrinsic_instr *) == 0 ||
|
||||
progress);
|
||||
|
||||
ralloc_free(mem_ctx);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
static void
|
||||
cleanup_llvm17_scratch(nir_shader *nir)
|
||||
{
|
||||
{
|
||||
bool progress;
|
||||
do {
|
||||
progress = false;
|
||||
NIR_PASS(progress, nir, nir_copy_prop);
|
||||
NIR_PASS(progress, nir, nir_opt_dce);
|
||||
NIR_PASS(progress, nir, nir_opt_constant_folding);
|
||||
NIR_PASS(progress, nir, nir_opt_cse);
|
||||
NIR_PASS(progress, nir, nir_opt_algebraic);
|
||||
} while (progress);
|
||||
}
|
||||
|
||||
nir_remove_llvm17_scratch(nir);
|
||||
|
||||
{
|
||||
bool progress;
|
||||
do {
|
||||
progress = false;
|
||||
NIR_PASS(progress, nir, nir_copy_prop);
|
||||
NIR_PASS(progress, nir, nir_opt_dce);
|
||||
NIR_PASS(progress, nir, nir_opt_constant_folding);
|
||||
NIR_PASS(progress, nir, nir_opt_cse);
|
||||
NIR_PASS(progress, nir, nir_opt_algebraic);
|
||||
} while (progress);
|
||||
}
|
||||
}
|
||||
|
||||
nir_shader *
|
||||
brw_nir_from_spirv(void *mem_ctx, const uint32_t *spirv, size_t spirv_size,
|
||||
bool llvm17_wa)
|
||||
{
|
||||
struct spirv_to_nir_options spirv_options = {
|
||||
.environment = NIR_SPIRV_OPENCL,
|
||||
.caps = {
|
||||
.address = true,
|
||||
.groups = true,
|
||||
.image_write_without_format = true,
|
||||
.int8 = true,
|
||||
.int16 = true,
|
||||
.int64 = true,
|
||||
.int64_atomics = true,
|
||||
.kernel = true,
|
||||
.linkage = true, /* We receive linked kernel from clc */
|
||||
.float_controls = true,
|
||||
.generic_pointers = true,
|
||||
.storage_8bit = true,
|
||||
.storage_16bit = true,
|
||||
.subgroup_arithmetic = true,
|
||||
.subgroup_basic = true,
|
||||
.subgroup_ballot = true,
|
||||
.subgroup_dispatch = true,
|
||||
.subgroup_quad = true,
|
||||
.subgroup_shuffle = true,
|
||||
.subgroup_vote = true,
|
||||
|
||||
.intel_subgroup_shuffle = true,
|
||||
.intel_subgroup_buffer_block_io = true,
|
||||
},
|
||||
.shared_addr_format = nir_address_format_62bit_generic,
|
||||
.global_addr_format = nir_address_format_62bit_generic,
|
||||
.temp_addr_format = nir_address_format_62bit_generic,
|
||||
.constant_addr_format = nir_address_format_64bit_global,
|
||||
.create_library = true,
|
||||
};
|
||||
|
||||
assert(spirv_size % 4 == 0);
|
||||
nir_shader *nir =
|
||||
spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
|
||||
"library", &spirv_options, &brw_scalar_nir_options);
|
||||
nir_validate_shader(nir, "after spirv_to_nir");
|
||||
nir_validate_ssa_dominance(nir, "after spirv_to_nir");
|
||||
ralloc_steal(mem_ctx, nir);
|
||||
nir->info.name = ralloc_strdup(nir, "library");
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_CS)) {
|
||||
/* Re-index SSA defs so we print more sensible numbers. */
|
||||
nir_foreach_function_impl(impl, nir) {
|
||||
nir_index_ssa_defs(impl);
|
||||
}
|
||||
|
||||
fprintf(stderr, "NIR (from SPIR-V) for kernel\n");
|
||||
nir_print_shader(nir, stderr);
|
||||
}
|
||||
|
||||
NIR_PASS_V(nir, implement_intel_builtins);
|
||||
NIR_PASS_V(nir, nir_link_shader_functions, spirv_options.clc_shader);
|
||||
|
||||
/* We have to lower away local constant initializers right before we
|
||||
* inline functions. That way they get properly initialized at the top
|
||||
* of the function and not at the top of its caller.
|
||||
*/
|
||||
NIR_PASS_V(nir, nir_lower_variable_initializers, ~(nir_var_shader_temp |
|
||||
nir_var_function_temp));
|
||||
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo |
|
||||
nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL);
|
||||
{
|
||||
bool progress;
|
||||
do
|
||||
{
|
||||
progress = false;
|
||||
NIR_PASS(progress, nir, nir_copy_prop);
|
||||
NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
|
||||
NIR_PASS(progress, nir, nir_opt_deref);
|
||||
NIR_PASS(progress, nir, nir_opt_dce);
|
||||
NIR_PASS(progress, nir, nir_opt_undef);
|
||||
NIR_PASS(progress, nir, nir_opt_constant_folding);
|
||||
NIR_PASS(progress, nir, nir_opt_cse);
|
||||
NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
|
||||
NIR_PASS(progress, nir, nir_opt_algebraic);
|
||||
} while (progress);
|
||||
}
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
|
||||
NIR_PASS_V(nir, nir_lower_returns);
|
||||
NIR_PASS_V(nir, nir_inline_functions);
|
||||
|
||||
assert(nir->scratch_size == 0);
|
||||
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_cl_type_size_align);
|
||||
|
||||
{
|
||||
bool progress;
|
||||
do
|
||||
{
|
||||
progress = false;
|
||||
NIR_PASS(progress, nir, nir_copy_prop);
|
||||
NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
|
||||
NIR_PASS(progress, nir, nir_opt_deref);
|
||||
NIR_PASS(progress, nir, nir_opt_dce);
|
||||
NIR_PASS(progress, nir, nir_opt_undef);
|
||||
NIR_PASS(progress, nir, nir_opt_constant_folding);
|
||||
NIR_PASS(progress, nir, nir_opt_cse);
|
||||
NIR_PASS(progress, nir, nir_split_var_copies);
|
||||
NIR_PASS(progress, nir, nir_lower_var_copies);
|
||||
NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
|
||||
NIR_PASS(progress, nir, nir_opt_algebraic);
|
||||
NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
|
||||
NIR_PASS(progress, nir, nir_opt_dead_cf);
|
||||
NIR_PASS(progress, nir, nir_opt_remove_phis);
|
||||
NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
|
||||
NIR_PASS(progress, nir, nir_lower_vec3_to_vec4, nir_var_mem_generic | nir_var_uniform);
|
||||
NIR_PASS(progress, nir, nir_opt_memcpy);
|
||||
} while (progress);
|
||||
}
|
||||
|
||||
NIR_PASS_V(nir, nir_scale_fdiv);
|
||||
|
||||
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo |
|
||||
nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL);
|
||||
|
||||
|
||||
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_shared | nir_var_function_temp, NULL);
|
||||
|
||||
nir->scratch_size = 0;
|
||||
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
|
||||
nir_var_mem_shared | nir_var_function_temp | nir_var_shader_temp |
|
||||
nir_var_mem_global | nir_var_mem_constant,
|
||||
glsl_get_cl_type_size_align);
|
||||
|
||||
// Lower memcpy - needs to wait until types are sized
|
||||
{
|
||||
bool progress;
|
||||
do {
|
||||
progress = false;
|
||||
NIR_PASS(progress, nir, nir_opt_memcpy);
|
||||
NIR_PASS(progress, nir, nir_copy_prop);
|
||||
NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
|
||||
NIR_PASS(progress, nir, nir_opt_deref);
|
||||
NIR_PASS(progress, nir, nir_opt_dce);
|
||||
NIR_PASS(progress, nir, nir_split_var_copies);
|
||||
NIR_PASS(progress, nir, nir_lower_var_copies);
|
||||
NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
|
||||
NIR_PASS(progress, nir, nir_opt_constant_folding);
|
||||
NIR_PASS(progress, nir, nir_opt_cse);
|
||||
} while (progress);
|
||||
}
|
||||
NIR_PASS_V(nir, nir_lower_memcpy);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_explicit_io,
|
||||
nir_var_mem_shared | nir_var_function_temp | nir_var_shader_temp | nir_var_uniform,
|
||||
nir_address_format_32bit_offset_as_64bit);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_system_values);
|
||||
|
||||
/* Hopefully we can drop this once lower_vars_to_ssa has improved to not
|
||||
* lower everything to scratch.
|
||||
*/
|
||||
if (llvm17_wa)
|
||||
cleanup_llvm17_scratch(nir);
|
||||
|
||||
/* Lower again, this time after dead-variables to get more compact variable
|
||||
* layouts.
|
||||
*/
|
||||
nir->global_mem_size = 0;
|
||||
nir->scratch_size = 0;
|
||||
nir->info.shared_size = 0;
|
||||
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
|
||||
nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant,
|
||||
glsl_get_cl_type_size_align);
|
||||
if (nir->constant_data_size > 0) {
|
||||
assert(nir->constant_data == NULL);
|
||||
nir->constant_data = rzalloc_size(nir, nir->constant_data_size);
|
||||
nir_gather_explicit_io_initializers(nir, nir->constant_data,
|
||||
nir->constant_data_size,
|
||||
nir_var_mem_constant);
|
||||
}
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,
|
||||
nir_address_format_64bit_global);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform,
|
||||
nir_address_format_32bit_offset_as_64bit);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_explicit_io,
|
||||
nir_var_shader_temp | nir_var_function_temp |
|
||||
nir_var_mem_shared | nir_var_mem_global,
|
||||
nir_address_format_62bit_generic);
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_CS)) {
|
||||
/* Re-index SSA defs so we print more sensible numbers. */
|
||||
nir_foreach_function_impl(impl, nir) {
|
||||
nir_index_ssa_defs(impl);
|
||||
}
|
||||
|
||||
fprintf(stderr, "NIR (before I/O lowering) for kernel\n");
|
||||
nir_print_shader(nir, stderr);
|
||||
}
|
||||
|
||||
return nir;
|
||||
}
|
||||
|
|
@ -1,78 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2020 Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BRW_KERNEL_H
|
||||
#define BRW_KERNEL_H
|
||||
|
||||
#include "brw_compiler.h"
|
||||
|
||||
struct disk_cache;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** Software interface for system values in kernels
|
||||
*
|
||||
* These are intended to go at the start of the kernel argument buffer.
|
||||
*/
|
||||
struct brw_kernel_sysvals {
|
||||
uint32_t num_work_groups[3];
|
||||
uint32_t pad[5];
|
||||
};
|
||||
|
||||
struct brw_kernel_arg_desc {
|
||||
uint16_t offset;
|
||||
uint16_t size;
|
||||
};
|
||||
|
||||
struct brw_kernel {
|
||||
struct brw_cs_prog_data prog_data;
|
||||
|
||||
struct brw_compile_stats stats[3];
|
||||
|
||||
uint16_t args_size;
|
||||
uint16_t arg_count;
|
||||
const struct brw_kernel_arg_desc *args;
|
||||
|
||||
const void *code;
|
||||
};
|
||||
|
||||
bool
|
||||
brw_kernel_from_spirv(struct brw_compiler *compiler,
|
||||
struct disk_cache *disk_cache,
|
||||
struct brw_kernel *kernel,
|
||||
void *log_data, void *mem_ctx,
|
||||
const uint32_t *spirv, size_t spirv_size,
|
||||
const char *entrypoint_name,
|
||||
char **error_str);
|
||||
|
||||
nir_shader *
|
||||
brw_nir_from_spirv(void *mem_ctx, const uint32_t *spirv, size_t spirv_size,
|
||||
bool llvm17_wa);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BRW_KERNEL_H */
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -23,7 +23,6 @@
|
|||
|
||||
#include "intel_nir.h"
|
||||
#include "brw_nir.h"
|
||||
#include "brw_nir_rt.h"
|
||||
#include "brw_shader.h"
|
||||
#include "dev/intel_debug.h"
|
||||
#include "compiler/glsl_types.h"
|
||||
|
|
@ -1770,15 +1769,6 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
|
|||
|
||||
OPT(nir_opt_dce);
|
||||
|
||||
/* The mesh stages require this pass to be called at the last minute,
|
||||
* but if anything is done by it, it will also constant fold, and that
|
||||
* undoes the work done by nir_trivialize_registers, so call it right
|
||||
* before that one instead.
|
||||
*/
|
||||
if (nir->info.stage == MESA_SHADER_MESH ||
|
||||
nir->info.stage == MESA_SHADER_TASK)
|
||||
brw_nir_adjust_payload(nir);
|
||||
|
||||
nir_trivialize_registers(nir);
|
||||
|
||||
/* This is the last pass we run before we start emitting stuff. It
|
||||
|
|
|
|||
|
|
@ -1,818 +0,0 @@
|
|||
/*
|
||||
* Copyright 2023 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file brw_nir_lower_cooperative_matrix.c
|
||||
* Lower cooperative matrix to subgroup operations.
|
||||
*
|
||||
* All supported matrix types are assumed to have either 8 rows or 8
|
||||
* columns. The other dimension of the matrix is typically 8 times the number
|
||||
* of data elements that can be stored in a 32-bit dword. Matrix data is
|
||||
* indexed by a combination of an array element and a subgroup invocation ID.
|
||||
*
|
||||
* Two layouts for matrix data are used. In the first layout,
|
||||
* subgroupShuffle(slice[N], ...) accesses row N of the matrix. This will be
|
||||
* called row-major hereafter. In the other layout,
|
||||
* subgroupShuffle(slice[...], M) accesses column M of the matrix. This will
|
||||
* be called column-major hereafter. In cases where a single 32-bit value is
|
||||
* stored in each entry, these layouts are identical.
|
||||
*
|
||||
* The subtle difference arises when multiple values are packed into a single
|
||||
* 32-bit dword. If two 16-bit values are packed in a single 32-bit value in
|
||||
* column-major, subgroupShuffle(slice[0], 1) holds matrix entries m[1][1] and
|
||||
* m[2][1] (in m[row][column] notation). In row-major, that same shuffle holds
|
||||
* m[0][2] and m[0][3].
|
||||
*
|
||||
* There is an alternate way to think about the matrix layouts. Every matrix
|
||||
* size supported by the Intel driver is either Sx8 (e.g., 16x8 for float16 B
|
||||
* matrix) or Sx8T (e.g., 8x32 for int8 A matrix). The A matrix and B matrix
|
||||
* layouts are such that a single 8 dword register hold an entire row of the
|
||||
* matrix.
|
||||
*
|
||||
* Consider a matrix stored starting in register g32. In an A matrix, the
|
||||
* packed dwords of g32 contain only the data for a single row of the
|
||||
* matrix. g32 is row 0, g33 is row 1, etc. In a B matrix, the packed dwords
|
||||
* of g(32+N).X contain only the data for a single column of the
|
||||
* matrix. g[32:40].0 is column 0, g[32:40].1 is column 1, etc.
|
||||
*
|
||||
* This leads to some shenanigans in \c lower_cmat_load_store.
|
||||
*
|
||||
* In the common case, A, C, and result matrices are stored row major while B
|
||||
* matrices are stored column major. This arrangement facilitates efficient
|
||||
* dot product operations using DPAS or DP4A instructions.
|
||||
*
|
||||
* Future optimizations are possible when row and column major are
|
||||
* flipped. That is, efficient dot products are also possible when A, C, and
|
||||
* result matrices are column major while B is row major.
|
||||
*/
|
||||
|
||||
#include "brw_nir.h"
|
||||
|
||||
struct lower_cmat_state {
|
||||
nir_shader *shader;
|
||||
|
||||
struct hash_table *slice_coop_types;
|
||||
|
||||
struct hash_table *vars_to_slice;
|
||||
|
||||
unsigned subgroup_size;
|
||||
};
|
||||
|
||||
static void
|
||||
print_coop_types(struct lower_cmat_state *state)
|
||||
{
|
||||
fprintf(stderr, "--- Slices to Cooperative Matrix type table\n");
|
||||
hash_table_foreach(state->slice_coop_types, e) {
|
||||
nir_variable *var = (void *)e->key;
|
||||
const struct glsl_type *t = e->data;
|
||||
fprintf(stderr, "%p: %s -> %s\n", var, var->name, glsl_get_type_name(t));
|
||||
}
|
||||
fprintf(stderr, "\n\n");
|
||||
}
|
||||
|
||||
static const struct glsl_type *
|
||||
get_coop_type_for_slice(struct lower_cmat_state *state, nir_deref_instr *deref)
|
||||
{
|
||||
nir_variable *var = nir_deref_instr_get_variable(deref);
|
||||
struct hash_entry *entry = _mesa_hash_table_search(state->slice_coop_types, var);
|
||||
|
||||
assert(entry != NULL);
|
||||
|
||||
return entry->data;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_cmat_filter(const nir_instr *instr, const void *_state)
|
||||
{
|
||||
if (instr->type == nir_instr_type_deref) {
|
||||
nir_deref_instr *deref = nir_instr_as_deref(instr);
|
||||
return glsl_type_is_cmat(deref->type);
|
||||
}
|
||||
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_cmat_construct:
|
||||
case nir_intrinsic_cmat_load:
|
||||
case nir_intrinsic_cmat_store:
|
||||
case nir_intrinsic_cmat_length:
|
||||
case nir_intrinsic_cmat_muladd:
|
||||
case nir_intrinsic_cmat_unary_op:
|
||||
case nir_intrinsic_cmat_binary_op:
|
||||
case nir_intrinsic_cmat_scalar_op:
|
||||
case nir_intrinsic_cmat_bitcast:
|
||||
case nir_intrinsic_cmat_insert:
|
||||
case nir_intrinsic_cmat_extract:
|
||||
case nir_intrinsic_cmat_copy:
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get number of matrix elements packed in each component of the slice.
|
||||
*/
|
||||
static unsigned
|
||||
get_packing_factor(const struct glsl_cmat_description desc,
|
||||
const struct glsl_type *slice_type)
|
||||
{
|
||||
const struct glsl_type *slice_element_type = glsl_without_array(slice_type);
|
||||
|
||||
assert(!glsl_type_is_cmat(slice_type));
|
||||
|
||||
assert(glsl_get_bit_size(slice_element_type) >= glsl_base_type_get_bit_size(desc.element_type));
|
||||
assert(glsl_get_bit_size(slice_element_type) % glsl_base_type_get_bit_size(desc.element_type) == 0);
|
||||
|
||||
return glsl_get_bit_size(slice_element_type) / glsl_base_type_get_bit_size(desc.element_type);
|
||||
}
|
||||
|
||||
static const struct glsl_type *
|
||||
get_slice_type_from_desc(const struct lower_cmat_state *state,
|
||||
const struct glsl_cmat_description desc)
|
||||
{
|
||||
enum glsl_base_type base_type;
|
||||
|
||||
/* Number of matrix elements stored by each subgroup invocation. If the
|
||||
* data is packed, the slice size will be less than this.
|
||||
*/
|
||||
const unsigned elements_per_invocation =
|
||||
(desc.rows * desc.cols) / state->subgroup_size;
|
||||
|
||||
assert(elements_per_invocation > 0);
|
||||
|
||||
const unsigned element_bits = 32;
|
||||
const unsigned bits = glsl_base_type_get_bit_size(desc.element_type);
|
||||
unsigned packing_factor = MIN2(elements_per_invocation,
|
||||
element_bits / bits);
|
||||
|
||||
/* Adjust the packing factor so that each row of the matrix fills and
|
||||
* entire GRF.
|
||||
*
|
||||
* The in-register layout of B matrices is different, so those are handled
|
||||
* more like column major (for row major matrices). See the file comment
|
||||
* for more details.
|
||||
*/
|
||||
const unsigned actual_cols = desc.use != GLSL_CMAT_USE_B ? desc.cols : desc.rows;
|
||||
while ((actual_cols / packing_factor) < 8) {
|
||||
assert(packing_factor > 1);
|
||||
packing_factor /= 2;
|
||||
}
|
||||
|
||||
switch (desc.element_type) {
|
||||
case GLSL_TYPE_FLOAT:
|
||||
base_type = GLSL_TYPE_FLOAT;
|
||||
break;
|
||||
case GLSL_TYPE_UINT:
|
||||
case GLSL_TYPE_FLOAT16:
|
||||
case GLSL_TYPE_UINT8:
|
||||
case GLSL_TYPE_UINT16:
|
||||
base_type = glsl_get_base_type(glsl_uintN_t_type(packing_factor * bits));
|
||||
break;
|
||||
case GLSL_TYPE_INT:
|
||||
case GLSL_TYPE_INT8:
|
||||
case GLSL_TYPE_INT16:
|
||||
base_type = glsl_get_base_type(glsl_intN_t_type(packing_factor * bits));
|
||||
break;
|
||||
default:
|
||||
unreachable("Invalid cooperative matrix element type.");
|
||||
}
|
||||
|
||||
unsigned len = elements_per_invocation / packing_factor;
|
||||
|
||||
/* Supported matrix sizes are designed to fill either 4 or 8 SIMD8
|
||||
* registers. That means:
|
||||
*
|
||||
* 4 regsiters 8 registers
|
||||
* SIMD32 len = 1 len = 2
|
||||
* SIMD16 len = 2 len = 4
|
||||
* SIMD8 len = 4 len = 8
|
||||
*
|
||||
* If configurations are added that result in other values of len, at the
|
||||
* very least this assertion will need to be updated. The only value of len
|
||||
* that makes sense to add would be 16, and that would be a lot of
|
||||
* registers.
|
||||
*/
|
||||
assert(len == 1 || len == 2 || len == 4 || len == 8);
|
||||
|
||||
const struct glsl_type *slice_type = glsl_vector_type(base_type, len);
|
||||
|
||||
assert(packing_factor == get_packing_factor(desc, slice_type));
|
||||
|
||||
return slice_type;
|
||||
}
|
||||
|
||||
static const struct glsl_type *
|
||||
get_slice_type(const struct lower_cmat_state *state,
|
||||
const struct glsl_type *type)
|
||||
{
|
||||
if (glsl_type_is_array(type)) {
|
||||
const struct glsl_type *slice_type =
|
||||
get_slice_type(state, glsl_get_array_element(type));
|
||||
|
||||
return glsl_array_type(slice_type, glsl_array_size(type), 0);
|
||||
}
|
||||
|
||||
assert(glsl_type_is_cmat(type));
|
||||
|
||||
return get_slice_type_from_desc(state,
|
||||
*glsl_get_cmat_description(type));
|
||||
}
|
||||
|
||||
static nir_deref_instr *
|
||||
create_local_slice(struct lower_cmat_state *state, nir_builder *b,
|
||||
const struct glsl_type *mat_type, const char *name)
|
||||
{
|
||||
const struct glsl_type *slice_type = get_slice_type(state, mat_type);
|
||||
nir_variable *slice_var = nir_local_variable_create(b->impl, slice_type, name);
|
||||
_mesa_hash_table_insert(state->slice_coop_types, slice_var, (void *)mat_type);
|
||||
return nir_build_deref_var(b, slice_var);
|
||||
}
|
||||
|
||||
static void
|
||||
lower_cmat_load_store(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
struct lower_cmat_state *state)
|
||||
{
|
||||
const bool load = intrin->intrinsic == nir_intrinsic_cmat_load;
|
||||
const unsigned mat_src = load ? 0 : 1;
|
||||
const unsigned ptr_src = load ? 1 : 0;
|
||||
|
||||
nir_deref_instr *slice = nir_src_as_deref(intrin->src[mat_src]);
|
||||
const struct glsl_type *mat_type = get_coop_type_for_slice(state, slice);
|
||||
const struct glsl_cmat_description *desc = glsl_get_cmat_description(mat_type);
|
||||
|
||||
nir_def *results[NIR_MAX_VEC_COMPONENTS];
|
||||
const unsigned num_components = glsl_get_vector_elements(slice->type);
|
||||
const unsigned packing_factor = get_packing_factor(*desc, slice->type);
|
||||
|
||||
nir_deref_instr *pointer = nir_src_as_deref(intrin->src[ptr_src]);
|
||||
|
||||
if ((nir_intrinsic_matrix_layout(intrin) == GLSL_MATRIX_LAYOUT_ROW_MAJOR) ==
|
||||
(desc->use != GLSL_CMAT_USE_B)) {
|
||||
nir_def *stride = nir_udiv_imm(b, intrin->src[2].ssa, packing_factor);
|
||||
|
||||
const struct glsl_type *element_type =
|
||||
glsl_scalar_type(glsl_get_base_type(slice->type));
|
||||
|
||||
pointer = nir_build_deref_cast(b, &pointer->def, pointer->modes,
|
||||
element_type,
|
||||
glsl_get_bit_size(element_type) / 8);
|
||||
|
||||
nir_def *invocation = nir_load_subgroup_invocation(b);
|
||||
nir_def *base_offset;
|
||||
nir_def *step;
|
||||
|
||||
if (desc->use != GLSL_CMAT_USE_B) {
|
||||
base_offset = nir_iadd(b,
|
||||
nir_imul(b,
|
||||
nir_udiv_imm(b, invocation, 8),
|
||||
stride),
|
||||
nir_umod_imm(b, invocation, 8));
|
||||
|
||||
step = nir_imul_imm(b, stride, state->subgroup_size / 8);
|
||||
} else {
|
||||
base_offset = nir_iadd(b,
|
||||
nir_imul(b,
|
||||
nir_umod_imm(b, invocation, 8),
|
||||
stride),
|
||||
nir_udiv_imm(b, invocation, 8));
|
||||
|
||||
step = nir_imm_int(b, state->subgroup_size / 8);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
nir_def *offset = nir_imul_imm(b, step, i);
|
||||
|
||||
nir_deref_instr *memory_deref =
|
||||
nir_build_deref_ptr_as_array(b, pointer,
|
||||
nir_i2iN(b,
|
||||
nir_iadd(b,
|
||||
base_offset,
|
||||
offset),
|
||||
pointer->def.bit_size));
|
||||
|
||||
if (load) {
|
||||
results[i] = nir_load_deref(b, memory_deref);
|
||||
} else {
|
||||
nir_def *src = nir_channel(b, nir_load_deref(b, slice), i);
|
||||
nir_store_deref(b, memory_deref, src, 0x1);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
nir_def *stride = intrin->src[2].ssa;
|
||||
|
||||
const struct glsl_type *element_type = glsl_scalar_type(desc->element_type);
|
||||
const unsigned element_bits = glsl_base_type_get_bit_size(desc->element_type);
|
||||
const unsigned element_stride = element_bits / 8;
|
||||
|
||||
pointer = nir_build_deref_cast(b, &pointer->def, pointer->modes, element_type,
|
||||
element_stride);
|
||||
|
||||
nir_def *invocation_div_8 = nir_udiv_imm(b, nir_load_subgroup_invocation(b), 8);
|
||||
nir_def *invocation_mod_8 = nir_umod_imm(b, nir_load_subgroup_invocation(b), 8);
|
||||
|
||||
nir_def *packed_stride = nir_imul_imm(b, stride, packing_factor);
|
||||
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
const unsigned i_offset = i * (state->subgroup_size / 8);
|
||||
nir_def *v[4];
|
||||
|
||||
for (unsigned j = 0; j < packing_factor; j++) {
|
||||
nir_def *j_offset = nir_imul_imm(b, stride, j);
|
||||
nir_def *offset;
|
||||
|
||||
if (desc->use != GLSL_CMAT_USE_B) {
|
||||
offset = nir_iadd(b,
|
||||
nir_iadd(b,
|
||||
nir_imul(b,
|
||||
invocation_mod_8,
|
||||
packed_stride),
|
||||
invocation_div_8),
|
||||
nir_iadd_imm(b, j_offset, i_offset));
|
||||
} else {
|
||||
offset = nir_iadd(b,
|
||||
nir_iadd(b,
|
||||
nir_imul(b,
|
||||
invocation_div_8,
|
||||
packed_stride),
|
||||
invocation_mod_8),
|
||||
nir_iadd(b,
|
||||
nir_imul_imm(b,
|
||||
packed_stride,
|
||||
i_offset),
|
||||
j_offset));
|
||||
}
|
||||
|
||||
nir_deref_instr *memory_deref =
|
||||
nir_build_deref_ptr_as_array(b, pointer,
|
||||
nir_i2iN(b,
|
||||
offset,
|
||||
pointer->def.bit_size));
|
||||
|
||||
if (load) {
|
||||
v[j] = nir_load_deref(b, memory_deref);
|
||||
} else {
|
||||
nir_def *src = nir_channel(b, nir_load_deref(b, slice), i);
|
||||
|
||||
nir_def *v =
|
||||
nir_channel(b, nir_unpack_bits(b, src, element_bits), j);
|
||||
|
||||
nir_store_deref(b, memory_deref, v, 0x1);
|
||||
}
|
||||
}
|
||||
|
||||
if (load) {
|
||||
results[i] = nir_pack_bits(b, nir_vec(b, v, packing_factor),
|
||||
packing_factor * element_bits);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (load)
|
||||
nir_store_deref(b, slice, nir_vec(b, results, num_components),
|
||||
nir_component_mask(num_components));
|
||||
}
|
||||
|
||||
static void
|
||||
lower_cmat_unary_op(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
struct lower_cmat_state *state)
|
||||
{
|
||||
nir_deref_instr *dst_slice = nir_src_as_deref(intrin->src[0]);
|
||||
nir_deref_instr *src_slice = nir_src_as_deref(intrin->src[1]);
|
||||
nir_def *results[NIR_MAX_VEC_COMPONENTS];
|
||||
const unsigned num_components = glsl_get_vector_elements(dst_slice->type);
|
||||
|
||||
const struct glsl_type *dst_mat_type =
|
||||
get_coop_type_for_slice(state, dst_slice);
|
||||
const struct glsl_type *src_mat_type =
|
||||
get_coop_type_for_slice(state, src_slice);
|
||||
|
||||
const struct glsl_cmat_description dst_desc =
|
||||
*glsl_get_cmat_description(dst_mat_type);
|
||||
|
||||
const struct glsl_cmat_description src_desc =
|
||||
*glsl_get_cmat_description(src_mat_type);
|
||||
|
||||
const unsigned dst_bits = glsl_base_type_bit_size(dst_desc.element_type);
|
||||
const unsigned src_bits = glsl_base_type_bit_size(src_desc.element_type);
|
||||
|
||||
/* The type of the returned slice may be different from the type of the
|
||||
* input slice.
|
||||
*/
|
||||
const unsigned dst_packing_factor =
|
||||
get_packing_factor(dst_desc, dst_slice->type);
|
||||
|
||||
const unsigned src_packing_factor =
|
||||
get_packing_factor(src_desc, src_slice->type);
|
||||
|
||||
const nir_op op = nir_intrinsic_alu_op(intrin);
|
||||
|
||||
/* There are three possible cases:
|
||||
*
|
||||
* 1. dst_packing_factor == src_packing_factor. This is the common case,
|
||||
* and handling it is straightforward.
|
||||
*
|
||||
* 2. dst_packing_factor > src_packing_factor. This occurs when converting a
|
||||
* float32_t matrix slice to a packed float16_t slice. Loop over the size
|
||||
* of the destination slice, but read multiple entries from the source
|
||||
* slice on each iteration.
|
||||
*
|
||||
* 3. dst_packing_factor < src_packing_factor. This occurs when converting a
|
||||
* packed int8_t matrix slice to an int32_t slice. Loop over the size of
|
||||
* the source slice, but write multiple entries to the destination slice
|
||||
* on each iteration.
|
||||
*
|
||||
* Handle all cases by iterating over the total (non-packed) number of
|
||||
* elements in the slice. When dst_packing_factor values have been
|
||||
* calculated, store them.
|
||||
*/
|
||||
assert((dst_packing_factor * glsl_get_vector_elements(dst_slice->type)) ==
|
||||
(src_packing_factor * glsl_get_vector_elements(src_slice->type)));
|
||||
|
||||
/* Stores at most dst_packing_factor partial results. */
|
||||
nir_def *v[4];
|
||||
assert(dst_packing_factor <= 4);
|
||||
|
||||
for (unsigned i = 0; i < num_components * dst_packing_factor; i++) {
|
||||
const unsigned dst_chan_index = i % dst_packing_factor;
|
||||
const unsigned src_chan_index = i % src_packing_factor;
|
||||
const unsigned dst_index = i / dst_packing_factor;
|
||||
const unsigned src_index = i / src_packing_factor;
|
||||
|
||||
nir_def *src =
|
||||
nir_channel(b,
|
||||
nir_unpack_bits(b,
|
||||
nir_channel(b,
|
||||
nir_load_deref(b, src_slice),
|
||||
src_index),
|
||||
src_bits),
|
||||
src_chan_index);
|
||||
|
||||
v[dst_chan_index] = nir_build_alu1(b, op, src);
|
||||
|
||||
if (dst_chan_index == (dst_packing_factor - 1)) {
|
||||
results[dst_index] =
|
||||
nir_pack_bits(b, nir_vec(b, v, dst_packing_factor),
|
||||
dst_packing_factor * dst_bits);
|
||||
}
|
||||
}
|
||||
|
||||
nir_store_deref(b, dst_slice, nir_vec(b, results, num_components),
|
||||
nir_component_mask(num_components));
|
||||
}
|
||||
|
||||
static void
|
||||
lower_cmat_binary_op(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
struct lower_cmat_state *state)
|
||||
{
|
||||
nir_deref_instr *dst_slice = nir_src_as_deref(intrin->src[0]);
|
||||
nir_deref_instr *src_a_slice = nir_src_as_deref(intrin->src[1]);
|
||||
nir_deref_instr *src_b_slice = nir_src_as_deref(intrin->src[2]);
|
||||
|
||||
nir_def *src_a = nir_load_deref(b, src_a_slice);
|
||||
nir_def *src_b = nir_load_deref(b, src_b_slice);
|
||||
nir_def *results[NIR_MAX_VEC_COMPONENTS];
|
||||
const unsigned num_components = glsl_get_vector_elements(dst_slice->type);
|
||||
|
||||
const struct glsl_type *dst_mat_type = get_coop_type_for_slice(state, dst_slice);
|
||||
ASSERTED const struct glsl_type *src_a_mat_type = get_coop_type_for_slice(state, src_a_slice);
|
||||
ASSERTED const struct glsl_type *src_b_mat_type = get_coop_type_for_slice(state, src_b_slice);
|
||||
|
||||
const struct glsl_cmat_description desc =
|
||||
*glsl_get_cmat_description(dst_mat_type);
|
||||
|
||||
assert(dst_mat_type == src_a_mat_type);
|
||||
assert(dst_mat_type == src_b_mat_type);
|
||||
|
||||
const unsigned bits = glsl_base_type_bit_size(desc.element_type);
|
||||
const unsigned packing_factor = get_packing_factor(desc, dst_slice->type);
|
||||
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
nir_def *val_a = nir_channel(b, src_a, i);
|
||||
nir_def *val_b = nir_channel(b, src_b, i);
|
||||
|
||||
results[i] =
|
||||
nir_pack_bits(b, nir_build_alu2(b, nir_intrinsic_alu_op(intrin),
|
||||
nir_unpack_bits(b, val_a, bits),
|
||||
nir_unpack_bits(b, val_b, bits)),
|
||||
packing_factor * bits);
|
||||
}
|
||||
|
||||
nir_store_deref(b, dst_slice, nir_vec(b, results, num_components),
|
||||
nir_component_mask(num_components));
|
||||
}
|
||||
|
||||
static void
|
||||
lower_cmat_scalar_op(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
struct lower_cmat_state *state)
|
||||
{
|
||||
nir_deref_instr *dst_slice = nir_src_as_deref(intrin->src[0]);
|
||||
nir_deref_instr *src_slice = nir_src_as_deref(intrin->src[1]);
|
||||
nir_def *scalar = intrin->src[2].ssa;
|
||||
|
||||
nir_def *src = nir_load_deref(b, src_slice);
|
||||
nir_def *results[NIR_MAX_VEC_COMPONENTS];
|
||||
const unsigned num_components = glsl_get_vector_elements(dst_slice->type);
|
||||
|
||||
ASSERTED const struct glsl_type *dst_mat_type = get_coop_type_for_slice(state, dst_slice);
|
||||
ASSERTED const struct glsl_type *src_mat_type = get_coop_type_for_slice(state, src_slice);
|
||||
assert(dst_mat_type == src_mat_type);
|
||||
|
||||
const struct glsl_cmat_description desc =
|
||||
*glsl_get_cmat_description(dst_mat_type);
|
||||
|
||||
const unsigned bits = glsl_base_type_bit_size(desc.element_type);
|
||||
const unsigned packing_factor = get_packing_factor(desc, dst_slice->type);
|
||||
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
nir_def *val = nir_channel(b, src, i);
|
||||
|
||||
results[i] =
|
||||
nir_pack_bits(b, nir_build_alu2(b, nir_intrinsic_alu_op(intrin),
|
||||
nir_unpack_bits(b, val, bits),
|
||||
scalar),
|
||||
packing_factor * bits);
|
||||
}
|
||||
|
||||
nir_store_deref(b, dst_slice, nir_vec(b, results, num_components),
|
||||
nir_component_mask(num_components));
|
||||
}
|
||||
|
||||
static nir_deref_instr *
|
||||
lower_cmat_deref(nir_builder *b, nir_deref_instr *deref,
|
||||
struct lower_cmat_state *state)
|
||||
{
|
||||
nir_deref_instr *parent = nir_deref_instr_parent(deref);
|
||||
if (parent) {
|
||||
assert(deref->deref_type == nir_deref_type_array);
|
||||
parent = lower_cmat_deref(b, parent, state);
|
||||
return nir_build_deref_array(b, parent, deref->arr.index.ssa);
|
||||
} else {
|
||||
assert(deref->deref_type == nir_deref_type_var);
|
||||
assert(deref->var);
|
||||
assert(glsl_type_is_cmat(glsl_without_array(deref->var->type)));
|
||||
|
||||
struct hash_entry *entry = _mesa_hash_table_search(state->vars_to_slice, deref->var);
|
||||
assert(entry);
|
||||
return nir_build_deref_var(b, (nir_variable *)entry->data);
|
||||
}
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
lower_cmat_instr(nir_builder *b, nir_instr *instr, void *_state)
|
||||
{
|
||||
struct lower_cmat_state *state = _state;
|
||||
|
||||
if (instr->type == nir_instr_type_deref) {
|
||||
nir_deref_instr *deref = lower_cmat_deref(b, nir_instr_as_deref(instr), state);
|
||||
return &deref->def;
|
||||
}
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_cmat_load:
|
||||
case nir_intrinsic_cmat_store:
|
||||
lower_cmat_load_store(b, intrin, state);
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
|
||||
case nir_intrinsic_cmat_construct: {
|
||||
nir_deref_instr *slice = nir_src_as_deref(intrin->src[0]);
|
||||
nir_def *src = intrin->src[1].ssa;
|
||||
|
||||
const struct glsl_type *mat_type = get_coop_type_for_slice(state, slice);
|
||||
const struct glsl_cmat_description desc =
|
||||
*glsl_get_cmat_description(mat_type);
|
||||
const unsigned packing_factor = get_packing_factor(desc, slice->type);
|
||||
|
||||
if (packing_factor > 1) {
|
||||
src = nir_pack_bits(b, nir_replicate(b, src, packing_factor),
|
||||
packing_factor * glsl_base_type_get_bit_size(desc.element_type));
|
||||
}
|
||||
|
||||
const unsigned num_components = glsl_get_vector_elements(slice->type);
|
||||
|
||||
nir_store_deref(b, slice, nir_replicate(b, src, num_components),
|
||||
nir_component_mask(num_components));
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
}
|
||||
|
||||
case nir_intrinsic_cmat_unary_op:
|
||||
lower_cmat_unary_op(b, intrin, state);
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
|
||||
case nir_intrinsic_cmat_binary_op:
|
||||
lower_cmat_binary_op(b, intrin, state);
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
|
||||
case nir_intrinsic_cmat_scalar_op:
|
||||
lower_cmat_scalar_op(b, intrin, state);
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
|
||||
case nir_intrinsic_cmat_length: {
|
||||
const struct glsl_cmat_description desc = nir_intrinsic_cmat_desc(intrin);
|
||||
const struct glsl_type *mat_type = glsl_cmat_type(&desc);
|
||||
const struct glsl_type *slice_type = get_slice_type(state, mat_type);
|
||||
return nir_imm_intN_t(b, (get_packing_factor(desc, slice_type) *
|
||||
glsl_get_vector_elements(slice_type)), 32);
|
||||
}
|
||||
|
||||
case nir_intrinsic_cmat_muladd: {
|
||||
nir_deref_instr *dst_slice = nir_src_as_deref(intrin->src[0]);
|
||||
nir_deref_instr *A_slice = nir_src_as_deref(intrin->src[1]);
|
||||
nir_deref_instr *B_slice = nir_src_as_deref(intrin->src[2]);
|
||||
nir_deref_instr *accum_slice = nir_src_as_deref(intrin->src[3]);
|
||||
|
||||
const struct glsl_type *dst_mat_type = get_coop_type_for_slice(state, dst_slice);
|
||||
const struct glsl_cmat_description dst_desc = *glsl_get_cmat_description(dst_mat_type);
|
||||
|
||||
const struct glsl_type *src_mat_type = get_coop_type_for_slice(state, A_slice);
|
||||
const struct glsl_cmat_description src_desc = *glsl_get_cmat_description(src_mat_type);
|
||||
|
||||
const unsigned packing_factor = get_packing_factor(dst_desc, dst_slice->type);
|
||||
const unsigned num_components = glsl_get_vector_elements(dst_slice->type);
|
||||
|
||||
nir_def *result =
|
||||
nir_dpas_intel(b,
|
||||
packing_factor * glsl_base_type_get_bit_size(dst_desc.element_type),
|
||||
nir_load_deref(b, A_slice),
|
||||
nir_load_deref(b, B_slice),
|
||||
nir_load_deref(b, accum_slice),
|
||||
.dest_type = nir_get_nir_type_for_glsl_base_type(dst_desc.element_type),
|
||||
.src_type = nir_get_nir_type_for_glsl_base_type(src_desc.element_type),
|
||||
.saturate = nir_intrinsic_saturate(intrin),
|
||||
.cmat_signed_mask = nir_intrinsic_cmat_signed_mask(intrin),
|
||||
.systolic_depth = 8,
|
||||
.repeat_count = 8);
|
||||
|
||||
nir_store_deref(b, dst_slice, result,
|
||||
nir_component_mask(num_components));
|
||||
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
}
|
||||
|
||||
case nir_intrinsic_cmat_bitcast: {
|
||||
nir_deref_instr *dst_slice = nir_src_as_deref(intrin->src[0]);
|
||||
nir_deref_instr *src_slice = nir_src_as_deref(intrin->src[1]);
|
||||
|
||||
const unsigned num_components = glsl_get_vector_elements(dst_slice->type);
|
||||
|
||||
assert(glsl_get_vector_elements(src_slice->type) == num_components);
|
||||
|
||||
nir_store_deref(b, dst_slice, nir_load_deref(b, src_slice),
|
||||
nir_component_mask(num_components));
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
}
|
||||
|
||||
case nir_intrinsic_cmat_copy:
|
||||
nir_copy_deref(b,
|
||||
nir_src_as_deref(intrin->src[0]),
|
||||
nir_src_as_deref(intrin->src[1]));
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
|
||||
case nir_intrinsic_cmat_insert: {
|
||||
nir_deref_instr *dst_slice = nir_src_as_deref(intrin->src[0]);
|
||||
nir_def *scalar = intrin->src[1].ssa;
|
||||
nir_deref_instr *src_slice = nir_src_as_deref(intrin->src[2]);
|
||||
const nir_src dst_index = intrin->src[3];
|
||||
|
||||
const struct glsl_type *dst_mat_type = get_coop_type_for_slice(state, dst_slice);
|
||||
ASSERTED const struct glsl_type *src_mat_type = get_coop_type_for_slice(state, src_slice);
|
||||
assert(dst_mat_type == src_mat_type);
|
||||
|
||||
const struct glsl_cmat_description desc =
|
||||
*glsl_get_cmat_description(dst_mat_type);
|
||||
|
||||
const unsigned bits = glsl_base_type_bit_size(desc.element_type);
|
||||
const unsigned packing_factor = get_packing_factor(desc, dst_slice->type);
|
||||
const unsigned num_components = glsl_get_vector_elements(dst_slice->type);
|
||||
|
||||
nir_def *slice_index = nir_udiv_imm(b, dst_index.ssa, packing_factor);
|
||||
nir_def *vector_index = nir_umod_imm(b, dst_index.ssa, packing_factor);
|
||||
nir_def *results[NIR_MAX_VEC_COMPONENTS];
|
||||
|
||||
const int slice_constant_index = nir_src_is_const(dst_index)
|
||||
? nir_src_as_uint(dst_index) / packing_factor
|
||||
: -1;
|
||||
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
nir_def *val = nir_channel(b, nir_load_deref(b, src_slice), i);
|
||||
nir_def *insert;
|
||||
|
||||
if (slice_constant_index < 0 || slice_constant_index == i) {
|
||||
if (packing_factor == 1) {
|
||||
insert = scalar;
|
||||
} else {
|
||||
nir_def *unpacked = nir_unpack_bits(b, val, bits);
|
||||
nir_def *v = nir_vector_insert(b, unpacked, scalar, vector_index);
|
||||
|
||||
insert = nir_pack_bits(b, v, bits * packing_factor);
|
||||
}
|
||||
} else {
|
||||
insert = val;
|
||||
}
|
||||
|
||||
results[i] = slice_constant_index < 0
|
||||
? nir_bcsel(b, nir_ieq_imm(b, slice_index, i), insert, val)
|
||||
: insert;
|
||||
}
|
||||
|
||||
nir_store_deref(b, dst_slice, nir_vec(b, results, num_components),
|
||||
nir_component_mask(num_components));
|
||||
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
}
|
||||
|
||||
case nir_intrinsic_cmat_extract: {
|
||||
nir_deref_instr *slice = nir_src_as_deref(intrin->src[0]);
|
||||
const struct glsl_type *mat_type = get_coop_type_for_slice(state, slice);
|
||||
nir_def *index = intrin->src[1].ssa;
|
||||
|
||||
const struct glsl_cmat_description desc =
|
||||
*glsl_get_cmat_description(mat_type);
|
||||
|
||||
const unsigned bits = glsl_base_type_bit_size(desc.element_type);
|
||||
const unsigned packing_factor = get_packing_factor(desc, slice->type);
|
||||
|
||||
nir_def *src =
|
||||
nir_vector_extract(b, nir_load_deref(b, slice),
|
||||
nir_udiv_imm(b, index, packing_factor));
|
||||
|
||||
if (packing_factor == 1) {
|
||||
return src;
|
||||
} else {
|
||||
return nir_vector_extract(b,
|
||||
nir_unpack_bits(b, src, bits),
|
||||
nir_umod_imm(b, index, packing_factor));
|
||||
}
|
||||
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("invalid cooperative matrix intrinsic");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
create_slice_var(struct lower_cmat_state *state, nir_variable *var,
|
||||
nir_function_impl *impl)
|
||||
{
|
||||
// TODO: without array
|
||||
const struct glsl_type *mat_type = glsl_without_array(var->type);
|
||||
|
||||
assert(glsl_type_is_cmat(mat_type));
|
||||
assert((!impl && var->data.mode == nir_var_shader_temp) ||
|
||||
( impl && var->data.mode == nir_var_function_temp));
|
||||
|
||||
const struct glsl_type *slice_type = get_slice_type(state, var->type);
|
||||
const char *slice_name = ralloc_asprintf(state->shader, "%s_slice", var->name);
|
||||
nir_variable *slice_var = impl ?
|
||||
nir_local_variable_create(impl, slice_type, slice_name) :
|
||||
nir_variable_create(state->shader, var->data.mode, slice_type, slice_name);
|
||||
|
||||
_mesa_hash_table_insert(state->vars_to_slice, var, slice_var);
|
||||
_mesa_hash_table_insert(state->slice_coop_types, slice_var, (void *)mat_type);
|
||||
}
|
||||
|
||||
bool
|
||||
brw_nir_lower_cmat(nir_shader *shader, unsigned subgroup_size)
|
||||
{
|
||||
void *temp_ctx = ralloc_context(NULL);
|
||||
|
||||
struct lower_cmat_state state = {
|
||||
.shader = shader,
|
||||
.slice_coop_types = _mesa_pointer_hash_table_create(temp_ctx),
|
||||
.vars_to_slice = _mesa_pointer_hash_table_create(temp_ctx),
|
||||
.subgroup_size = subgroup_size,
|
||||
};
|
||||
|
||||
/* Create a slice array for each variable and add a map from the original
|
||||
* variable back to it, so it can be reached during lowering.
|
||||
*
|
||||
* TODO: Cooperative matrix inside struct?
|
||||
*/
|
||||
nir_foreach_variable_in_shader(var, shader) {
|
||||
if (glsl_type_is_cmat(glsl_without_array(var->type)))
|
||||
create_slice_var(&state, var, NULL);
|
||||
}
|
||||
nir_foreach_function(func, shader) {
|
||||
nir_foreach_function_temp_variable(var, func->impl) {
|
||||
if (glsl_type_is_cmat(glsl_without_array(var->type)))
|
||||
create_slice_var(&state, var, func->impl);
|
||||
}
|
||||
}
|
||||
|
||||
bool progress = nir_shader_lower_instructions(shader,
|
||||
lower_cmat_filter,
|
||||
lower_cmat_instr,
|
||||
&state);
|
||||
|
||||
ralloc_free(temp_ctx);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
|
@ -1,273 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "brw_nir_rt.h"
|
||||
#include "brw_nir_rt_builder.h"
|
||||
|
||||
/* Rewrite an any-hit shader so it can be inlined into an intersection
 * shader as a regular function call.
 *
 * The entrypoint is given three 32-bit parameters: a pointer to the
 * commit boolean, the candidate hit T, and the hit kind.  Ray-query style
 * intrinsics inside the any-hit shader are rewritten in terms of those
 * parameters, and halts are turned into returns so control flow stays
 * inside the (future) inlined function.
 *
 * Returns the rewritten entrypoint impl (the shader is modified in place).
 */
static nir_function_impl *
lower_any_hit_for_intersection(nir_shader *any_hit)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(any_hit);

   /* Any-hit shaders need three parameters */
   assert(impl->function->num_params == 0);
   nir_parameter params[] = {
      {
         /* A pointer to a boolean value for whether or not the hit was
          * accepted.
          */
         .num_components = 1,
         .bit_size = 32,
      },
      {
         /* The hit T value */
         .num_components = 1,
         .bit_size = 32,
      },
      {
         /* The hit kind */
         .num_components = 1,
         .bit_size = 32,
      },
   };
   impl->function->num_params = ARRAY_SIZE(params);
   impl->function->params =
      ralloc_array(any_hit, nir_parameter, ARRAY_SIZE(params));
   memcpy(impl->function->params, params, sizeof(params));

   nir_builder build = nir_builder_at(nir_before_impl(impl));
   nir_builder *b = &build;

   nir_def *commit_ptr = nir_load_param(b, 0);
   nir_def *hit_t = nir_load_param(b, 1);
   nir_def *hit_kind = nir_load_param(b, 2);

   /* Treat the raw commit pointer as a deref to a bool so we can store
    * through it with regular deref intrinsics.
    */
   nir_deref_instr *commit =
      nir_build_deref_cast(b, commit_ptr, nir_var_function_temp,
                           glsl_bool_type(), 0);

   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_ignore_ray_intersection:
               b->cursor = nir_instr_remove(&intrin->instr);
               /* We put the newly emitted code inside a dummy if because it's
                * going to contain a jump instruction and we don't want to
                * deal with that mess here.  It'll get dealt with by our
                * control-flow optimization passes.
                */
               nir_store_deref(b, commit, nir_imm_false(b), 0x1);
               nir_push_if(b, nir_imm_true(b));
               nir_jump(b, nir_jump_return);
               nir_pop_if(b, NULL);
               break;

            case nir_intrinsic_terminate_ray:
               /* The "normal" handling of terminateRay works fine in
                * intersection shaders.
                */
               break;

            case nir_intrinsic_load_ray_t_max:
               /* The candidate hit T comes in as a parameter here, not from
                * memory.
                */
               nir_def_rewrite_uses(&intrin->def,
                                        hit_t);
               nir_instr_remove(&intrin->instr);
               break;

            case nir_intrinsic_load_ray_hit_kind:
               nir_def_rewrite_uses(&intrin->def,
                                        hit_kind);
               nir_instr_remove(&intrin->instr);
               break;

            default:
               break;
            }
            break;
         }

         case nir_instr_type_jump: {
            /* Stomp any halts to returns since they only return from the
             * any-hit shader and not necessarily from the intersection
             * shader.  This is safe to do because we've already asserted
             * that we only have the one function.
             */
            nir_jump_instr *jump = nir_instr_as_jump(instr);
            if (jump->type == nir_jump_halt)
               jump->type = nir_jump_return;
            break;
         }

         default:
            break;
         }
      }
   }

   nir_validate_shader(any_hit, "after initial any-hit lowering");

   /* Flatten returns so the impl can be inlined as straight-line code. */
   nir_lower_returns_impl(impl);

   nir_validate_shader(any_hit, "after lowering returns");

   return impl;
}
|
||||
|
||||
/* Lower a ray-tracing intersection shader, optionally fusing in the
 * matching any-hit shader.
 *
 * reportIntersectionEXT (nir_intrinsic_report_ray_intersection) is expanded
 * into: a T-range check against [t_min, t_far], an inlined call to the
 * any-hit shader for non-opaque leaves, and, on commit, an update of the
 * potential hit record in memory.  An epilogue at the end of the shader
 * accepts or ignores the intersection based on whether any report
 * committed.
 *
 * The intersection shader is modified in place; any_hit is cloned and left
 * untouched.
 */
void
brw_nir_lower_intersection_shader(nir_shader *intersection,
                                  const nir_shader *any_hit,
                                  const struct intel_device_info *devinfo)
{
   void *dead_ctx = ralloc_context(intersection);

   nir_function_impl *any_hit_impl = NULL;
   struct hash_table *any_hit_var_remap = NULL;
   if (any_hit) {
      /* Clone so the caller's shader is not mutated, then prepare it for
       * inlining (parameters + returns instead of halts).
       */
      nir_shader *any_hit_tmp = nir_shader_clone(dead_ctx, any_hit);
      NIR_PASS_V(any_hit_tmp, nir_opt_dce);
      any_hit_impl = lower_any_hit_for_intersection(any_hit_tmp);
      any_hit_var_remap = _mesa_pointer_hash_table_create(dead_ctx);
   }

   nir_function_impl *impl = nir_shader_get_entrypoint(intersection);

   nir_builder build = nir_builder_at(nir_before_impl(impl));
   nir_builder *b = &build;

   nir_def *t_addr = brw_nir_rt_mem_hit_addr(b, false /* committed */);
   /* Tracks whether any reportIntersection in this invocation committed. */
   nir_variable *commit =
      nir_local_variable_create(impl, glsl_bool_type(), "ray_commit");
   nir_store_var(b, commit, nir_imm_false(b), 0x1);

   /* Emit the accept/ignore epilogue just before the single exit jump. */
   assert(impl->end_block->predecessors->entries == 1);
   set_foreach(impl->end_block->predecessors, block_entry) {
      struct nir_block *block = (void *)block_entry->key;
      b->cursor = nir_after_block_before_jump(block);
      nir_push_if(b, nir_load_var(b, commit));
      {
         /* Set the "valid" bit in mem_hit */
         nir_def *ray_addr = brw_nir_rt_mem_hit_addr(b, false /* committed */);
         nir_def *flags_dw_addr = nir_iadd_imm(b, ray_addr, 12);
         nir_store_global(b, flags_dw_addr, 4,
                          nir_ior(b, nir_load_global(b, flags_dw_addr, 4, 1, 32),
                                  nir_imm_int(b, 1 << 16)), 0x1 /* write_mask */);

         nir_accept_ray_intersection(b);
      }
      nir_push_else(b, NULL);
      {
         nir_ignore_ray_intersection(b);
      }
      nir_pop_if(b, NULL);
      break;
   }

   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_report_ray_intersection: {
               b->cursor = nir_instr_remove(&intrin->instr);
               nir_def *hit_t = intrin->src[0].ssa;
               nir_def *hit_kind = intrin->src[1].ssa;
               nir_def *min_t = nir_load_ray_t_min(b);

               struct brw_nir_rt_mem_ray_defs ray_def;
               brw_nir_rt_load_mem_ray(b, &ray_def, BRW_RT_BVH_LEVEL_WORLD);

               struct brw_nir_rt_mem_hit_defs hit_in = {};
               brw_nir_rt_load_mem_hit(b, &hit_in, false);

               nir_def *max_t = ray_def.t_far;

               /* bool commit_tmp = false; */
               nir_variable *commit_tmp =
                  nir_local_variable_create(impl, glsl_bool_type(),
                                            "commit_tmp");
               nir_store_var(b, commit_tmp, nir_imm_false(b), 0x1);

               /* Only consider hits with min_t <= hit_t <= max_t. */
               nir_push_if(b, nir_iand(b, nir_fge(b, hit_t, min_t),
                                       nir_fge(b, max_t, hit_t)));
               {
                  /* Any-hit defaults to commit */
                  nir_store_var(b, commit_tmp, nir_imm_true(b), 0x1);

                  if (any_hit_impl != NULL) {
                     /* Opaque leaves skip the any-hit shader entirely. */
                     nir_push_if(b, nir_inot(b, nir_load_leaf_opaque_intel(b)));
                     {
                        nir_def *params[] = {
                           &nir_build_deref_var(b, commit_tmp)->def,
                           hit_t,
                           hit_kind,
                        };
                        nir_inline_function_impl(b, any_hit_impl, params,
                                                 any_hit_var_remap);
                     }
                     nir_pop_if(b, NULL);
                  }

                  nir_push_if(b, nir_load_var(b, commit_tmp));
                  {
                     nir_store_var(b, commit, nir_imm_true(b), 0x1);

                     nir_def *ray_addr =
                        brw_nir_rt_mem_ray_addr(b, brw_nir_rt_stack_addr(b), BRW_RT_BVH_LEVEL_WORLD);

                     /* Shrink the ray's t_far to the committed hit T, then
                      * record T and kind in the potential hit record.
                      */
                     nir_store_global(b, nir_iadd_imm(b, ray_addr, 16 + 12), 4, hit_t, 0x1);
                     nir_store_global(b, t_addr, 4,
                                      nir_vec2(b, nir_fmin(b, hit_t, hit_in.t), hit_kind),
                                      0x3);
                  }
                  nir_pop_if(b, NULL);
               }
               nir_pop_if(b, NULL);

               /* reportIntersectionEXT returns whether the hit was accepted. */
               nir_def *accepted = nir_load_var(b, commit_tmp);
               nir_def_rewrite_uses(&intrin->def,
                                        accepted);
               break;
            }

            default:
               break;
            }
            break;
         }

         default:
            break;
         }
      }
   }
   nir_metadata_preserve(impl, nir_metadata_none);

   /* We did some inlining; have to re-index SSA defs */
   nir_index_ssa_defs(impl);

   ralloc_free(dead_ctx);
}
|
||||
|
|
@ -1,567 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2021 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "brw_nir_rt.h"
|
||||
#include "brw_nir_rt_builder.h"
|
||||
|
||||
#include "nir_deref.h"
|
||||
|
||||
#include "util/macros.h"
|
||||
|
||||
/* Per-shader state for the ray-query lowering pass. */
struct lowering_state {
   const struct intel_device_info *devinfo;

   /* Entrypoint impl being lowered (the pass asserts a single function). */
   nir_function_impl *impl;

   /* Maps opaque ray-query nir_variable -> struct brw_ray_query. */
   struct hash_table *queries;
   /* Total number of query slots registered (arrays count once per element). */
   uint32_t n_queries;

   /* RT globals loaded once at the top of the impl. */
   struct brw_nir_rt_globals_defs globals;
   nir_def *rq_globals;
};
|
||||
|
||||
/* Bookkeeping for one opaque ray-query variable (possibly an array). */
struct brw_ray_query {
   /* The original opaque RayQuery variable from the shader. */
   nir_variable *opaque_var;
   /* Replacement 16-bit variable holding the packed trace ctrl/level state
    * (see update_trace_ctrl_level); same array shape as opaque_var.
    */
   nir_variable *internal_var;
   /* First query slot index assigned to this variable. */
   uint32_t id;
};

/* NOTE(review): not referenced in the visible code — presumably leftover;
 * confirm before relying on it.
 */
#define SIZEOF_QUERY_STATE (sizeof(uint32_t))
|
||||
|
||||
static bool
|
||||
need_spill_fill(struct lowering_state *state)
|
||||
{
|
||||
return state->n_queries > 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* This pass converts opaque RayQuery structures from SPIRV into a vec3 where
|
||||
* the first 2 elements store a global address for the query and the third
|
||||
* element is an incremented counter on the number of executed
|
||||
* nir_intrinsic_rq_proceed.
|
||||
*/
|
||||
|
||||
static void
|
||||
register_opaque_var(nir_variable *opaque_var, struct lowering_state *state)
|
||||
{
|
||||
struct hash_entry *entry = _mesa_hash_table_search(state->queries, opaque_var);
|
||||
assert(entry == NULL);
|
||||
|
||||
struct brw_ray_query *rq = rzalloc(state->queries, struct brw_ray_query);
|
||||
rq->opaque_var = opaque_var;
|
||||
rq->id = state->n_queries;
|
||||
|
||||
unsigned aoa_size = glsl_get_aoa_size(opaque_var->type);
|
||||
state->n_queries += MAX2(1, aoa_size);
|
||||
|
||||
_mesa_hash_table_insert(state->queries, opaque_var, rq);
|
||||
}
|
||||
|
||||
static void
|
||||
create_internal_var(struct brw_ray_query *rq, struct lowering_state *state)
|
||||
{
|
||||
const struct glsl_type *opaque_type = rq->opaque_var->type;
|
||||
const struct glsl_type *internal_type = glsl_uint16_t_type();
|
||||
|
||||
while (glsl_type_is_array(opaque_type)) {
|
||||
assert(!glsl_type_is_unsized_array(opaque_type));
|
||||
internal_type = glsl_array_type(internal_type,
|
||||
glsl_array_size(opaque_type),
|
||||
0);
|
||||
opaque_type = glsl_get_array_element(opaque_type);
|
||||
}
|
||||
|
||||
rq->internal_var = nir_local_variable_create(state->impl,
|
||||
internal_type,
|
||||
NULL);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static nir_def *
|
||||
get_ray_query_shadow_addr(nir_builder *b,
|
||||
nir_deref_instr *deref,
|
||||
struct lowering_state *state,
|
||||
nir_deref_instr **out_state_deref)
|
||||
{
|
||||
nir_deref_path path;
|
||||
nir_deref_path_init(&path, deref, NULL);
|
||||
assert(path.path[0]->deref_type == nir_deref_type_var);
|
||||
|
||||
nir_variable *opaque_var = nir_deref_instr_get_variable(path.path[0]);
|
||||
struct hash_entry *entry = _mesa_hash_table_search(state->queries, opaque_var);
|
||||
assert(entry);
|
||||
|
||||
struct brw_ray_query *rq = entry->data;
|
||||
|
||||
/* Base address in the shadow memory of the variable associated with this
|
||||
* ray query variable.
|
||||
*/
|
||||
nir_def *base_addr =
|
||||
nir_iadd_imm(b, state->globals.resume_sbt_addr,
|
||||
brw_rt_ray_queries_shadow_stack_size(state->devinfo) * rq->id);
|
||||
|
||||
bool spill_fill = need_spill_fill(state);
|
||||
*out_state_deref = nir_build_deref_var(b, rq->internal_var);
|
||||
|
||||
if (!spill_fill)
|
||||
return NULL;
|
||||
|
||||
/* Just emit code and let constant-folding go to town */
|
||||
nir_deref_instr **p = &path.path[1];
|
||||
for (; *p; p++) {
|
||||
if ((*p)->deref_type == nir_deref_type_array) {
|
||||
nir_def *index = (*p)->arr.index.ssa;
|
||||
|
||||
/**/
|
||||
*out_state_deref = nir_build_deref_array(b, *out_state_deref, index);
|
||||
|
||||
/**/
|
||||
uint64_t size = MAX2(1, glsl_get_aoa_size((*p)->type)) *
|
||||
brw_rt_ray_queries_shadow_stack_size(state->devinfo);
|
||||
|
||||
nir_def *mul = nir_amul_imm(b, nir_i2i64(b, index), size);
|
||||
|
||||
base_addr = nir_iadd(b, base_addr, mul);
|
||||
} else {
|
||||
unreachable("Unsupported deref type");
|
||||
}
|
||||
}
|
||||
|
||||
nir_deref_path_finish(&path);
|
||||
|
||||
/* Add the lane offset to the shadow memory address */
|
||||
nir_def *lane_offset =
|
||||
nir_imul_imm(
|
||||
b,
|
||||
nir_iadd(
|
||||
b,
|
||||
nir_imul(
|
||||
b,
|
||||
brw_load_btd_dss_id(b),
|
||||
brw_nir_rt_load_num_simd_lanes_per_dss(b, state->devinfo)),
|
||||
brw_nir_rt_sync_stack_id(b)),
|
||||
BRW_RT_SIZEOF_SHADOW_RAY_QUERY);
|
||||
|
||||
return nir_iadd(b, base_addr, nir_i2i64(b, lane_offset));
|
||||
}
|
||||
|
||||
/* Read and/or update the packed trace state held in the internal query
 * variable: bits [1:0] are the BVH level, bits [15:2] (after the shift) are
 * the trace-ray control value.
 *
 * Pass non-NULL out_old_ctrl/out_old_level to receive the current values;
 * pass non-NULL new_ctrl/new_level to overwrite either field (a NULL new
 * value keeps the old one).  Nothing is stored when both new values are
 * NULL.
 */
static void
update_trace_ctrl_level(nir_builder *b,
                        nir_deref_instr *state_deref,
                        nir_def **out_old_ctrl,
                        nir_def **out_old_level,
                        nir_def *new_ctrl,
                        nir_def *new_level)
{
   nir_def *old_value = nir_load_deref(b, state_deref);
   nir_def *old_ctrl = nir_ishr_imm(b, old_value, 2);
   nir_def *old_level = nir_iand_imm(b, old_value, 0x3);

   if (out_old_ctrl)
      *out_old_ctrl = old_ctrl;
   if (out_old_level)
      *out_old_level = old_level;

   if (new_ctrl)
      new_ctrl = nir_i2i16(b, new_ctrl);   /* state var is 16-bit */
   if (new_level)
      new_level = nir_i2i16(b, new_level);

   if (new_ctrl || new_level) {
      if (!new_ctrl)
         new_ctrl = old_ctrl;
      if (!new_level)
         new_level = old_level;

      /* Repack: ctrl in the high bits, level in the low two bits. */
      nir_def *new_value = nir_ior(b, nir_ishl_imm(b, new_ctrl, 2), new_level);
      nir_store_deref(b, state_deref, new_value, 0x1);
   }
}
|
||||
|
||||
/* Copy a query's saved state from shadow memory into the HW stack slot
 * before handing it to the HW.
 *
 * NOTE(review): the ctrl parameter is currently unused — kept for interface
 * stability; confirm whether it was meant to gate a partial copy.
 */
static void
fill_query(nir_builder *b,
           nir_def *hw_stack_addr,
           nir_def *shadow_stack_addr,
           nir_def *ctrl)
{
   brw_nir_memcpy_global(b, hw_stack_addr, 64, shadow_stack_addr, 64,
                         BRW_RT_SIZEOF_RAY_QUERY);
}
|
||||
|
||||
/* Copy a query's state from the HW stack slot back into its shadow memory
 * after the HW has processed it (inverse of fill_query).
 */
static void
spill_query(nir_builder *b,
            nir_def *hw_stack_addr,
            nir_def *shadow_stack_addr)
{
   brw_nir_memcpy_global(b, shadow_stack_addr, 64, hw_stack_addr, 64,
                         BRW_RT_SIZEOF_RAY_QUERY);
}
|
||||
|
||||
|
||||
/* Lower one rq_* intrinsic into loads/stores on the query's stack memory
 * (HW slot or shadow copy) plus updates of the packed ctrl/level state.
 * The intrinsic is removed; any SSA result is rewritten to the computed
 * value.
 */
static void
lower_ray_query_intrinsic(nir_builder *b,
                          nir_intrinsic_instr *intrin,
                          struct lowering_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

   b->cursor = nir_instr_remove(&intrin->instr);

   /* shadow_stack_addr is NULL when the single HW slot is used directly;
    * otherwise all bookkeeping happens in the shadow copy.
    */
   nir_deref_instr *ctrl_level_deref;
   nir_def *shadow_stack_addr =
      get_ray_query_shadow_addr(b, deref, state, &ctrl_level_deref);
   nir_def *hw_stack_addr =
      brw_nir_rt_sync_stack_addr(b, state->globals.base_mem_addr, state->devinfo);
   nir_def *stack_addr = shadow_stack_addr ? shadow_stack_addr : hw_stack_addr;

   switch (intrin->intrinsic) {
   case nir_intrinsic_rq_initialize: {
      nir_def *as_addr = intrin->src[1].ssa;
      nir_def *ray_flags = intrin->src[2].ssa;
      /* From the SPIR-V spec:
       *
       *    "Only the 8 least-significant bits of Cull Mask are used by
       *    this instruction - other bits are ignored.
       *
       *    Only the 16 least-significant bits of Miss Index are used by
       *    this instruction - other bits are ignored."
       */
      nir_def *cull_mask = nir_iand_imm(b, intrin->src[3].ssa, 0xff);
      nir_def *ray_orig = intrin->src[4].ssa;
      nir_def *ray_t_min = intrin->src[5].ssa;
      nir_def *ray_dir = intrin->src[6].ssa;
      nir_def *ray_t_max = intrin->src[7].ssa;

      nir_def *root_node_ptr =
         brw_nir_rt_acceleration_structure_to_root_node(b, as_addr);

      struct brw_nir_rt_mem_ray_defs ray_defs = {
         .root_node_ptr = root_node_ptr,
         .ray_flags = nir_u2u16(b, ray_flags),
         .ray_mask = cull_mask,
         .orig = ray_orig,
         .t_near = ray_t_min,
         .dir = ray_dir,
         .t_far = ray_t_max,
      };

      nir_def *ray_addr =
         brw_nir_rt_mem_ray_addr(b, stack_addr, BRW_RT_BVH_LEVEL_WORLD);

      /* Reset the query record, then write the initial ray. */
      brw_nir_rt_query_mark_init(b, stack_addr);
      brw_nir_rt_store_mem_ray_query_at_addr(b, ray_addr, &ray_defs);

      update_trace_ctrl_level(b, ctrl_level_deref,
                              NULL, NULL,
                              nir_imm_int(b, GEN_RT_TRACE_RAY_INITAL),
                              nir_imm_int(b, BRW_RT_BVH_LEVEL_WORLD));
      break;
   }

   case nir_intrinsic_rq_proceed: {
      nir_def *not_done =
         nir_inot(b, brw_nir_rt_query_done(b, stack_addr));
      nir_def *not_done_then, *not_done_else;

      nir_push_if(b, not_done);
      {
         nir_def *ctrl, *level;
         update_trace_ctrl_level(b, ctrl_level_deref,
                                 &ctrl, &level,
                                 NULL,
                                 NULL);

         /* Mark the query as done because handing it over to the HW for
          * processing. If the HW make any progress, it will write back some
          * data and as a side effect, clear the "done" bit. If no progress is
          * made, HW does not write anything back and we can use this bit to
          * detect that.
          */
         brw_nir_rt_query_mark_done(b, stack_addr);

         if (shadow_stack_addr)
            fill_query(b, hw_stack_addr, shadow_stack_addr, ctrl);

         /* Synchronous trace: the HW continues the traversal in place. */
         nir_trace_ray_intel(b, state->rq_globals, level, ctrl, .synchronous = true);

         struct brw_nir_rt_mem_hit_defs hit_in = {};
         brw_nir_rt_load_mem_hit_from_addr(b, &hit_in, hw_stack_addr, false);

         if (shadow_stack_addr)
            spill_query(b, hw_stack_addr, shadow_stack_addr);

         /* Next proceed resumes the traversal at the level the HW stopped at. */
         update_trace_ctrl_level(b, ctrl_level_deref,
                                 NULL, NULL,
                                 nir_imm_int(b, GEN_RT_TRACE_RAY_CONTINUE),
                                 hit_in.bvh_level);

         not_done_then = nir_inot(b, hit_in.done);
      }
      nir_push_else(b, NULL);
      {
         not_done_else = nir_imm_false(b);
      }
      nir_pop_if(b, NULL);
      not_done = nir_if_phi(b, not_done_then, not_done_else);
      nir_def_rewrite_uses(&intrin->def, not_done);
      break;
   }

   case nir_intrinsic_rq_confirm_intersection: {
      /* Promote the candidate (potential) hit to the committed hit. */
      brw_nir_memcpy_global(b,
                            brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, true), 16,
                            brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, false), 16,
                            BRW_RT_SIZEOF_HIT_INFO);
      update_trace_ctrl_level(b, ctrl_level_deref,
                              NULL, NULL,
                              nir_imm_int(b, GEN_RT_TRACE_RAY_COMMIT),
                              nir_imm_int(b, BRW_RT_BVH_LEVEL_OBJECT));
      break;
   }

   case nir_intrinsic_rq_generate_intersection: {
      /* src[1] is the generated hit T. */
      brw_nir_rt_generate_hit_addr(b, stack_addr, intrin->src[1].ssa);
      update_trace_ctrl_level(b, ctrl_level_deref,
                              NULL, NULL,
                              nir_imm_int(b, GEN_RT_TRACE_RAY_COMMIT),
                              nir_imm_int(b, BRW_RT_BVH_LEVEL_OBJECT));
      break;
   }

   case nir_intrinsic_rq_terminate: {
      brw_nir_rt_query_mark_done(b, stack_addr);
      break;
   }

   case nir_intrinsic_rq_load: {
      const bool committed = nir_intrinsic_committed(intrin);

      struct brw_nir_rt_mem_ray_defs world_ray_in = {};
      struct brw_nir_rt_mem_ray_defs object_ray_in = {};
      struct brw_nir_rt_mem_hit_defs hit_in = {};
      brw_nir_rt_load_mem_ray_from_addr(b, &world_ray_in, stack_addr,
                                        BRW_RT_BVH_LEVEL_WORLD);
      brw_nir_rt_load_mem_ray_from_addr(b, &object_ray_in, stack_addr,
                                        BRW_RT_BVH_LEVEL_OBJECT);
      brw_nir_rt_load_mem_hit_from_addr(b, &hit_in, stack_addr, committed);

      nir_def *sysval = NULL;
      switch (nir_intrinsic_ray_query_value(intrin)) {
      case nir_ray_query_value_intersection_type:
         if (committed) {
            /* Values we want to generate :
             *
             * RayQueryCommittedIntersectionNoneEXT = 0U <= hit_in.valid == false
             * RayQueryCommittedIntersectionTriangleEXT = 1U <= hit_in.leaf_type == BRW_RT_BVH_NODE_TYPE_QUAD (4)
             * RayQueryCommittedIntersectionGeneratedEXT = 2U <= hit_in.leaf_type == BRW_RT_BVH_NODE_TYPE_PROCEDURAL (3)
             */
            sysval =
               nir_bcsel(b, nir_ieq_imm(b, hit_in.leaf_type, 4),
                         nir_imm_int(b, 1), nir_imm_int(b, 2));
            sysval =
               nir_bcsel(b, hit_in.valid,
                         sysval, nir_imm_int(b, 0));
         } else {
            /* 0 -> triangle, 1 -> AABB */
            sysval =
               nir_b2i32(b,
                         nir_ieq_imm(b, hit_in.leaf_type,
                                     BRW_RT_BVH_NODE_TYPE_PROCEDURAL));
         }
         break;

      case nir_ray_query_value_intersection_t:
         sysval = hit_in.t;
         break;

      case nir_ray_query_value_intersection_instance_custom_index: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.instance_id;
         break;
      }

      case nir_ray_query_value_intersection_instance_id: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.instance_index;
         break;
      }

      case nir_ray_query_value_intersection_instance_sbt_index: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.contribution_to_hit_group_index;
         break;
      }

      case nir_ray_query_value_intersection_geometry_index: {
         /* Geometry index lives in the low 29 bits of the second dword of
          * the primitive leaf.
          */
         nir_def *geometry_index_dw =
            nir_load_global(b, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4), 4,
                            1, 32);
         sysval = nir_iand_imm(b, geometry_index_dw, BITFIELD_MASK(29));
         break;
      }

      case nir_ray_query_value_intersection_primitive_index:
         sysval = brw_nir_rt_load_primitive_id_from_hit(b, NULL /* is_procedural */, &hit_in);
         break;

      case nir_ray_query_value_intersection_barycentrics:
         sysval = hit_in.tri_bary;
         break;

      case nir_ray_query_value_intersection_front_face:
         sysval = hit_in.front_face;
         break;

      case nir_ray_query_value_intersection_object_ray_direction:
         /* NOTE(review): returns the world-space ray here — confirm this is
          * intended for the object-space query.
          */
         sysval = world_ray_in.dir;
         break;

      case nir_ray_query_value_intersection_object_ray_origin:
         /* NOTE(review): world-space ray used here as well — confirm. */
         sysval = world_ray_in.orig;
         break;

      case nir_ray_query_value_intersection_object_to_world: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.object_to_world[nir_intrinsic_column(intrin)];
         break;
      }

      case nir_ray_query_value_intersection_world_to_object: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.world_to_object[nir_intrinsic_column(intrin)];
         break;
      }

      case nir_ray_query_value_intersection_candidate_aabb_opaque:
         sysval = hit_in.front_face;
         break;

      case nir_ray_query_value_tmin:
         sysval = world_ray_in.t_near;
         break;

      case nir_ray_query_value_flags:
         sysval = nir_u2u32(b, world_ray_in.ray_flags);
         break;

      case nir_ray_query_value_world_ray_direction:
         sysval = world_ray_in.dir;
         break;

      case nir_ray_query_value_world_ray_origin:
         sysval = world_ray_in.orig;
         break;

      case nir_ray_query_value_intersection_triangle_vertex_positions: {
         struct brw_nir_rt_bvh_primitive_leaf_positions_defs pos;
         brw_nir_rt_load_bvh_primitive_leaf_positions(b, &pos, hit_in.prim_leaf_ptr);
         sysval = pos.positions[nir_intrinsic_column(intrin)];
         break;
      }

      default:
         unreachable("Invalid ray query");
      }

      assert(sysval);
      nir_def_rewrite_uses(&intrin->def, sysval);
      break;
   }

   default:
      unreachable("Invalid intrinsic");
   }
}
|
||||
|
||||
static void
|
||||
lower_ray_query_impl(nir_function_impl *impl, struct lowering_state *state)
|
||||
{
|
||||
nir_builder _b, *b = &_b;
|
||||
_b = nir_builder_at(nir_before_impl(impl));
|
||||
|
||||
state->rq_globals = nir_load_ray_query_global_intel(b);
|
||||
|
||||
brw_nir_rt_load_globals_addr(b, &state->globals, state->rq_globals);
|
||||
|
||||
nir_foreach_block_safe(block, impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
if (intrin->intrinsic != nir_intrinsic_rq_initialize &&
|
||||
intrin->intrinsic != nir_intrinsic_rq_terminate &&
|
||||
intrin->intrinsic != nir_intrinsic_rq_proceed &&
|
||||
intrin->intrinsic != nir_intrinsic_rq_generate_intersection &&
|
||||
intrin->intrinsic != nir_intrinsic_rq_confirm_intersection &&
|
||||
intrin->intrinsic != nir_intrinsic_rq_load)
|
||||
continue;
|
||||
|
||||
lower_ray_query_intrinsic(b, intrin, state);
|
||||
}
|
||||
}
|
||||
|
||||
nir_metadata_preserve(impl, nir_metadata_none);
|
||||
}
|
||||
|
||||
bool
|
||||
brw_nir_lower_ray_queries(nir_shader *shader,
|
||||
const struct intel_device_info *devinfo)
|
||||
{
|
||||
assert(exec_list_length(&shader->functions) == 1);
|
||||
|
||||
struct lowering_state state = {
|
||||
.devinfo = devinfo,
|
||||
.impl = nir_shader_get_entrypoint(shader),
|
||||
.queries = _mesa_pointer_hash_table_create(NULL),
|
||||
};
|
||||
|
||||
/* Map all query variable to internal type variables */
|
||||
nir_foreach_function_temp_variable(var, state.impl)
|
||||
register_opaque_var(var, &state);
|
||||
hash_table_foreach(state.queries, entry)
|
||||
create_internal_var(entry->data, &state);
|
||||
|
||||
bool progress = state.n_queries > 0;
|
||||
|
||||
if (progress) {
|
||||
lower_ray_query_impl(state.impl, &state);
|
||||
|
||||
nir_remove_dead_derefs(shader);
|
||||
nir_remove_dead_variables(shader,
|
||||
nir_var_shader_temp | nir_var_function_temp,
|
||||
NULL);
|
||||
|
||||
nir_metadata_preserve(state.impl, nir_metadata_none);
|
||||
}
|
||||
|
||||
ralloc_free(state.queries);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
|
@ -1,386 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "brw_nir_rt.h"
|
||||
#include "brw_nir_rt_builder.h"
|
||||
|
||||
static nir_def *
|
||||
build_leaf_is_procedural(nir_builder *b, struct brw_nir_rt_mem_hit_defs *hit)
|
||||
{
|
||||
switch (b->shader->info.stage) {
|
||||
case MESA_SHADER_ANY_HIT:
|
||||
/* Any-hit shaders are always compiled into intersection shaders for
|
||||
* procedural geometry. If we got here in an any-hit shader, it's for
|
||||
* triangles.
|
||||
*/
|
||||
return nir_imm_false(b);
|
||||
|
||||
case MESA_SHADER_INTERSECTION:
|
||||
return nir_imm_true(b);
|
||||
|
||||
default:
|
||||
return nir_ieq_imm(b, hit->leaf_type,
|
||||
BRW_RT_BVH_NODE_TYPE_PROCEDURAL);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
lower_rt_intrinsics_impl(nir_function_impl *impl,
|
||||
const struct intel_device_info *devinfo)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
nir_builder build = nir_builder_at(nir_before_impl(impl));
|
||||
nir_builder *b = &build;
|
||||
|
||||
struct brw_nir_rt_globals_defs globals;
|
||||
brw_nir_rt_load_globals(b, &globals);
|
||||
|
||||
nir_def *hotzone_addr = brw_nir_rt_sw_hotzone_addr(b, devinfo);
|
||||
nir_def *hotzone = nir_load_global(b, hotzone_addr, 16, 4, 32);
|
||||
|
||||
gl_shader_stage stage = b->shader->info.stage;
|
||||
struct brw_nir_rt_mem_ray_defs world_ray_in = {};
|
||||
struct brw_nir_rt_mem_ray_defs object_ray_in = {};
|
||||
struct brw_nir_rt_mem_hit_defs hit_in = {};
|
||||
switch (stage) {
|
||||
case MESA_SHADER_ANY_HIT:
|
||||
case MESA_SHADER_CLOSEST_HIT:
|
||||
case MESA_SHADER_INTERSECTION:
|
||||
brw_nir_rt_load_mem_hit(b, &hit_in,
|
||||
stage == MESA_SHADER_CLOSEST_HIT);
|
||||
brw_nir_rt_load_mem_ray(b, &object_ray_in,
|
||||
BRW_RT_BVH_LEVEL_OBJECT);
|
||||
FALLTHROUGH;
|
||||
|
||||
case MESA_SHADER_MISS:
|
||||
brw_nir_rt_load_mem_ray(b, &world_ray_in,
|
||||
BRW_RT_BVH_LEVEL_WORLD);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
nir_def *thread_stack_base_addr = brw_nir_rt_sw_stack_addr(b, devinfo);
|
||||
nir_def *stack_base_offset = nir_channel(b, hotzone, 0);
|
||||
nir_def *stack_base_addr =
|
||||
nir_iadd(b, thread_stack_base_addr, nir_u2u64(b, stack_base_offset));
|
||||
ASSERTED bool seen_scratch_base_ptr_load = false;
|
||||
ASSERTED bool found_resume = false;
|
||||
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
|
||||
b->cursor = nir_after_instr(&intrin->instr);
|
||||
|
||||
nir_def *sysval = NULL;
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_scratch_base_ptr:
|
||||
assert(nir_intrinsic_base(intrin) == 1);
|
||||
seen_scratch_base_ptr_load = true;
|
||||
sysval = stack_base_addr;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_btd_stack_push_intel: {
|
||||
int32_t stack_size = nir_intrinsic_stack_size(intrin);
|
||||
if (stack_size > 0) {
|
||||
nir_def *child_stack_offset =
|
||||
nir_iadd_imm(b, stack_base_offset, stack_size);
|
||||
nir_store_global(b, hotzone_addr, 16, child_stack_offset, 0x1);
|
||||
}
|
||||
nir_instr_remove(instr);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_rt_resume:
|
||||
/* This is the first "interesting" instruction */
|
||||
assert(block == nir_start_block(impl));
|
||||
assert(!seen_scratch_base_ptr_load);
|
||||
found_resume = true;
|
||||
|
||||
int32_t stack_size = nir_intrinsic_stack_size(intrin);
|
||||
if (stack_size > 0) {
|
||||
stack_base_offset =
|
||||
nir_iadd_imm(b, stack_base_offset, -stack_size);
|
||||
nir_store_global(b, hotzone_addr, 16, stack_base_offset, 0x1);
|
||||
stack_base_addr = nir_iadd(b, thread_stack_base_addr,
|
||||
nir_u2u64(b, stack_base_offset));
|
||||
}
|
||||
nir_instr_remove(instr);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_uniform: {
|
||||
/* We don't want to lower this in the launch trampoline. */
|
||||
if (stage == MESA_SHADER_COMPUTE)
|
||||
break;
|
||||
|
||||
sysval = brw_nir_load_global_const(b, intrin,
|
||||
nir_load_btd_global_arg_addr_intel(b),
|
||||
BRW_RT_PUSH_CONST_OFFSET);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_ray_launch_id:
|
||||
sysval = nir_channels(b, hotzone, 0xe);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_launch_size:
|
||||
sysval = globals.launch_size;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_world_origin:
|
||||
sysval = world_ray_in.orig;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_world_direction:
|
||||
sysval = world_ray_in.dir;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_object_origin:
|
||||
sysval = object_ray_in.orig;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_object_direction:
|
||||
sysval = object_ray_in.dir;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_t_min:
|
||||
/* It shouldn't matter which we pull this from */
|
||||
sysval = world_ray_in.t_near;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_t_max:
|
||||
if (stage == MESA_SHADER_MISS)
|
||||
sysval = world_ray_in.t_far;
|
||||
else
|
||||
sysval = hit_in.t;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_primitive_id:
|
||||
sysval = brw_nir_rt_load_primitive_id_from_hit(b,
|
||||
build_leaf_is_procedural(b, &hit_in),
|
||||
&hit_in);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_instance_id: {
|
||||
struct brw_nir_rt_bvh_instance_leaf_defs leaf;
|
||||
brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
|
||||
sysval = leaf.instance_index;
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_ray_object_to_world: {
|
||||
struct brw_nir_rt_bvh_instance_leaf_defs leaf;
|
||||
brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
|
||||
sysval = leaf.object_to_world[nir_intrinsic_column(intrin)];
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_ray_world_to_object: {
|
||||
struct brw_nir_rt_bvh_instance_leaf_defs leaf;
|
||||
brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
|
||||
sysval = leaf.world_to_object[nir_intrinsic_column(intrin)];
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_ray_hit_kind: {
|
||||
nir_def *tri_hit_kind =
|
||||
nir_bcsel(b, hit_in.front_face,
|
||||
nir_imm_int(b, BRW_RT_HIT_KIND_FRONT_FACE),
|
||||
nir_imm_int(b, BRW_RT_HIT_KIND_BACK_FACE));
|
||||
sysval = nir_bcsel(b, build_leaf_is_procedural(b, &hit_in),
|
||||
hit_in.aabb_hit_kind, tri_hit_kind);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_ray_flags:
|
||||
/* We need to fetch the original ray flags we stored in the
|
||||
* leaf pointer, because the actual ray flags we get here
|
||||
* will include any flags passed on the pipeline at creation
|
||||
* time, and the spec for IncomingRayFlagsKHR says:
|
||||
* Setting pipeline flags on the raytracing pipeline must not
|
||||
* cause any corresponding flags to be set in variables with
|
||||
* this decoration.
|
||||
*/
|
||||
sysval = nir_u2u32(b, world_ray_in.inst_leaf_ptr);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_cull_mask:
|
||||
sysval = nir_u2u32(b, world_ray_in.ray_mask);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_geometry_index: {
|
||||
nir_def *geometry_index_dw =
|
||||
nir_load_global(b, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4), 4,
|
||||
1, 32);
|
||||
sysval = nir_iand_imm(b, geometry_index_dw, BITFIELD_MASK(29));
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_ray_instance_custom_index: {
|
||||
struct brw_nir_rt_bvh_instance_leaf_defs leaf;
|
||||
brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
|
||||
sysval = leaf.instance_id;
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_shader_record_ptr:
|
||||
/* We can't handle this intrinsic in resume shaders because the
|
||||
* handle we get there won't be from the original SBT. The shader
|
||||
* call lowering/splitting pass should have ensured that this
|
||||
* value was spilled from the initial shader and unspilled in any
|
||||
* resume shaders that need it.
|
||||
*/
|
||||
assert(!found_resume);
|
||||
sysval = nir_load_btd_local_arg_addr_intel(b);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_base_mem_addr_intel:
|
||||
sysval = globals.base_mem_addr;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_hw_stack_size_intel:
|
||||
sysval = nir_imul_imm(b, globals.hw_stack_size, 64);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_sw_stack_size_intel:
|
||||
sysval = nir_imul_imm(b, globals.sw_stack_size, 64);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_num_dss_rt_stacks_intel:
|
||||
sysval = globals.num_dss_rt_stacks;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_hit_sbt_addr_intel:
|
||||
sysval = globals.hit_sbt_addr;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_hit_sbt_stride_intel:
|
||||
sysval = globals.hit_sbt_stride;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_miss_sbt_addr_intel:
|
||||
sysval = globals.miss_sbt_addr;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_miss_sbt_stride_intel:
|
||||
sysval = globals.miss_sbt_stride;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_callable_sbt_addr_intel:
|
||||
sysval = globals.call_sbt_addr;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_callable_sbt_stride_intel:
|
||||
sysval = globals.call_sbt_stride;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_btd_resume_sbt_addr_intel:
|
||||
sysval = nir_pack_64_2x32_split(b,
|
||||
nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW),
|
||||
nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_leaf_procedural_intel:
|
||||
sysval = build_leaf_is_procedural(b, &hit_in);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_triangle_vertex_positions: {
|
||||
struct brw_nir_rt_bvh_primitive_leaf_positions_defs pos;
|
||||
brw_nir_rt_load_bvh_primitive_leaf_positions(b, &pos, hit_in.prim_leaf_ptr);
|
||||
sysval = pos.positions[nir_intrinsic_column(intrin)];
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_leaf_opaque_intel: {
|
||||
if (stage == MESA_SHADER_INTERSECTION) {
|
||||
/* In intersection shaders, the opaque bit is passed to us in
|
||||
* the front_face bit.
|
||||
*/
|
||||
sysval = hit_in.front_face;
|
||||
} else {
|
||||
nir_def *flags_dw =
|
||||
nir_load_global(b, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4), 4,
|
||||
1, 32);
|
||||
sysval = nir_i2b(b, nir_iand_imm(b, flags_dw, 1u << 30));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
|
||||
progress = true;
|
||||
|
||||
if (sysval) {
|
||||
nir_def_rewrite_uses(&intrin->def,
|
||||
sysval);
|
||||
nir_instr_remove(&intrin->instr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
nir_metadata_preserve(impl,
|
||||
progress ?
|
||||
nir_metadata_none :
|
||||
(nir_metadata_block_index |
|
||||
nir_metadata_dominance));
|
||||
}
|
||||
|
||||
/** Lower ray-tracing system values and intrinsics
|
||||
*
|
||||
* In most 3D shader stages, intrinsics are a fairly thin wrapper around
|
||||
* hardware functionality and system values represent magic bits that come
|
||||
* into the shader from FF hardware. Ray-tracing, however, looks a bit more
|
||||
* like the OpenGL 1.0 world where the underlying hardware is simple and most
|
||||
* of the API implementation is software.
|
||||
*
|
||||
* In particular, most things that are treated as system values (or built-ins
|
||||
* in SPIR-V) don't get magically dropped into registers for us. Instead, we
|
||||
* have to fetch them from the relevant data structures shared with the
|
||||
* ray-tracing hardware. Most come from either the RT_DISPATCH_GLOBALS or
|
||||
* from one of the MemHit data structures. Some, such as primitive_id require
|
||||
* us to fetch the leaf address from the MemHit struct and then manually read
|
||||
* the data out of the BVH. Instead of trying to emit all this code deep in
|
||||
* the back-end where we can't effectively optimize it, we lower it all to
|
||||
* global memory access in NIR.
|
||||
*
|
||||
* Once this pass is complete, the only real system values left are the two
|
||||
* argument pointer system values for BTD dispatch: btd_local_arg_addr and
|
||||
* btd_global_arg_addr.
|
||||
*/
|
||||
void
|
||||
brw_nir_lower_rt_intrinsics(nir_shader *nir,
|
||||
const struct intel_device_info *devinfo)
|
||||
{
|
||||
nir_foreach_function_impl(impl, nir) {
|
||||
lower_rt_intrinsics_impl(impl, devinfo);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,329 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2020 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "brw_nir_rt.h"
|
||||
#include "brw_nir_rt_builder.h"
|
||||
#include "nir_phi_builder.h"
|
||||
|
||||
UNUSED static bool
|
||||
no_load_scratch_base_ptr_intrinsic(nir_shader *shader)
|
||||
{
|
||||
nir_foreach_function_impl(impl, shader) {
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
if (intrin->intrinsic == nir_intrinsic_load_scratch_base_ptr)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Insert the appropriate return instruction at the end of the shader */
|
||||
void
|
||||
brw_nir_lower_shader_returns(nir_shader *shader)
|
||||
{
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
||||
|
||||
/* Reserve scratch space at the start of the shader's per-thread scratch
|
||||
* space for the return BINDLESS_SHADER_RECORD address and data payload.
|
||||
* When a shader is called, the calling shader will write the return BSR
|
||||
* address in this region of the callee's scratch space.
|
||||
*
|
||||
* We could also put it at the end of the caller's scratch space. However,
|
||||
* doing this way means that a shader never accesses its caller's scratch
|
||||
* space unless given an explicit pointer (such as for ray payloads). It
|
||||
* also makes computing the address easier given that we want to apply an
|
||||
* alignment to the scratch offset to ensure we can make alignment
|
||||
* assumptions in the called shader.
|
||||
*
|
||||
* This isn't needed for ray-gen shaders because they end the thread and
|
||||
* never return to the calling trampoline shader.
|
||||
*/
|
||||
assert(no_load_scratch_base_ptr_intrinsic(shader));
|
||||
if (shader->info.stage != MESA_SHADER_RAYGEN)
|
||||
shader->scratch_size += BRW_BTD_STACK_CALLEE_DATA_SIZE;
|
||||
|
||||
nir_builder b = nir_builder_create(impl);
|
||||
|
||||
set_foreach(impl->end_block->predecessors, block_entry) {
|
||||
struct nir_block *block = (void *)block_entry->key;
|
||||
b.cursor = nir_after_block_before_jump(block);
|
||||
|
||||
switch (shader->info.stage) {
|
||||
case MESA_SHADER_RAYGEN:
|
||||
/* A raygen shader is always the root of the shader call tree. When
|
||||
* it ends, we retire the bindless stack ID and no further shaders
|
||||
* will be executed.
|
||||
*/
|
||||
assert(impl->end_block->predecessors->entries == 1);
|
||||
brw_nir_btd_retire(&b);
|
||||
break;
|
||||
|
||||
case MESA_SHADER_ANY_HIT:
|
||||
/* The default action of an any-hit shader is to accept the ray
|
||||
* intersection. Any-hit shaders may have more than one exit. Only
|
||||
* the final "normal" exit will actually need to accept the
|
||||
* intersection as any others should come from nir_jump_halt
|
||||
* instructions inserted after ignore_ray_intersection or
|
||||
* terminate_ray or the like. However, inserting an accept after
|
||||
* the ignore or terminate is safe because it'll get deleted later.
|
||||
*/
|
||||
nir_accept_ray_intersection(&b);
|
||||
break;
|
||||
|
||||
case MESA_SHADER_CALLABLE:
|
||||
case MESA_SHADER_MISS:
|
||||
case MESA_SHADER_CLOSEST_HIT:
|
||||
/* Callable, miss, and closest-hit shaders don't take any special
|
||||
* action at the end. They simply return back to the previous shader
|
||||
* in the call stack.
|
||||
*/
|
||||
assert(impl->end_block->predecessors->entries == 1);
|
||||
brw_nir_btd_return(&b);
|
||||
break;
|
||||
|
||||
case MESA_SHADER_INTERSECTION:
|
||||
/* This will be handled by brw_nir_lower_intersection_shader */
|
||||
break;
|
||||
|
||||
default:
|
||||
unreachable("Invalid callable shader stage");
|
||||
}
|
||||
}
|
||||
|
||||
nir_metadata_preserve(impl, nir_metadata_block_index |
|
||||
nir_metadata_dominance);
|
||||
}
|
||||
|
||||
static void
|
||||
store_resume_addr(nir_builder *b, nir_intrinsic_instr *call)
|
||||
{
|
||||
uint32_t call_idx = nir_intrinsic_call_idx(call);
|
||||
uint32_t offset = nir_intrinsic_stack_size(call);
|
||||
|
||||
/* First thing on the called shader's stack is the resume address
|
||||
* followed by a pointer to the payload.
|
||||
*/
|
||||
nir_def *resume_record_addr =
|
||||
nir_iadd_imm(b, nir_load_btd_resume_sbt_addr_intel(b),
|
||||
call_idx * BRW_BTD_RESUME_SBT_STRIDE);
|
||||
/* By the time we get here, any remaining shader/function memory
|
||||
* pointers have been lowered to SSA values.
|
||||
*/
|
||||
nir_def *payload_addr =
|
||||
nir_get_shader_call_payload_src(call)->ssa;
|
||||
brw_nir_rt_store_scratch(b, offset, BRW_BTD_STACK_ALIGN,
|
||||
nir_vec2(b, resume_record_addr, payload_addr),
|
||||
0xf /* write_mask */);
|
||||
|
||||
nir_btd_stack_push_intel(b, offset);
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_shader_trace_ray_instr(struct nir_builder *b, nir_instr *instr, void *data)
|
||||
{
|
||||
struct brw_bs_prog_key *key = data;
|
||||
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
/* Leave nir_intrinsic_rt_resume to be lowered by
|
||||
* brw_nir_lower_rt_intrinsics()
|
||||
*/
|
||||
nir_intrinsic_instr *call = nir_instr_as_intrinsic(instr);
|
||||
if (call->intrinsic != nir_intrinsic_rt_trace_ray)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_instr_remove(instr);
|
||||
|
||||
store_resume_addr(b, call);
|
||||
|
||||
nir_def *as_addr = call->src[0].ssa;
|
||||
nir_def *ray_flags = call->src[1].ssa;
|
||||
/* From the SPIR-V spec:
|
||||
*
|
||||
* "Only the 8 least-significant bits of Cull Mask are used by this
|
||||
* instruction - other bits are ignored.
|
||||
*
|
||||
* Only the 4 least-significant bits of SBT Offset and SBT Stride are
|
||||
* used by this instruction - other bits are ignored.
|
||||
*
|
||||
* Only the 16 least-significant bits of Miss Index are used by this
|
||||
* instruction - other bits are ignored."
|
||||
*/
|
||||
nir_def *cull_mask = nir_iand_imm(b, call->src[2].ssa, 0xff);
|
||||
nir_def *sbt_offset = nir_iand_imm(b, call->src[3].ssa, 0xf);
|
||||
nir_def *sbt_stride = nir_iand_imm(b, call->src[4].ssa, 0xf);
|
||||
nir_def *miss_index = nir_iand_imm(b, call->src[5].ssa, 0xffff);
|
||||
nir_def *ray_orig = call->src[6].ssa;
|
||||
nir_def *ray_t_min = call->src[7].ssa;
|
||||
nir_def *ray_dir = call->src[8].ssa;
|
||||
nir_def *ray_t_max = call->src[9].ssa;
|
||||
|
||||
nir_def *root_node_ptr =
|
||||
brw_nir_rt_acceleration_structure_to_root_node(b, as_addr);
|
||||
|
||||
/* The hardware packet requires an address to the first element of the
|
||||
* hit SBT.
|
||||
*
|
||||
* In order to calculate this, we must multiply the "SBT Offset"
|
||||
* provided to OpTraceRay by the SBT stride provided for the hit SBT in
|
||||
* the call to vkCmdTraceRay() and add that to the base address of the
|
||||
* hit SBT. This stride is not to be confused with the "SBT Stride"
|
||||
* provided to OpTraceRay which is in units of this stride. It's a
|
||||
* rather terrible overload of the word "stride". The hardware docs
|
||||
* calls the SPIR-V stride value the "shader index multiplier" which is
|
||||
* a much more sane name.
|
||||
*/
|
||||
nir_def *hit_sbt_stride_B =
|
||||
nir_load_ray_hit_sbt_stride_intel(b);
|
||||
nir_def *hit_sbt_offset_B =
|
||||
nir_imul(b, sbt_offset, nir_u2u32(b, hit_sbt_stride_B));
|
||||
nir_def *hit_sbt_addr =
|
||||
nir_iadd(b, nir_load_ray_hit_sbt_addr_intel(b),
|
||||
nir_u2u64(b, hit_sbt_offset_B));
|
||||
|
||||
/* The hardware packet takes an address to the miss BSR. */
|
||||
nir_def *miss_sbt_stride_B =
|
||||
nir_load_ray_miss_sbt_stride_intel(b);
|
||||
nir_def *miss_sbt_offset_B =
|
||||
nir_imul(b, miss_index, nir_u2u32(b, miss_sbt_stride_B));
|
||||
nir_def *miss_sbt_addr =
|
||||
nir_iadd(b, nir_load_ray_miss_sbt_addr_intel(b),
|
||||
nir_u2u64(b, miss_sbt_offset_B));
|
||||
|
||||
struct brw_nir_rt_mem_ray_defs ray_defs = {
|
||||
.root_node_ptr = root_node_ptr,
|
||||
/* Combine the shader value given to traceRayEXT() with the pipeline
|
||||
* creation value VkPipelineCreateFlags.
|
||||
*/
|
||||
.ray_flags = nir_ior_imm(b, nir_u2u16(b, ray_flags), key->pipeline_ray_flags),
|
||||
.ray_mask = cull_mask,
|
||||
.hit_group_sr_base_ptr = hit_sbt_addr,
|
||||
.hit_group_sr_stride = nir_u2u16(b, hit_sbt_stride_B),
|
||||
.miss_sr_ptr = miss_sbt_addr,
|
||||
.orig = ray_orig,
|
||||
.t_near = ray_t_min,
|
||||
.dir = ray_dir,
|
||||
.t_far = ray_t_max,
|
||||
.shader_index_multiplier = sbt_stride,
|
||||
/* The instance leaf pointer is unused in the top level BVH traversal
|
||||
* since we always start from the root node. We can reuse that field to
|
||||
* store the ray_flags handed to traceRayEXT(). This will be reloaded
|
||||
* when the shader accesses gl_IncomingRayFlagsEXT (see
|
||||
* nir_intrinsic_load_ray_flags brw_nir_lower_rt_intrinsic.c)
|
||||
*/
|
||||
.inst_leaf_ptr = nir_u2u64(b, ray_flags),
|
||||
};
|
||||
brw_nir_rt_store_mem_ray(b, &ray_defs, BRW_RT_BVH_LEVEL_WORLD);
|
||||
|
||||
nir_trace_ray_intel(b,
|
||||
nir_load_btd_global_arg_addr_intel(b),
|
||||
nir_imm_int(b, BRW_RT_BVH_LEVEL_WORLD),
|
||||
nir_imm_int(b, GEN_RT_TRACE_RAY_INITAL),
|
||||
.synchronous = false);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_shader_call_instr(struct nir_builder *b, nir_intrinsic_instr *call,
|
||||
void *data)
|
||||
{
|
||||
if (call->intrinsic != nir_intrinsic_rt_execute_callable)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_instr_remove(&call->instr);
|
||||
|
||||
store_resume_addr(b, call);
|
||||
|
||||
nir_def *sbt_offset32 =
|
||||
nir_imul(b, call->src[0].ssa,
|
||||
nir_u2u32(b, nir_load_callable_sbt_stride_intel(b)));
|
||||
nir_def *sbt_addr =
|
||||
nir_iadd(b, nir_load_callable_sbt_addr_intel(b),
|
||||
nir_u2u64(b, sbt_offset32));
|
||||
brw_nir_btd_spawn(b, sbt_addr);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_nir_lower_shader_calls(nir_shader *shader, struct brw_bs_prog_key *key)
|
||||
{
|
||||
bool a = nir_shader_instructions_pass(shader,
|
||||
lower_shader_trace_ray_instr,
|
||||
nir_metadata_none,
|
||||
key);
|
||||
bool b = nir_shader_intrinsics_pass(shader, lower_shader_call_instr,
|
||||
nir_metadata_block_index |
|
||||
nir_metadata_dominance,
|
||||
NULL);
|
||||
return a || b;
|
||||
}
|
||||
|
||||
/** Creates a trivial return shader
|
||||
*
|
||||
* In most cases this shader doesn't actually do anything. It just needs to
|
||||
* return to the caller.
|
||||
*
|
||||
* By default, our HW has the ability to handle the fact that a shader is not
|
||||
* available and will execute the next following shader in the tracing call.
|
||||
* For instance, a RAYGEN shader traces a ray, the tracing generates a hit,
|
||||
* but there is no ANYHIT shader available. The HW should follow up by
|
||||
* execution the CLOSESTHIT shader.
|
||||
*
|
||||
* This default behavior can be changed through the RT_CTRL register
|
||||
* (privileged access) and when NULL shader checks are disabled, the HW will
|
||||
* instead call the call stack handler (this shader). This is what i915 is
|
||||
* doing as part of Wa_14013202645.
|
||||
*
|
||||
* In order to ensure the call to the CLOSESTHIT shader, this shader needs to
|
||||
* commit the ray and will not proceed with the BTD return. Similarly when the
|
||||
* same thing happen with the INTERSECTION shader, we should just carry on the
|
||||
* ray traversal with the continue operation.
|
||||
*
|
||||
*/
|
||||
nir_shader *
|
||||
brw_nir_create_trivial_return_shader(const struct brw_compiler *compiler,
|
||||
void *mem_ctx)
|
||||
{
|
||||
const nir_shader_compiler_options *nir_options =
|
||||
compiler->nir_options[MESA_SHADER_CALLABLE];
|
||||
|
||||
nir_builder _b = nir_builder_init_simple_shader(MESA_SHADER_CALLABLE,
|
||||
nir_options,
|
||||
"RT Trivial Return");
|
||||
nir_builder *b = &_b;
|
||||
|
||||
ralloc_steal(mem_ctx, b->shader);
|
||||
nir_shader *nir = b->shader;
|
||||
|
||||
NIR_PASS_V(nir, brw_nir_lower_shader_returns);
|
||||
|
||||
return nir;
|
||||
}
|
||||
|
|
@ -1,536 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2020 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "intel_nir.h"
|
||||
#include "brw_nir_rt.h"
|
||||
#include "brw_nir_rt_builder.h"
|
||||
#include "intel_nir.h"
|
||||
|
||||
static bool
|
||||
resize_deref(nir_builder *b, nir_deref_instr *deref,
|
||||
unsigned num_components, unsigned bit_size)
|
||||
{
|
||||
if (deref->def.num_components == num_components &&
|
||||
deref->def.bit_size == bit_size)
|
||||
return false;
|
||||
|
||||
/* NIR requires array indices have to match the deref bit size */
|
||||
if (deref->def.bit_size != bit_size &&
|
||||
(deref->deref_type == nir_deref_type_array ||
|
||||
deref->deref_type == nir_deref_type_ptr_as_array)) {
|
||||
b->cursor = nir_before_instr(&deref->instr);
|
||||
nir_def *idx;
|
||||
if (nir_src_is_const(deref->arr.index)) {
|
||||
idx = nir_imm_intN_t(b, nir_src_as_int(deref->arr.index), bit_size);
|
||||
} else {
|
||||
idx = nir_i2iN(b, deref->arr.index.ssa, bit_size);
|
||||
}
|
||||
nir_src_rewrite(&deref->arr.index, idx);
|
||||
}
|
||||
|
||||
deref->def.num_components = num_components;
|
||||
deref->def.bit_size = bit_size;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_rt_io_derefs(nir_shader *shader)
|
||||
{
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
||||
|
||||
bool progress = false;
|
||||
|
||||
unsigned num_shader_call_vars = 0;
|
||||
nir_foreach_variable_with_modes(var, shader, nir_var_shader_call_data)
|
||||
num_shader_call_vars++;
|
||||
|
||||
unsigned num_ray_hit_attrib_vars = 0;
|
||||
nir_foreach_variable_with_modes(var, shader, nir_var_ray_hit_attrib)
|
||||
num_ray_hit_attrib_vars++;
|
||||
|
||||
/* At most one payload is allowed because it's an input. Technically, this
|
||||
* is also true for hit attribute variables. However, after we inline an
|
||||
* any-hit shader into an intersection shader, we can end up with multiple
|
||||
* hit attribute variables. They'll end up mapping to a cast from the same
|
||||
* base pointer so this is fine.
|
||||
*/
|
||||
assert(num_shader_call_vars <= 1);
|
||||
|
||||
nir_builder b = nir_builder_at(nir_before_impl(impl));
|
||||
|
||||
nir_def *call_data_addr = NULL;
|
||||
if (num_shader_call_vars > 0) {
|
||||
assert(shader->scratch_size >= BRW_BTD_STACK_CALLEE_DATA_SIZE);
|
||||
call_data_addr =
|
||||
brw_nir_rt_load_scratch(&b, BRW_BTD_STACK_CALL_DATA_PTR_OFFSET, 8,
|
||||
1, 64);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
gl_shader_stage stage = shader->info.stage;
|
||||
nir_def *hit_attrib_addr = NULL;
|
||||
if (num_ray_hit_attrib_vars > 0) {
|
||||
assert(stage == MESA_SHADER_ANY_HIT ||
|
||||
stage == MESA_SHADER_CLOSEST_HIT ||
|
||||
stage == MESA_SHADER_INTERSECTION);
|
||||
nir_def *hit_addr =
|
||||
brw_nir_rt_mem_hit_addr(&b, stage == MESA_SHADER_CLOSEST_HIT);
|
||||
/* The vec2 barycentrics are in 2nd and 3rd dwords of MemHit */
|
||||
nir_def *bary_addr = nir_iadd_imm(&b, hit_addr, 4);
|
||||
hit_attrib_addr = nir_bcsel(&b, nir_load_leaf_procedural_intel(&b),
|
||||
brw_nir_rt_hit_attrib_data_addr(&b),
|
||||
bary_addr);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_deref)
|
||||
continue;
|
||||
|
||||
nir_deref_instr *deref = nir_instr_as_deref(instr);
|
||||
if (nir_deref_mode_is(deref, nir_var_shader_call_data)) {
|
||||
deref->modes = nir_var_function_temp;
|
||||
if (deref->deref_type == nir_deref_type_var) {
|
||||
b.cursor = nir_before_instr(&deref->instr);
|
||||
nir_deref_instr *cast =
|
||||
nir_build_deref_cast(&b, call_data_addr,
|
||||
nir_var_function_temp,
|
||||
deref->var->type, 0);
|
||||
nir_def_rewrite_uses(&deref->def,
|
||||
&cast->def);
|
||||
nir_instr_remove(&deref->instr);
|
||||
progress = true;
|
||||
}
|
||||
} else if (nir_deref_mode_is(deref, nir_var_ray_hit_attrib)) {
|
||||
deref->modes = nir_var_function_temp;
|
||||
if (deref->deref_type == nir_deref_type_var) {
|
||||
b.cursor = nir_before_instr(&deref->instr);
|
||||
nir_deref_instr *cast =
|
||||
nir_build_deref_cast(&b, hit_attrib_addr,
|
||||
nir_var_function_temp,
|
||||
deref->type, 0);
|
||||
nir_def_rewrite_uses(&deref->def,
|
||||
&cast->def);
|
||||
nir_instr_remove(&deref->instr);
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* We're going to lower all function_temp memory to scratch using
|
||||
* 64-bit addresses. We need to resize all our derefs first or else
|
||||
* nir_lower_explicit_io will have a fit.
|
||||
*/
|
||||
if (nir_deref_mode_is(deref, nir_var_function_temp) &&
|
||||
resize_deref(&b, deref, 1, 64))
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (progress) {
|
||||
nir_metadata_preserve(impl, nir_metadata_block_index |
|
||||
nir_metadata_dominance);
|
||||
} else {
|
||||
nir_metadata_preserve(impl, nir_metadata_all);
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
/** Lowers ray-tracing shader I/O and scratch access
|
||||
*
|
||||
* SPV_KHR_ray_tracing adds three new types of I/O, each of which need their
|
||||
* own bit of special care:
|
||||
*
|
||||
* - Shader payload data: This is represented by the IncomingCallableData
|
||||
* and IncomingRayPayload storage classes which are both represented by
|
||||
* nir_var_call_data in NIR. There is at most one of these per-shader and
|
||||
* they contain payload data passed down the stack from the parent shader
|
||||
* when it calls executeCallable() or traceRay(). In our implementation,
|
||||
* the actual storage lives in the calling shader's scratch space and we're
|
||||
* passed a pointer to it.
|
||||
*
|
||||
* - Hit attribute data: This is represented by the HitAttribute storage
|
||||
* class in SPIR-V and nir_var_ray_hit_attrib in NIR. For triangle
|
||||
* geometry, it's supposed to contain two floats which are the barycentric
|
||||
* coordinates. For AABS/procedural geometry, it contains the hit data
|
||||
* written out by the intersection shader. In our implementation, it's a
|
||||
* 64-bit pointer which points either to the u/v area of the relevant
|
||||
* MemHit data structure or the space right after the HW ray stack entry.
|
||||
*
|
||||
* - Shader record buffer data: This allows read-only access to the data
|
||||
* stored in the SBT right after the bindless shader handles. It's
|
||||
* effectively a UBO with a magic address. Coming out of spirv_to_nir,
|
||||
* we get a nir_intrinsic_load_shader_record_ptr which is cast to a
|
||||
* nir_var_mem_global deref and all access happens through that. The
|
||||
* shader_record_ptr system value is handled in brw_nir_lower_rt_intrinsics
|
||||
* and we assume nir_lower_explicit_io is called elsewhere thanks to
|
||||
* VK_KHR_buffer_device_address so there's really nothing to do here.
|
||||
*
|
||||
* We also handle lowering any remaining function_temp variables to scratch at
|
||||
* this point. This gets rid of any remaining arrays and also takes care of
|
||||
* the sending side of ray payloads where we pass pointers to a function_temp
|
||||
* variable down the call stack.
|
||||
*/
|
||||
/* Lower the ray-tracing I/O storage classes (shader call data, hit
 * attributes) and any remaining function_temp / mem_constant access down to
 * 64-bit global memory access, as described in the comment above.
 */
static void
lower_rt_io_and_scratch(nir_shader *nir)
{
   /* First, we need to ensure all the I/O variables have explicit types.
    * Because these are shader-internal and don't come in from outside, they
    * don't have an explicit memory layout and we have to assign them one.
    */
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_function_temp |
              nir_var_shader_call_data |
              nir_var_ray_hit_attrib,
              glsl_get_natural_size_align_bytes);

   /* Now patch any derefs to I/O vars */
   NIR_PASS_V(nir, lower_rt_io_derefs);

   /* Finally, lower any remaining function_temp, mem_constant, or
    * ray_hit_attrib access to 64-bit global memory access.
    */
   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_function_temp |
              nir_var_mem_constant |
              nir_var_ray_hit_attrib,
              nir_address_format_64bit_global);
}
|
||||
|
||||
/* Emit code implementing terminateRayEXT: commit the current hit and either
 * return directly (when closest-hit is skipped via ray flags) or spawn the
 * closest-hit shader.  Both paths end in a halt.
 */
static void
build_terminate_ray(nir_builder *b)
{
   nir_def *skip_closest_hit = nir_test_mask(b, nir_load_ray_flags(b),
      BRW_RT_RAY_FLAG_SKIP_CLOSEST_HIT_SHADER);
   nir_push_if(b, skip_closest_hit);
   {
      /* The shader that calls traceRay() is unable to access any ray hit
       * information except for that which is explicitly written into the ray
       * payload by shaders invoked during the trace.  If there's no closest-
       * hit shader, then accepting the hit has no observable effect; it's
       * just extra memory traffic for no reason.
       */
      brw_nir_btd_return(b);
      nir_jump(b, nir_jump_halt);
   }
   nir_push_else(b, NULL);
   {
      /* The closest hit shader is in the same shader group as the any-hit
       * shader that we're currently in.  We can get the address for its SBT
       * handle by looking at the shader record pointer and subtracting the
       * size of a SBT handle.  The BINDLESS_SHADER_RECORD for a closest hit
       * shader is the first one in the SBT handle.
       */
      nir_def *closest_hit =
         nir_iadd_imm(b, nir_load_shader_record_ptr(b),
                         -BRW_RT_SBT_HANDLE_SIZE);

      brw_nir_rt_commit_hit(b);
      brw_nir_btd_spawn(b, closest_hit);
      nir_jump(b, nir_jump_halt);
   }
   nir_pop_if(b, NULL);
}
|
||||
|
||||
/** Lowers away ray walk intrinsics
|
||||
*
|
||||
* This lowers terminate_ray, ignore_ray_intersection, and the NIR-specific
|
||||
* accept_ray_intersection intrinsics to the appropriate Intel-specific
|
||||
* intrinsics.
|
||||
*/
|
||||
/* Lower ignore_ray_intersection, accept_ray_intersection, and terminate_ray
 * in any-hit/intersection shaders to Intel trace_ray/BTD intrinsics.
 *
 * Returns true if any instruction was rewritten.  The devinfo parameter is
 * currently unused; presumably kept for pass-signature consistency — TODO
 * confirm.
 */
static bool
lower_ray_walk_intrinsics(nir_shader *shader,
                          const struct intel_device_info *devinfo)
{
   assert(shader->info.stage == MESA_SHADER_ANY_HIT ||
          shader->info.stage == MESA_SHADER_INTERSECTION);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b = nir_builder_create(impl);

   bool progress = false;
   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         switch (intrin->intrinsic) {
         case nir_intrinsic_ignore_ray_intersection: {
            b.cursor = nir_instr_remove(&intrin->instr);

            /* We put the newly emitted code inside a dummy if because it's
             * going to contain a jump instruction and we don't want to deal
             * with that mess here.  It'll get dealt with by our control-flow
             * optimization passes.
             */
            nir_push_if(&b, nir_imm_true(&b));
            /* Ignoring the hit: resume traversal without committing. */
            nir_trace_ray_intel(&b,
                                nir_load_btd_global_arg_addr_intel(&b),
                                nir_imm_int(&b, BRW_RT_BVH_LEVEL_OBJECT),
                                nir_imm_int(&b, GEN_RT_TRACE_RAY_CONTINUE),
                                .synchronous = false);
            nir_jump(&b, nir_jump_halt);
            nir_pop_if(&b, NULL);
            progress = true;
            break;
         }

         case nir_intrinsic_accept_ray_intersection: {
            b.cursor = nir_instr_remove(&intrin->instr);

            /* With RayFlagsTerminateOnFirstHit, accepting the hit also
             * terminates the ray walk; otherwise commit and continue.
             */
            nir_def *terminate = nir_test_mask(&b, nir_load_ray_flags(&b),
                                               BRW_RT_RAY_FLAG_TERMINATE_ON_FIRST_HIT);
            nir_push_if(&b, terminate);
            {
               build_terminate_ray(&b);
            }
            nir_push_else(&b, NULL);
            {
               nir_trace_ray_intel(&b,
                                   nir_load_btd_global_arg_addr_intel(&b),
                                   nir_imm_int(&b, BRW_RT_BVH_LEVEL_OBJECT),
                                   nir_imm_int(&b, GEN_RT_TRACE_RAY_COMMIT),
                                   .synchronous = false);
               nir_jump(&b, nir_jump_halt);
            }
            nir_pop_if(&b, NULL);
            progress = true;
            break;
         }

         case nir_intrinsic_terminate_ray: {
            b.cursor = nir_instr_remove(&intrin->instr);
            build_terminate_ray(&b);
            progress = true;
            break;
         }

         default:
            break;
         }
      }
   }

   /* Inserted control flow invalidates all metadata when we made progress. */
   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_none);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}
|
||||
|
||||
/* Backend lowering for ray-generation shaders: lower shader returns, then
 * RT I/O and scratch.
 */
void
brw_nir_lower_raygen(nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_RAYGEN);
   NIR_PASS_V(nir, brw_nir_lower_shader_returns);
   lower_rt_io_and_scratch(nir);
}
|
||||
|
||||
/* Backend lowering for stand-alone any-hit shaders: additionally lowers the
 * ray-walk intrinsics (ignore/accept/terminate) to Intel intrinsics.
 */
void
brw_nir_lower_any_hit(nir_shader *nir, const struct intel_device_info *devinfo)
{
   assert(nir->info.stage == MESA_SHADER_ANY_HIT);
   NIR_PASS_V(nir, brw_nir_lower_shader_returns);
   NIR_PASS_V(nir, lower_ray_walk_intrinsics, devinfo);
   lower_rt_io_and_scratch(nir);
}
|
||||
|
||||
/* Backend lowering for closest-hit shaders. */
void
brw_nir_lower_closest_hit(nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_CLOSEST_HIT);
   NIR_PASS_V(nir, brw_nir_lower_shader_returns);
   lower_rt_io_and_scratch(nir);
}
|
||||
|
||||
/* Backend lowering for miss shaders. */
void
brw_nir_lower_miss(nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_MISS);
   NIR_PASS_V(nir, brw_nir_lower_shader_returns);
   lower_rt_io_and_scratch(nir);
}
|
||||
|
||||
/* Backend lowering for callable shaders. */
void
brw_nir_lower_callable(nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_CALLABLE);
   NIR_PASS_V(nir, brw_nir_lower_shader_returns);
   lower_rt_io_and_scratch(nir);
}
|
||||
|
||||
/* Backend lowering for intersection shaders: inlines the (optional) any-hit
 * shader into the intersection shader, then lowers ray-walk intrinsics and
 * RT I/O.  any_hit may be NULL.
 */
void
brw_nir_lower_combined_intersection_any_hit(nir_shader *intersection,
                                            const nir_shader *any_hit,
                                            const struct intel_device_info *devinfo)
{
   assert(intersection->info.stage == MESA_SHADER_INTERSECTION);
   assert(any_hit == NULL || any_hit->info.stage == MESA_SHADER_ANY_HIT);
   NIR_PASS_V(intersection, brw_nir_lower_shader_returns);
   NIR_PASS_V(intersection, brw_nir_lower_intersection_shader,
              any_hit, devinfo);
   NIR_PASS_V(intersection, lower_ray_walk_intrinsics, devinfo);
   lower_rt_io_and_scratch(intersection);
}
|
||||
|
||||
/* Load num_components x bit_size of push-constant (uniform) data starting at
 * byte offset `offset`.
 */
static nir_def *
build_load_uniform(nir_builder *b, unsigned offset,
                   unsigned num_components, unsigned bit_size)
{
   return nir_load_uniform(b, num_components, bit_size, nir_imm_int(b, 0),
                           .base = offset,
                           .range = num_components * bit_size / 8);
}
|
||||
|
||||
/* Load field `name` of struct brw_rt_raygen_trampoline_params from the
 * trampoline's push constants.
 */
#define load_trampoline_param(b, name, num_components, bit_size) \
   build_load_uniform((b), offsetof(struct brw_rt_raygen_trampoline_params, name), \
                      (num_components), (bit_size))
|
||||
|
||||
/* Build the compute shader used to launch a ray-tracing dispatch.
 *
 * The trampoline computes a 3-D launch ID from the workgroup ID and the
 * SIMD channel, initializes the software hotzone for in-bounds invocations,
 * and BTD-spawns the application's ray-generation shader.  Parameters come
 * in as push constants laid out as struct brw_rt_raygen_trampoline_params.
 * The returned shader is allocated out of mem_ctx.
 */
nir_shader *
brw_nir_create_raygen_trampoline(const struct brw_compiler *compiler,
                                 void *mem_ctx)
{
   const struct intel_device_info *devinfo = compiler->devinfo;
   const nir_shader_compiler_options *nir_options =
      compiler->nir_options[MESA_SHADER_COMPUTE];

   STATIC_ASSERT(sizeof(struct brw_rt_raygen_trampoline_params) == 32);

   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                                  nir_options,
                                                  "RT Ray-Gen Trampoline");
   ralloc_steal(mem_ctx, b.shader);

   /* Workgroup size is chosen at dispatch time, not compile time. */
   b.shader->info.workgroup_size_variable = true;

   /* The RT global data and raygen BINDLESS_SHADER_RECORD addresses are
    * passed in as push constants in the first register.  We deal with the
    * raygen BSR address here; the global data we'll deal with later.
    */
   b.shader->num_uniforms = 32;
   nir_def *raygen_param_bsr_addr =
      load_trampoline_param(&b, raygen_bsr_addr, 1, 64);
   nir_def *is_indirect =
      nir_i2b(&b, load_trampoline_param(&b, is_indirect, 1, 8));
   nir_def *local_shift =
      nir_u2u32(&b, load_trampoline_param(&b, local_group_size_log2, 3, 8));

   /* For indirect dispatch, the push constant holds a pointer to the BSR
    * address rather than the address itself, so dereference it.
    */
   nir_def *raygen_indirect_bsr_addr;
   nir_push_if(&b, is_indirect);
   {
      raygen_indirect_bsr_addr =
         nir_load_global_constant(&b, raygen_param_bsr_addr,
                                  8 /* align */,
                                  1 /* components */,
                                  64 /* bit_size */);
   }
   nir_pop_if(&b, NULL);

   nir_def *raygen_bsr_addr =
      nir_if_phi(&b, raygen_indirect_bsr_addr, raygen_param_bsr_addr);

   /* Reconstruct the 3-D local invocation index from the SIMD channel using
    * the per-axis log2 group sizes, then combine with the workgroup ID.
    */
   nir_def *global_id = nir_load_workgroup_id_zero_base(&b);
   nir_def *simd_channel = nir_load_subgroup_invocation(&b);
   nir_def *local_x =
      nir_ubfe(&b, simd_channel, nir_imm_int(&b, 0),
               nir_channel(&b, local_shift, 0));
   nir_def *local_y =
      nir_ubfe(&b, simd_channel, nir_channel(&b, local_shift, 0),
               nir_channel(&b, local_shift, 1));
   nir_def *local_z =
      nir_ubfe(&b, simd_channel,
               nir_iadd(&b, nir_channel(&b, local_shift, 0),
                        nir_channel(&b, local_shift, 1)),
               nir_channel(&b, local_shift, 2));
   nir_def *launch_id =
      nir_iadd(&b, nir_ishl(&b, global_id, local_shift),
               nir_vec3(&b, local_x, local_y, local_z));

   nir_def *launch_size = nir_load_ray_launch_size(&b);
   nir_push_if(&b, nir_ball(&b, nir_ult(&b, launch_id, launch_size)));
   {
      /* Initialize this invocation's hotzone: zero stack pointer plus the
       * 3-D launch ID.
       */
      nir_store_global(&b, brw_nir_rt_sw_hotzone_addr(&b, devinfo), 16,
                       nir_vec4(&b, nir_imm_int(&b, 0), /* Stack ptr */
                                nir_channel(&b, launch_id, 0),
                                nir_channel(&b, launch_id, 1),
                                nir_channel(&b, launch_id, 2)),
                       0xf /* write mask */);

      brw_nir_btd_spawn(&b, raygen_bsr_addr);
   }
   nir_push_else(&b, NULL);
   {
      /* Even though these invocations aren't being used for anything, the
       * hardware allocated stack IDs for them.  They need to retire them.
       */
      brw_nir_btd_retire(&b);
   }
   nir_pop_if(&b, NULL);

   nir_shader *nir = b.shader;
   nir->info.name = ralloc_strdup(nir, "RT: TraceRay trampoline");
   nir_validate_shader(nir, "in brw_nir_create_raygen_trampoline");

   struct brw_nir_compiler_opts opts = {};
   brw_preprocess_nir(compiler, nir, &opts);

   NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo);

   b = nir_builder_create(nir_shader_get_entrypoint(b.shader));
   /* brw_nir_lower_rt_intrinsics will leave us with a btd_global_arg_addr
    * intrinsic which doesn't exist in compute shaders.  We also created one
    * above when we generated the BTD spawn intrinsic.  Now we go through and
    * replace them with a uniform load.
    */
   nir_foreach_block(block, b.impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic != nir_intrinsic_load_btd_global_arg_addr_intel)
            continue;

         b.cursor = nir_before_instr(&intrin->instr);
         nir_def *global_arg_addr =
            load_trampoline_param(&b, rt_disp_globals_addr, 1, 64);
         nir_def_rewrite_uses(&intrin->def,
                              global_arg_addr);
         nir_instr_remove(instr);
      }
   }

   NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, devinfo, NULL);

   const bool is_scalar = true;
   brw_nir_optimize(nir, is_scalar, devinfo);

   return nir;
}
|
||||
|
|
@ -1,76 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2020 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BRW_NIR_RT_H
#define BRW_NIR_RT_H

#include "brw_nir.h"
#include "brw_rt.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Per-stage backend lowering entry points for ray-tracing shaders. */
void brw_nir_lower_raygen(nir_shader *nir);
void brw_nir_lower_any_hit(nir_shader *nir,
                           const struct intel_device_info *devinfo);
void brw_nir_lower_closest_hit(nir_shader *nir);
void brw_nir_lower_miss(nir_shader *nir);
void brw_nir_lower_callable(nir_shader *nir);
void brw_nir_lower_combined_intersection_any_hit(nir_shader *intersection,
                                                 const nir_shader *any_hit,
                                                 const struct intel_device_info *devinfo);

/* We reserve the first 16B of the stack for callee data pointers */
#define BRW_BTD_STACK_RESUME_BSR_ADDR_OFFSET 0
#define BRW_BTD_STACK_CALL_DATA_PTR_OFFSET 8
#define BRW_BTD_STACK_CALLEE_DATA_SIZE 16

/* We require the stack to be 8B aligned at the start of a shader */
#define BRW_BTD_STACK_ALIGN 8

bool brw_nir_lower_ray_queries(nir_shader *shader,
                               const struct intel_device_info *devinfo);

void brw_nir_lower_shader_returns(nir_shader *shader);

bool brw_nir_lower_shader_calls(nir_shader *shader, struct brw_bs_prog_key *key);

void brw_nir_lower_rt_intrinsics(nir_shader *shader,
                                 const struct intel_device_info *devinfo);
void brw_nir_lower_intersection_shader(nir_shader *intersection,
                                       const nir_shader *any_hit,
                                       const struct intel_device_info *devinfo);

/* Helper-shader generators; results are allocated out of mem_ctx. */
nir_shader *
brw_nir_create_raygen_trampoline(const struct brw_compiler *compiler,
                                 void *mem_ctx);
nir_shader *
brw_nir_create_trivial_return_shader(const struct brw_compiler *compiler,
                                     void *mem_ctx);

#ifdef __cplusplus
}
#endif

#endif /* BRW_NIR_RT_H */
|
||||
|
|
@ -1,990 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2020 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BRW_NIR_RT_BUILDER_H
|
||||
#define BRW_NIR_RT_BUILDER_H
|
||||
|
||||
/* This file provides helpers to access memory based data structures that the
|
||||
* RT hardware reads/writes and their locations.
|
||||
*
|
||||
* See also "Memory Based Data Structures for Ray Tracing" (BSpec 47547) and
|
||||
* "Ray Tracing Address Computation for Memory Resident Structures" (BSpec
|
||||
* 47550).
|
||||
*/
|
||||
|
||||
#include "brw_rt.h"
|
||||
#include "nir_builder.h"
|
||||
|
||||
/* Access qualifier for global memory built by this builder: in fragment
 * shaders, include helper invocations in the access.
 */
#define is_access_for_builder(b) \
   ((b)->shader->info.stage == MESA_SHADER_FRAGMENT ? \
    ACCESS_INCLUDE_HELPERS : 0)
|
||||
|
||||
/* Load `components` x `bit_size` from global memory at addr with the given
 * alignment.
 */
static inline nir_def *
brw_nir_rt_load(nir_builder *b, nir_def *addr, unsigned align,
                unsigned components, unsigned bit_size)
{
   return nir_build_load_global(b, components, bit_size, addr,
                                .align_mul = align,
                                .access = is_access_for_builder(b));
}
|
||||
|
||||
/* Store `value` (masked by write_mask) to global memory at addr. */
static inline void
brw_nir_rt_store(nir_builder *b, nir_def *addr, unsigned align,
                 nir_def *value, unsigned write_mask)
{
   nir_build_store_global(b, value, addr,
                          .align_mul = align,
                          /* Clamp the mask to the actual component count. */
                          .write_mask = (write_mask) &
                                        BITFIELD_MASK(value->num_components),
                          .access = is_access_for_builder(b));
}
|
||||
|
||||
/* Block-load uniform (constant across the subgroup) data, predicated on
 * `pred`.
 */
static inline nir_def *
brw_nir_rt_load_const(nir_builder *b, unsigned components,
                      nir_def *addr, nir_def *pred)
{
   return nir_load_global_const_block_intel(b, components, addr, pred);
}
|
||||
|
||||
/* Load the dual-subslice (DSS) ID of the executing thread. */
static inline nir_def *
brw_load_btd_dss_id(nir_builder *b)
{
   return nir_load_topology_id_intel(b, .base = BRW_TOPOLOGY_ID_DSS);
}
|
||||
|
||||
/* Total number of SIMD lanes per DSS: threads/EU * EUs/subslice * 16. */
static inline nir_def *
brw_nir_rt_load_num_simd_lanes_per_dss(nir_builder *b,
                                       const struct intel_device_info *devinfo)
{
   return nir_imm_int(b, devinfo->num_thread_per_eu *
                         devinfo->max_eus_per_subslice *
                         16 /* The RT computation is based off SIMD16 */);
}
|
||||
|
||||
/* Load the combined EU/thread/SIMD-lane topology ID of this invocation. */
static inline nir_def *
brw_load_eu_thread_simd(nir_builder *b)
{
   return nir_load_topology_id_intel(b, .base = BRW_TOPOLOGY_ID_EU_THREAD_SIMD);
}
|
||||
|
||||
/* Global async RT stack ID: DSSID * numDSSRTStacks + per-DSS stack ID. */
static inline nir_def *
brw_nir_rt_async_stack_id(nir_builder *b)
{
   return nir_iadd(b, nir_umul_32x16(b, nir_load_ray_num_dss_rt_stacks_intel(b),
                                     brw_load_btd_dss_id(b)),
                   nir_load_btd_stack_id_intel(b));
}
|
||||
|
||||
/* Synchronous (ray query) stack ID: the EU/thread/SIMD topology ID. */
static inline nir_def *
brw_nir_rt_sync_stack_id(nir_builder *b)
{
   return brw_load_eu_thread_simd(b);
}
|
||||
|
||||
/* We have our own load/store scratch helpers because they emit a global
 * memory read or write based on the scratch_base_ptr system value rather
 * than a load/store_scratch intrinsic.
 */
static inline nir_def *
brw_nir_rt_load_scratch(nir_builder *b, uint32_t offset, unsigned align,
                        unsigned num_components, unsigned bit_size)
{
   nir_def *addr =
      nir_iadd_imm(b, nir_load_scratch_base_ptr(b, 1, 64, 1), offset);
   /* Alignment can't exceed the guaranteed stack alignment. */
   return brw_nir_rt_load(b, addr, MIN2(align, BRW_BTD_STACK_ALIGN),
                          num_components, bit_size);
}
|
||||
|
||||
/* Store to this shader's scratch space at `offset`; see
 * brw_nir_rt_load_scratch for why this is a global store.
 */
static inline void
brw_nir_rt_store_scratch(nir_builder *b, uint32_t offset, unsigned align,
                         nir_def *value, nir_component_mask_t write_mask)
{
   nir_def *addr =
      nir_iadd_imm(b, nir_load_scratch_base_ptr(b, 1, 64, 1), offset);
   /* Alignment can't exceed the guaranteed stack alignment. */
   brw_nir_rt_store(b, addr, MIN2(align, BRW_BTD_STACK_ALIGN),
                    value, write_mask);
}
|
||||
|
||||
/* Spawn the bindless shader whose BINDLESS_SHADER_RECORD is at record_addr. */
static inline void
brw_nir_btd_spawn(nir_builder *b, nir_def *record_addr)
{
   nir_btd_spawn_intel(b, nir_load_btd_global_arg_addr_intel(b), record_addr);
}
|
||||
|
||||
/* Retire this invocation's BTD stack ID. */
static inline void
brw_nir_btd_retire(nir_builder *b)
{
   nir_btd_retire_intel(b);
}
|
||||
|
||||
/** This is a pseudo-op which does a bindless return
 *
 * It loads the return address from the stack and calls btd_spawn to spawn
 * the resume shader.
 */
static inline void
brw_nir_btd_return(struct nir_builder *b)
{
   nir_def *resume_addr =
      brw_nir_rt_load_scratch(b, BRW_BTD_STACK_RESUME_BSR_ADDR_OFFSET,
                              8 /* align */, 1, 64);
   brw_nir_btd_spawn(b, resume_addr);
}
|
||||
|
||||
/* Debug helper: assert a def has the expected component count and bit size. */
static inline void
assert_def_size(nir_def *def, unsigned num_components, unsigned bit_size)
{
   assert(def->num_components == num_components);
   assert(def->bit_size == bit_size);
}
|
||||
|
||||
/* Total number of HW RT stacks on the device: stacks-per-DSS times the
 * upper bound on DSS IDs.
 */
static inline nir_def *
brw_nir_num_rt_stacks(nir_builder *b,
                      const struct intel_device_info *devinfo)
{
   return nir_imul_imm(b, nir_load_ray_num_dss_rt_stacks_intel(b),
                          intel_device_info_dual_subslice_id_bound(devinfo));
}
|
||||
|
||||
/* Address of this invocation's software "hotzone".
 *
 * Hotzones are laid out growing downwards from the RT memory base address:
 * one BRW_RT_SIZEOF_HOTZONE slot per stack, indexed by async stack ID.
 */
static inline nir_def *
brw_nir_rt_sw_hotzone_addr(nir_builder *b,
                           const struct intel_device_info *devinfo)
{
   nir_def *offset32 =
      nir_imul_imm(b, brw_nir_rt_async_stack_id(b),
                      BRW_RT_SIZEOF_HOTZONE);

   /* Subtract the full hotzone array size so offsets are negative relative
    * to the base address.
    */
   offset32 = nir_iadd(b, offset32, nir_ineg(b,
      nir_imul_imm(b, brw_nir_num_rt_stacks(b, devinfo),
                      BRW_RT_SIZEOF_HOTZONE)));

   /* Sign-extend: the offset is negative. */
   return nir_iadd(b, nir_load_ray_base_mem_addr_intel(b),
                      nir_i2i64(b, offset32));
}
|
||||
|
||||
/* Address of this invocation's synchronous (ray query) RT stack. */
static inline nir_def *
brw_nir_rt_sync_stack_addr(nir_builder *b,
                           nir_def *base_mem_addr,
                           const struct intel_device_info *devinfo)
{
   /* For Ray queries (Synchronous Ray Tracing), the formula is similar but
    * goes down from rtMemBasePtr :
    *
    *    syncBase  = RTDispatchGlobals.rtMemBasePtr
    *              - (DSSID * NUM_SIMD_LANES_PER_DSS + SyncStackID + 1)
    *              * syncStackSize
    *
    * We assume that we can calculate a 32-bit offset first and then add it
    * to the 64-bit base address at the end.
    */
   nir_def *offset32 =
      nir_imul(b,
               nir_iadd(b,
                        nir_imul(b, brw_load_btd_dss_id(b),
                                    brw_nir_rt_load_num_simd_lanes_per_dss(b, devinfo)),
                        nir_iadd_imm(b, brw_nir_rt_sync_stack_id(b), 1)),
               nir_imm_int(b, BRW_RT_SIZEOF_RAY_QUERY));
   return nir_isub(b, base_mem_addr, nir_u2u64(b, offset32));
}
|
||||
|
||||
/* Address of this invocation's asynchronous HW RT stack. */
static inline nir_def *
brw_nir_rt_stack_addr(nir_builder *b)
{
   /* From the BSpec "Address Computation for Memory Based Data Structures:
    * Ray and TraversalStack (Async Ray Tracing)":
    *
    *    stackBase = RTDispatchGlobals.rtMemBasePtr
    *              + (DSSID * RTDispatchGlobals.numDSSRTStacks + stackID)
    *              * RTDispatchGlobals.stackSizePerRay // 64B aligned
    *
    * We assume that we can calculate a 32-bit offset first and then add it
    * to the 64-bit base address at the end.
    */
   nir_def *offset32 =
      nir_imul(b, brw_nir_rt_async_stack_id(b),
                  nir_load_ray_hw_stack_size_intel(b));
   return nir_iadd(b, nir_load_ray_base_mem_addr_intel(b),
                      nir_u2u64(b, offset32));
}
|
||||
|
||||
static inline nir_def *
|
||||
brw_nir_rt_mem_hit_addr_from_addr(nir_builder *b,
|
||||
nir_def *stack_addr,
|
||||
bool committed)
|
||||
{
|
||||
return nir_iadd_imm(b, stack_addr, committed ? 0 : BRW_RT_SIZEOF_HIT_INFO);
|
||||
}
|
||||
|
||||
static inline nir_def *
|
||||
brw_nir_rt_mem_hit_addr(nir_builder *b, bool committed)
|
||||
{
|
||||
return nir_iadd_imm(b, brw_nir_rt_stack_addr(b),
|
||||
committed ? 0 : BRW_RT_SIZEOF_HIT_INFO);
|
||||
}
|
||||
|
||||
static inline nir_def *
|
||||
brw_nir_rt_hit_attrib_data_addr(nir_builder *b)
|
||||
{
|
||||
return nir_iadd_imm(b, brw_nir_rt_stack_addr(b),
|
||||
BRW_RT_OFFSETOF_HIT_ATTRIB_DATA);
|
||||
}
|
||||
|
||||
/* Address of the MemRay structure for the given BVH level within the HW
 * stack entry at stack_addr.
 */
static inline nir_def *
brw_nir_rt_mem_ray_addr(nir_builder *b,
                        nir_def *stack_addr,
                        enum brw_rt_bvh_level bvh_level)
{
   /* From the BSpec "Address Computation for Memory Based Data Structures:
    * Ray and TraversalStack (Async Ray Tracing)":
    *
    *    rayBase = stackBase + sizeof(HitInfo) * 2 // 64B aligned
    *    rayPtr  = rayBase + bvhLevel * sizeof(Ray); // 64B aligned
    *
    * In Vulkan, we always have exactly two levels of BVH: World and Object.
    */
   uint32_t offset = BRW_RT_SIZEOF_HIT_INFO * 2 +
                     bvh_level * BRW_RT_SIZEOF_RAY;
   return nir_iadd_imm(b, stack_addr, offset);
}
|
||||
|
||||
/* Address of this invocation's software RT stack.
 *
 * The SW stacks live above the HW stack region: base + (total HW stack
 * size) + asyncStackID * swStackSize.
 */
static inline nir_def *
brw_nir_rt_sw_stack_addr(nir_builder *b,
                         const struct intel_device_info *devinfo)
{
   nir_def *addr = nir_load_ray_base_mem_addr_intel(b);

   nir_def *offset32 = nir_imul(b, brw_nir_num_rt_stacks(b, devinfo),
                                   nir_load_ray_hw_stack_size_intel(b));
   addr = nir_iadd(b, addr, nir_u2u64(b, offset32));

   /* Computed in 64 bits since the SW stack region can exceed 4GB. */
   nir_def *offset_in_stack =
      nir_imul(b, nir_u2u64(b, brw_nir_rt_async_stack_id(b)),
                  nir_u2u64(b, nir_load_ray_sw_stack_size_intel(b)));

   return nir_iadd(b, addr, offset_in_stack);
}
|
||||
|
||||
static inline nir_def *
|
||||
nir_unpack_64_4x16_split_z(nir_builder *b, nir_def *val)
|
||||
{
|
||||
return nir_unpack_32_2x16_split_x(b, nir_unpack_64_2x32_split_y(b, val));
|
||||
}
|
||||
|
||||
/* SSA defs for the fields of the RT dispatch globals structure, unpacked by
 * brw_nir_rt_load_globals_addr().
 */
struct brw_nir_rt_globals_defs {
   nir_def *base_mem_addr;
   nir_def *call_stack_handler_addr;
   nir_def *hw_stack_size;
   nir_def *num_dss_rt_stacks;
   nir_def *hit_sbt_addr;
   nir_def *hit_sbt_stride;
   nir_def *miss_sbt_addr;
   nir_def *miss_sbt_stride;
   nir_def *sw_stack_size;
   nir_def *launch_size;      /* 3-component launch dimensions */
   nir_def *call_sbt_addr;
   nir_def *call_sbt_stride;
   nir_def *resume_sbt_addr;
};
|
||||
|
||||
/* Load the RT dispatch globals at `addr` and unpack the fields into *defs.
 *
 * Reads two uniform blocks (16 dwords at addr, 8 dwords at addr + 64) and
 * splits out the packed address/stride pairs.  SBT addresses are stored as
 * 48-bit pointers with a 16-bit stride packed in the same dword; the
 * sign-extending nir_extract_i16 keeps canonical-form upper address bits.
 */
static inline void
brw_nir_rt_load_globals_addr(nir_builder *b,
                             struct brw_nir_rt_globals_defs *defs,
                             nir_def *addr)
{
   nir_def *data;
   data = brw_nir_rt_load_const(b, 16, addr, nir_imm_true(b));
   defs->base_mem_addr = nir_pack_64_2x32(b, nir_trim_vector(b, data, 2));

   defs->call_stack_handler_addr =
      nir_pack_64_2x32(b, nir_channels(b, data, 0x3 << 2));

   defs->hw_stack_size = nir_channel(b, data, 4);
   defs->num_dss_rt_stacks = nir_iand_imm(b, nir_channel(b, data, 5), 0xffff);
   defs->hit_sbt_addr =
      nir_pack_64_2x32_split(b, nir_channel(b, data, 8),
                                nir_extract_i16(b, nir_channel(b, data, 9),
                                                   nir_imm_int(b, 0)));
   defs->hit_sbt_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data, 9));
   defs->miss_sbt_addr =
      nir_pack_64_2x32_split(b, nir_channel(b, data, 10),
                                nir_extract_i16(b, nir_channel(b, data, 11),
                                                   nir_imm_int(b, 0)));
   defs->miss_sbt_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data, 11));
   defs->sw_stack_size = nir_channel(b, data, 12);
   defs->launch_size = nir_channels(b, data, 0x7u << 13);

   data = brw_nir_rt_load_const(b, 8, nir_iadd_imm(b, addr, 64), nir_imm_true(b));
   defs->call_sbt_addr =
      nir_pack_64_2x32_split(b, nir_channel(b, data, 0),
                                nir_extract_i16(b, nir_channel(b, data, 1),
                                                   nir_imm_int(b, 0)));
   defs->call_sbt_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data, 1));

   defs->resume_sbt_addr =
      nir_pack_64_2x32(b, nir_channels(b, data, 0x3 << 2));
}
|
||||
|
||||
/* Load the RT dispatch globals from the BTD global argument pointer. */
static inline void
brw_nir_rt_load_globals(nir_builder *b,
                        struct brw_nir_rt_globals_defs *defs)
{
   brw_nir_rt_load_globals_addr(b, defs, nir_load_btd_global_arg_addr_intel(b));
}
|
||||
|
||||
/* Turn a packed 42-bit leaf pointer (a 2x32 vector, in units of 64 bytes)
 * into a canonical 64-bit address.
 */
static inline nir_def *
brw_nir_rt_unpack_leaf_ptr(nir_builder *b, nir_def *vec2)
{
   /* Hit record leaf pointers are 42-bit and assumed to be in 64B chunks.
    * This leaves 22 bits at the top for other stuff.
    */
   nir_def *ptr64 = nir_imul_imm(b, nir_pack_64_2x32(b, vec2), 64);

   /* The top 16 bits (remember, we shifted by 6 already) contain garbage
    * that we need to get rid of.  Sign-extend from bit 48 to keep the
    * address canonical.
    */
   nir_def *ptr_lo = nir_unpack_64_2x32_split_x(b, ptr64);
   nir_def *ptr_hi = nir_unpack_64_2x32_split_y(b, ptr64);
   ptr_hi = nir_extract_i16(b, ptr_hi, nir_imm_int(b, 0));
   return nir_pack_64_2x32_split(b, ptr_lo, ptr_hi);
}
|
||||
|
||||
/**
|
||||
* MemHit memory layout (BSpec 47547) :
|
||||
*
|
||||
* name bits description
|
||||
* - t 32 hit distance of current hit (or initial traversal distance)
|
||||
* - u 32 barycentric hit coordinates
|
||||
* - v 32 barycentric hit coordinates
|
||||
* - primIndexDelta 16 prim index delta for compressed meshlets and quads
|
||||
* - valid 1 set if there is a hit
|
||||
* - leafType 3 type of node primLeafPtr is pointing to
|
||||
* - primLeafIndex 4 index of the hit primitive inside the leaf
|
||||
 * - bvhLevel 3 the instancing level at which the hit occurred
|
||||
* - frontFace 1 whether we hit the front-facing side of a triangle (also used to pass opaque flag when calling intersection shaders)
|
||||
* - pad0 4 unused bits
|
||||
* - primLeafPtr 42 pointer to BVH leaf node (multiple of 64 bytes)
|
||||
* - hitGroupRecPtr0 22 LSB of hit group record of the hit triangle (multiple of 16 bytes)
|
||||
* - instLeafPtr 42 pointer to BVH instance leaf node (in multiple of 64 bytes)
|
||||
* - hitGroupRecPtr1 22 MSB of hit group record of the hit triangle (multiple of 32 bytes)
|
||||
*/
|
||||
/* Unpacked SSA defs for a MemHit structure; see the layout comment above
 * and brw_nir_rt_load_mem_hit_from_addr().
 */
struct brw_nir_rt_mem_hit_defs {
   nir_def *t;
   nir_def *tri_bary; /**< Only valid for triangle geometry */
   nir_def *aabb_hit_kind; /**< Only valid for AABB geometry */
   nir_def *valid;
   nir_def *leaf_type;
   nir_def *prim_index_delta;
   nir_def *prim_leaf_index;
   nir_def *bvh_level;
   nir_def *front_face;
   nir_def *done; /**< Only for ray queries */
   nir_def *prim_leaf_ptr;
   nir_def *inst_leaf_ptr;
};
|
||||
|
||||
/* Load and unpack the committed or potential MemHit of the HW stack entry
 * at stack_addr into *defs.  Bit positions follow the MemHit layout
 * documented above (BSpec 47547).
 */
static inline void
brw_nir_rt_load_mem_hit_from_addr(nir_builder *b,
                                  struct brw_nir_rt_mem_hit_defs *defs,
                                  nir_def *stack_addr,
                                  bool committed)
{
   nir_def *hit_addr =
      brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, committed);

   /* First 16B: t, u, v, and the packed bitfield dword. */
   nir_def *data = brw_nir_rt_load(b, hit_addr, 16, 4, 32);
   defs->t = nir_channel(b, data, 0);
   defs->aabb_hit_kind = nir_channel(b, data, 1);
   defs->tri_bary = nir_channels(b, data, 0x6);
   nir_def *bitfield = nir_channel(b, data, 3);
   defs->prim_index_delta =
      nir_ubitfield_extract(b, bitfield, nir_imm_int(b, 0), nir_imm_int(b, 16));
   defs->valid = nir_i2b(b, nir_iand_imm(b, bitfield, 1u << 16));
   defs->leaf_type =
      nir_ubitfield_extract(b, bitfield, nir_imm_int(b, 17), nir_imm_int(b, 3));
   defs->prim_leaf_index =
      nir_ubitfield_extract(b, bitfield, nir_imm_int(b, 20), nir_imm_int(b, 4));
   defs->bvh_level =
      nir_ubitfield_extract(b, bitfield, nir_imm_int(b, 24), nir_imm_int(b, 3));
   defs->front_face = nir_i2b(b, nir_iand_imm(b, bitfield, 1 << 27));
   defs->done = nir_i2b(b, nir_iand_imm(b, bitfield, 1 << 28));

   /* Second 16B: packed 42-bit primitive and instance leaf pointers. */
   data = brw_nir_rt_load(b, nir_iadd_imm(b, hit_addr, 16), 16, 4, 32);
   defs->prim_leaf_ptr =
      brw_nir_rt_unpack_leaf_ptr(b, nir_channels(b, data, 0x3 << 0));
   defs->inst_leaf_ptr =
      brw_nir_rt_unpack_leaf_ptr(b, nir_channels(b, data, 0x3 << 2));
}
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_load_mem_hit(nir_builder *b,
|
||||
struct brw_nir_rt_mem_hit_defs *defs,
|
||||
bool committed)
|
||||
{
|
||||
brw_nir_rt_load_mem_hit_from_addr(b, defs, brw_nir_rt_stack_addr(b),
|
||||
committed);
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_nir_memcpy_global(nir_builder *b,
|
||||
nir_def *dst_addr, uint32_t dst_align,
|
||||
nir_def *src_addr, uint32_t src_align,
|
||||
uint32_t size)
|
||||
{
|
||||
/* We're going to copy in 16B chunks */
|
||||
assert(size % 16 == 0);
|
||||
dst_align = MIN2(dst_align, 16);
|
||||
src_align = MIN2(src_align, 16);
|
||||
|
||||
for (unsigned offset = 0; offset < size; offset += 16) {
|
||||
nir_def *data =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, src_addr, offset), 16,
|
||||
4, 32);
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, dst_addr, offset), 16,
|
||||
data, 0xf /* write_mask */);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_nir_memclear_global(nir_builder *b,
|
||||
nir_def *dst_addr, uint32_t dst_align,
|
||||
uint32_t size)
|
||||
{
|
||||
/* We're going to copy in 16B chunks */
|
||||
assert(size % 16 == 0);
|
||||
dst_align = MIN2(dst_align, 16);
|
||||
|
||||
nir_def *zero = nir_imm_ivec4(b, 0, 0, 0, 0);
|
||||
for (unsigned offset = 0; offset < size; offset += 16) {
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, dst_addr, offset), dst_align,
|
||||
zero, 0xf /* write_mask */);
|
||||
}
|
||||
}
|
||||
|
||||
static inline nir_def *
|
||||
brw_nir_rt_query_done(nir_builder *b, nir_def *stack_addr)
|
||||
{
|
||||
struct brw_nir_rt_mem_hit_defs hit_in = {};
|
||||
brw_nir_rt_load_mem_hit_from_addr(b, &hit_in, stack_addr,
|
||||
false /* committed */);
|
||||
|
||||
return hit_in.done;
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_set_dword_bit_at(nir_builder *b,
|
||||
nir_def *addr,
|
||||
uint32_t addr_offset,
|
||||
uint32_t bit)
|
||||
{
|
||||
nir_def *dword_addr = nir_iadd_imm(b, addr, addr_offset);
|
||||
nir_def *dword = brw_nir_rt_load(b, dword_addr, 4, 1, 32);
|
||||
brw_nir_rt_store(b, dword_addr, 4, nir_ior_imm(b, dword, 1u << bit), 0x1);
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_query_mark_done(nir_builder *b, nir_def *stack_addr)
|
||||
{
|
||||
brw_nir_rt_set_dword_bit_at(b,
|
||||
brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr,
|
||||
false /* committed */),
|
||||
4 * 3 /* dword offset */, 28 /* bit */);
|
||||
}
|
||||
|
||||
/* This helper clears the 3rd dword of the MemHit structure where the valid
|
||||
* bit is located.
|
||||
*/
|
||||
static inline void
|
||||
brw_nir_rt_query_mark_init(nir_builder *b, nir_def *stack_addr)
|
||||
{
|
||||
nir_def *dword_addr;
|
||||
|
||||
for (uint32_t i = 0; i < 2; i++) {
|
||||
dword_addr =
|
||||
nir_iadd_imm(b,
|
||||
brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr,
|
||||
i == 0 /* committed */),
|
||||
4 * 3 /* dword offset */);
|
||||
brw_nir_rt_store(b, dword_addr, 4, nir_imm_int(b, 0), 0x1);
|
||||
}
|
||||
}
|
||||
|
||||
/* This helper is pretty much a memcpy of uncommitted into committed hit
|
||||
* structure, just adding the valid bit.
|
||||
*/
|
||||
static inline void
|
||||
brw_nir_rt_commit_hit_addr(nir_builder *b, nir_def *stack_addr)
|
||||
{
|
||||
nir_def *dst_addr =
|
||||
brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, true /* committed */);
|
||||
nir_def *src_addr =
|
||||
brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, false /* committed */);
|
||||
|
||||
for (unsigned offset = 0; offset < BRW_RT_SIZEOF_HIT_INFO; offset += 16) {
|
||||
nir_def *data =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, src_addr, offset), 16, 4, 32);
|
||||
|
||||
if (offset == 0) {
|
||||
data = nir_vec4(b,
|
||||
nir_channel(b, data, 0),
|
||||
nir_channel(b, data, 1),
|
||||
nir_channel(b, data, 2),
|
||||
nir_ior_imm(b,
|
||||
nir_channel(b, data, 3),
|
||||
0x1 << 16 /* valid */));
|
||||
|
||||
/* Also write the potential hit as we change it. */
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, src_addr, offset), 16,
|
||||
data, 0xf /* write_mask */);
|
||||
}
|
||||
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, dst_addr, offset), 16,
|
||||
data, 0xf /* write_mask */);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_commit_hit(nir_builder *b)
|
||||
{
|
||||
nir_def *stack_addr = brw_nir_rt_stack_addr(b);
|
||||
brw_nir_rt_commit_hit_addr(b, stack_addr);
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_generate_hit_addr(nir_builder *b, nir_def *stack_addr, nir_def *t_val)
|
||||
{
|
||||
nir_def *committed_addr =
|
||||
brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, true /* committed */);
|
||||
nir_def *potential_addr =
|
||||
brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, false /* committed */);
|
||||
|
||||
/* Set:
|
||||
*
|
||||
* potential.t = t_val;
|
||||
* potential.valid = true;
|
||||
*/
|
||||
nir_def *potential_hit_dwords_0_3 =
|
||||
brw_nir_rt_load(b, potential_addr, 16, 4, 32);
|
||||
potential_hit_dwords_0_3 =
|
||||
nir_vec4(b,
|
||||
t_val,
|
||||
nir_channel(b, potential_hit_dwords_0_3, 1),
|
||||
nir_channel(b, potential_hit_dwords_0_3, 2),
|
||||
nir_ior_imm(b, nir_channel(b, potential_hit_dwords_0_3, 3),
|
||||
(0x1 << 16) /* valid */));
|
||||
brw_nir_rt_store(b, potential_addr, 16, potential_hit_dwords_0_3, 0xf /* write_mask */);
|
||||
|
||||
/* Set:
|
||||
*
|
||||
* committed.t = t_val;
|
||||
* committed.u = 0.0f;
|
||||
* committed.v = 0.0f;
|
||||
* committed.valid = true;
|
||||
* committed.leaf_type = potential.leaf_type;
|
||||
* committed.bvh_level = BRW_RT_BVH_LEVEL_OBJECT;
|
||||
* committed.front_face = false;
|
||||
* committed.prim_leaf_index = 0;
|
||||
* committed.done = false;
|
||||
*/
|
||||
nir_def *committed_hit_dwords_0_3 =
|
||||
brw_nir_rt_load(b, committed_addr, 16, 4, 32);
|
||||
committed_hit_dwords_0_3 =
|
||||
nir_vec4(b,
|
||||
t_val,
|
||||
nir_imm_float(b, 0.0f),
|
||||
nir_imm_float(b, 0.0f),
|
||||
nir_ior_imm(b,
|
||||
nir_ior_imm(b, nir_channel(b, potential_hit_dwords_0_3, 3), 0x000e0000),
|
||||
(0x1 << 16) /* valid */ |
|
||||
(BRW_RT_BVH_LEVEL_OBJECT << 24) /* leaf_type */));
|
||||
brw_nir_rt_store(b, committed_addr, 16, committed_hit_dwords_0_3, 0xf /* write_mask */);
|
||||
|
||||
/* Set:
|
||||
*
|
||||
* committed.prim_leaf_ptr = potential.prim_leaf_ptr;
|
||||
* committed.inst_leaf_ptr = potential.inst_leaf_ptr;
|
||||
*/
|
||||
brw_nir_memcpy_global(b,
|
||||
nir_iadd_imm(b, committed_addr, 16), 16,
|
||||
nir_iadd_imm(b, potential_addr, 16), 16,
|
||||
16);
|
||||
}
|
||||
|
||||
/* Decomposed view of the hardware MemRay structure, one nir_def per field.
 * Consumed by the store helpers and produced by the load helpers below.
 */
struct brw_nir_rt_mem_ray_defs {
   nir_def *orig;                    /* Ray origin, 3 x 32-bit */
   nir_def *dir;                     /* Ray direction, 3 x 32-bit */
   nir_def *t_near;                  /* 32-bit */
   nir_def *t_far;                   /* 32-bit */
   nir_def *root_node_ptr;           /* 64-bit BVH root node address */
   nir_def *ray_flags;               /* 16-bit */
   nir_def *hit_group_sr_base_ptr;   /* 64-bit shader record base */
   nir_def *hit_group_sr_stride;     /* 16-bit */
   nir_def *miss_sr_ptr;             /* 64-bit miss shader record */
   nir_def *shader_index_multiplier; /* Stored shifted left by 8 */
   nir_def *inst_leaf_ptr;           /* 64-bit; optional (may be NULL) when storing */
   nir_def *ray_mask;                /* Low 16 bits are written to memory */
};
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_store_mem_ray_query_at_addr(nir_builder *b,
|
||||
nir_def *ray_addr,
|
||||
const struct brw_nir_rt_mem_ray_defs *defs)
|
||||
{
|
||||
assert_def_size(defs->orig, 3, 32);
|
||||
assert_def_size(defs->dir, 3, 32);
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 0), 16,
|
||||
nir_vec4(b, nir_channel(b, defs->orig, 0),
|
||||
nir_channel(b, defs->orig, 1),
|
||||
nir_channel(b, defs->orig, 2),
|
||||
nir_channel(b, defs->dir, 0)),
|
||||
~0 /* write mask */);
|
||||
|
||||
assert_def_size(defs->t_near, 1, 32);
|
||||
assert_def_size(defs->t_far, 1, 32);
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 16), 16,
|
||||
nir_vec4(b, nir_channel(b, defs->dir, 1),
|
||||
nir_channel(b, defs->dir, 2),
|
||||
defs->t_near,
|
||||
defs->t_far),
|
||||
~0 /* write mask */);
|
||||
|
||||
assert_def_size(defs->root_node_ptr, 1, 64);
|
||||
assert_def_size(defs->ray_flags, 1, 16);
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 32), 16,
|
||||
nir_vec2(b, nir_unpack_64_2x32_split_x(b, defs->root_node_ptr),
|
||||
nir_pack_32_2x16_split(b,
|
||||
nir_unpack_64_4x16_split_z(b, defs->root_node_ptr),
|
||||
defs->ray_flags)),
|
||||
0x3 /* write mask */);
|
||||
|
||||
/* leaf_ptr is optional */
|
||||
nir_def *inst_leaf_ptr;
|
||||
if (defs->inst_leaf_ptr) {
|
||||
inst_leaf_ptr = defs->inst_leaf_ptr;
|
||||
} else {
|
||||
inst_leaf_ptr = nir_imm_int64(b, 0);
|
||||
}
|
||||
|
||||
assert_def_size(inst_leaf_ptr, 1, 64);
|
||||
assert_def_size(defs->ray_mask, 1, 32);
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 56), 8,
|
||||
nir_vec2(b, nir_unpack_64_2x32_split_x(b, inst_leaf_ptr),
|
||||
nir_pack_32_2x16_split(b,
|
||||
nir_unpack_64_4x16_split_z(b, inst_leaf_ptr),
|
||||
nir_unpack_32_2x16_split_x(b, defs->ray_mask))),
|
||||
~0 /* write mask */);
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_store_mem_ray(nir_builder *b,
|
||||
const struct brw_nir_rt_mem_ray_defs *defs,
|
||||
enum brw_rt_bvh_level bvh_level)
|
||||
{
|
||||
nir_def *ray_addr =
|
||||
brw_nir_rt_mem_ray_addr(b, brw_nir_rt_stack_addr(b), bvh_level);
|
||||
|
||||
assert_def_size(defs->orig, 3, 32);
|
||||
assert_def_size(defs->dir, 3, 32);
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 0), 16,
|
||||
nir_vec4(b, nir_channel(b, defs->orig, 0),
|
||||
nir_channel(b, defs->orig, 1),
|
||||
nir_channel(b, defs->orig, 2),
|
||||
nir_channel(b, defs->dir, 0)),
|
||||
~0 /* write mask */);
|
||||
|
||||
assert_def_size(defs->t_near, 1, 32);
|
||||
assert_def_size(defs->t_far, 1, 32);
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 16), 16,
|
||||
nir_vec4(b, nir_channel(b, defs->dir, 1),
|
||||
nir_channel(b, defs->dir, 2),
|
||||
defs->t_near,
|
||||
defs->t_far),
|
||||
~0 /* write mask */);
|
||||
|
||||
assert_def_size(defs->root_node_ptr, 1, 64);
|
||||
assert_def_size(defs->ray_flags, 1, 16);
|
||||
assert_def_size(defs->hit_group_sr_base_ptr, 1, 64);
|
||||
assert_def_size(defs->hit_group_sr_stride, 1, 16);
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 32), 16,
|
||||
nir_vec4(b, nir_unpack_64_2x32_split_x(b, defs->root_node_ptr),
|
||||
nir_pack_32_2x16_split(b,
|
||||
nir_unpack_64_4x16_split_z(b, defs->root_node_ptr),
|
||||
defs->ray_flags),
|
||||
nir_unpack_64_2x32_split_x(b, defs->hit_group_sr_base_ptr),
|
||||
nir_pack_32_2x16_split(b,
|
||||
nir_unpack_64_4x16_split_z(b, defs->hit_group_sr_base_ptr),
|
||||
defs->hit_group_sr_stride)),
|
||||
~0 /* write mask */);
|
||||
|
||||
/* leaf_ptr is optional */
|
||||
nir_def *inst_leaf_ptr;
|
||||
if (defs->inst_leaf_ptr) {
|
||||
inst_leaf_ptr = defs->inst_leaf_ptr;
|
||||
} else {
|
||||
inst_leaf_ptr = nir_imm_int64(b, 0);
|
||||
}
|
||||
|
||||
assert_def_size(defs->miss_sr_ptr, 1, 64);
|
||||
assert_def_size(defs->shader_index_multiplier, 1, 32);
|
||||
assert_def_size(inst_leaf_ptr, 1, 64);
|
||||
assert_def_size(defs->ray_mask, 1, 32);
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 48), 16,
|
||||
nir_vec4(b, nir_unpack_64_2x32_split_x(b, defs->miss_sr_ptr),
|
||||
nir_pack_32_2x16_split(b,
|
||||
nir_unpack_64_4x16_split_z(b, defs->miss_sr_ptr),
|
||||
nir_unpack_32_2x16_split_x(b,
|
||||
nir_ishl(b, defs->shader_index_multiplier,
|
||||
nir_imm_int(b, 8)))),
|
||||
nir_unpack_64_2x32_split_x(b, inst_leaf_ptr),
|
||||
nir_pack_32_2x16_split(b,
|
||||
nir_unpack_64_4x16_split_z(b, inst_leaf_ptr),
|
||||
nir_unpack_32_2x16_split_x(b, defs->ray_mask))),
|
||||
~0 /* write mask */);
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_load_mem_ray_from_addr(nir_builder *b,
|
||||
struct brw_nir_rt_mem_ray_defs *defs,
|
||||
nir_def *ray_base_addr,
|
||||
enum brw_rt_bvh_level bvh_level)
|
||||
{
|
||||
nir_def *ray_addr = brw_nir_rt_mem_ray_addr(b,
|
||||
ray_base_addr,
|
||||
bvh_level);
|
||||
|
||||
nir_def *data[4] = {
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, ray_addr, 0), 16, 4, 32),
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, ray_addr, 16), 16, 4, 32),
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, ray_addr, 32), 16, 4, 32),
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, ray_addr, 48), 16, 4, 32),
|
||||
};
|
||||
|
||||
defs->orig = nir_trim_vector(b, data[0], 3);
|
||||
defs->dir = nir_vec3(b, nir_channel(b, data[0], 3),
|
||||
nir_channel(b, data[1], 0),
|
||||
nir_channel(b, data[1], 1));
|
||||
defs->t_near = nir_channel(b, data[1], 2);
|
||||
defs->t_far = nir_channel(b, data[1], 3);
|
||||
defs->root_node_ptr =
|
||||
nir_pack_64_2x32_split(b, nir_channel(b, data[2], 0),
|
||||
nir_extract_i16(b, nir_channel(b, data[2], 1),
|
||||
nir_imm_int(b, 0)));
|
||||
defs->ray_flags =
|
||||
nir_unpack_32_2x16_split_y(b, nir_channel(b, data[2], 1));
|
||||
defs->hit_group_sr_base_ptr =
|
||||
nir_pack_64_2x32_split(b, nir_channel(b, data[2], 2),
|
||||
nir_extract_i16(b, nir_channel(b, data[2], 3),
|
||||
nir_imm_int(b, 0)));
|
||||
defs->hit_group_sr_stride =
|
||||
nir_unpack_32_2x16_split_y(b, nir_channel(b, data[2], 3));
|
||||
defs->miss_sr_ptr =
|
||||
nir_pack_64_2x32_split(b, nir_channel(b, data[3], 0),
|
||||
nir_extract_i16(b, nir_channel(b, data[3], 1),
|
||||
nir_imm_int(b, 0)));
|
||||
defs->shader_index_multiplier =
|
||||
nir_ushr(b, nir_unpack_32_2x16_split_y(b, nir_channel(b, data[3], 1)),
|
||||
nir_imm_int(b, 8));
|
||||
defs->inst_leaf_ptr =
|
||||
nir_pack_64_2x32_split(b, nir_channel(b, data[3], 2),
|
||||
nir_extract_i16(b, nir_channel(b, data[3], 3),
|
||||
nir_imm_int(b, 0)));
|
||||
defs->ray_mask =
|
||||
nir_unpack_32_2x16_split_y(b, nir_channel(b, data[3], 3));
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_load_mem_ray(nir_builder *b,
|
||||
struct brw_nir_rt_mem_ray_defs *defs,
|
||||
enum brw_rt_bvh_level bvh_level)
|
||||
{
|
||||
brw_nir_rt_load_mem_ray_from_addr(b, defs, brw_nir_rt_stack_addr(b),
|
||||
bvh_level);
|
||||
}
|
||||
|
||||
/* Fields of a BVH instance leaf, as loaded by
 * brw_nir_rt_load_bvh_instance_leaf().
 */
struct brw_nir_rt_bvh_instance_leaf_defs {
   nir_def *shader_index;                    /* Low 24 bits of the descriptor */
   nir_def *contribution_to_hit_group_index; /* Low 24 bits of descriptor dword 1 */
   /* [0..2] are 3-component vectors from the leaf matrix; [3] is the last
    * column, which the leaf stores swapped between the two matrices (see
    * the loader).
    */
   nir_def *world_to_object[4];
   nir_def *instance_id;
   nir_def *instance_index;
   nir_def *object_to_world[4];
};
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_load_bvh_instance_leaf(nir_builder *b,
|
||||
struct brw_nir_rt_bvh_instance_leaf_defs *defs,
|
||||
nir_def *leaf_addr)
|
||||
{
|
||||
nir_def *leaf_desc = brw_nir_rt_load(b, leaf_addr, 4, 2, 32);
|
||||
|
||||
defs->shader_index =
|
||||
nir_iand_imm(b, nir_channel(b, leaf_desc, 0), (1 << 24) - 1);
|
||||
defs->contribution_to_hit_group_index =
|
||||
nir_iand_imm(b, nir_channel(b, leaf_desc, 1), (1 << 24) - 1);
|
||||
|
||||
defs->world_to_object[0] =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 16), 4, 3, 32);
|
||||
defs->world_to_object[1] =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 28), 4, 3, 32);
|
||||
defs->world_to_object[2] =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 40), 4, 3, 32);
|
||||
/* The last column of the matrices is swapped between the two probably
|
||||
* because it makes it easier/faster for hardware somehow.
|
||||
*/
|
||||
defs->object_to_world[3] =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 52), 4, 3, 32);
|
||||
|
||||
nir_def *data =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 64), 4, 4, 32);
|
||||
defs->instance_id = nir_channel(b, data, 2);
|
||||
defs->instance_index = nir_channel(b, data, 3);
|
||||
|
||||
defs->object_to_world[0] =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 80), 4, 3, 32);
|
||||
defs->object_to_world[1] =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 92), 4, 3, 32);
|
||||
defs->object_to_world[2] =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 104), 4, 3, 32);
|
||||
defs->world_to_object[3] =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 116), 4, 3, 32);
|
||||
}
|
||||
|
||||
/* Descriptor fields of a BVH primitive leaf, as loaded by
 * brw_nir_rt_load_bvh_primitive_leaf().
 */
struct brw_nir_rt_bvh_primitive_leaf_defs {
   nir_def *shader_index; /* From descriptor dword 0 */
   nir_def *geom_mask;    /* From descriptor dword 0 */
   nir_def *geom_index;   /* From descriptor dword 1 */
   nir_def *type;         /* From descriptor dword 1 */
   nir_def *geom_flags;   /* From descriptor dword 1 */
};
|
||||
|
||||
/* Load the descriptor fields of a BVH primitive leaf at leaf_addr. */
static inline void
brw_nir_rt_load_bvh_primitive_leaf(nir_builder *b,
                                   struct brw_nir_rt_bvh_primitive_leaf_defs *defs,
                                   nir_def *leaf_addr)
{
   nir_def *desc = brw_nir_rt_load(b, leaf_addr, 4, 2, 32);

   /* NOTE(review): the (offset, bits) operands below look swapped relative
    * to the nir_ubitfield_extract() calls in
    * brw_nir_rt_load_mem_hit_from_addr(), which pass (low bit, width) --
    * e.g. (23, 0) would extract a zero-width field.  Preserved verbatim;
    * confirm against the BVH primitive leaf layout before changing.
    */
   defs->shader_index =
      nir_ubitfield_extract(b, nir_channel(b, desc, 0),
                            nir_imm_int(b, 23), nir_imm_int(b, 0));
   defs->geom_mask =
      nir_ubitfield_extract(b, nir_channel(b, desc, 0),
                            nir_imm_int(b, 31), nir_imm_int(b, 24));

   defs->geom_index =
      nir_ubitfield_extract(b, nir_channel(b, desc, 1),
                            nir_imm_int(b, 28), nir_imm_int(b, 0));
   defs->type =
      nir_ubitfield_extract(b, nir_channel(b, desc, 1),
                            nir_imm_int(b, 29), nir_imm_int(b, 29));
   defs->geom_flags =
      nir_ubitfield_extract(b, nir_channel(b, desc, 1),
                            nir_imm_int(b, 31), nir_imm_int(b, 30));
}
|
||||
|
||||
/* Vertex positions of a BVH primitive leaf. */
struct brw_nir_rt_bvh_primitive_leaf_positions_defs {
   nir_def *positions[3]; /* One 3-component vector per vertex */
};
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_load_bvh_primitive_leaf_positions(nir_builder *b,
|
||||
struct brw_nir_rt_bvh_primitive_leaf_positions_defs *defs,
|
||||
nir_def *leaf_addr)
|
||||
{
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(defs->positions); i++) {
|
||||
defs->positions[i] =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 16 + i * 4 * 3), 4, 3, 32);
|
||||
}
|
||||
}
|
||||
|
||||
static inline nir_def *
|
||||
brw_nir_rt_load_primitive_id_from_hit(nir_builder *b,
|
||||
nir_def *is_procedural,
|
||||
const struct brw_nir_rt_mem_hit_defs *defs)
|
||||
{
|
||||
if (!is_procedural) {
|
||||
is_procedural =
|
||||
nir_ieq_imm(b, defs->leaf_type,
|
||||
BRW_RT_BVH_NODE_TYPE_PROCEDURAL);
|
||||
}
|
||||
|
||||
nir_def *prim_id_proc, *prim_id_quad;
|
||||
nir_push_if(b, is_procedural);
|
||||
{
|
||||
/* For procedural leafs, the index is in dw[3]. */
|
||||
nir_def *offset =
|
||||
nir_iadd_imm(b, nir_ishl_imm(b, defs->prim_leaf_index, 2), 12);
|
||||
prim_id_proc = nir_load_global(b, nir_iadd(b, defs->prim_leaf_ptr,
|
||||
nir_u2u64(b, offset)),
|
||||
4, /* align */ 1, 32);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
/* For quad leafs, the index is dw[2] and there is a 16bit additional
|
||||
* offset in dw[3].
|
||||
*/
|
||||
prim_id_quad = nir_load_global(b, nir_iadd_imm(b, defs->prim_leaf_ptr, 8),
|
||||
4, /* align */ 1, 32);
|
||||
prim_id_quad = nir_iadd(b,
|
||||
prim_id_quad,
|
||||
defs->prim_index_delta);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
return nir_if_phi(b, prim_id_proc, prim_id_quad);
|
||||
}
|
||||
|
||||
static inline nir_def *
|
||||
brw_nir_rt_acceleration_structure_to_root_node(nir_builder *b,
|
||||
nir_def *as_addr)
|
||||
{
|
||||
/* The HW memory structure in which we specify what acceleration structure
|
||||
* to traverse, takes the address to the root node in the acceleration
|
||||
* structure, not the acceleration structure itself. To find that, we have
|
||||
* to read the root node offset from the acceleration structure which is
|
||||
* the first QWord.
|
||||
*
|
||||
* But if the acceleration structure pointer is NULL, then we should return
|
||||
* NULL as root node pointer.
|
||||
*
|
||||
* TODO: we could optimize this by assuming that for a given version of the
|
||||
* BVH, we can find the root node at a given offset.
|
||||
*/
|
||||
nir_def *root_node_ptr, *null_node_ptr;
|
||||
nir_push_if(b, nir_ieq_imm(b, as_addr, 0));
|
||||
{
|
||||
null_node_ptr = nir_imm_int64(b, 0);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
root_node_ptr =
|
||||
nir_iadd(b, as_addr, brw_nir_rt_load(b, as_addr, 256, 1, 64));
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
return nir_if_phi(b, null_node_ptr, root_node_ptr);
|
||||
}
|
||||
|
||||
#endif /* BRW_NIR_RT_BUILDER_H */
|
||||
|
|
@ -1,292 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2020 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BRW_RT_H
|
||||
#define BRW_RT_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "compiler/shader_enums.h"
|
||||
#include "util/macros.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** Vulkan defines shaderGroupHandleSize = 32 */
#define BRW_RT_SBT_HANDLE_SIZE 32

/** RT_DISPATCH_GLOBALS size (see gen_rt.xml) */
#define BRW_RT_DISPATCH_GLOBALS_SIZE 80

/** Offset after the RT dispatch globals at which "push" constants live */
#define BRW_RT_PUSH_CONST_OFFSET 128

/** Stride of the resume SBT */
#define BRW_BTD_RESUME_SBT_STRIDE 8

/* Vulkan always uses exactly two levels of BVH: world and object.  At the
 * API level, these are referred to as top and bottom.
 */
enum brw_rt_bvh_level {
   BRW_RT_BVH_LEVEL_WORLD = 0,
   BRW_RT_BVH_LEVEL_OBJECT = 1,
};
#define BRW_RT_MAX_BVH_LEVELS 2
|
||||
|
||||
/** Node types found in BVH nodes */
enum brw_rt_bvh_node_type {
   BRW_RT_BVH_NODE_TYPE_INTERNAL = 0,
   BRW_RT_BVH_NODE_TYPE_INSTANCE = 1,
   BRW_RT_BVH_NODE_TYPE_PROCEDURAL = 3,
   BRW_RT_BVH_NODE_TYPE_QUAD = 4,
};

/** HitKind values returned for triangle geometry
 *
 * This enum must match the SPIR-V enum.
 */
enum brw_rt_hit_kind {
   BRW_RT_HIT_KIND_FRONT_FACE = 0xfe,
   BRW_RT_HIT_KIND_BACK_FACE = 0xff,
};

/** Ray flags
 *
 * This enum must match the SPIR-V RayFlags enum.
 */
enum brw_rt_ray_flags {
   BRW_RT_RAY_FLAG_FORCE_OPAQUE = 0x01,
   BRW_RT_RAY_FLAG_FORCE_NON_OPAQUE = 0x02,
   BRW_RT_RAY_FLAG_TERMINATE_ON_FIRST_HIT = 0x04,
   BRW_RT_RAY_FLAG_SKIP_CLOSEST_HIT_SHADER = 0x08,
   BRW_RT_RAY_FLAG_CULL_BACK_FACING_TRIANGLES = 0x10,
   BRW_RT_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES = 0x20,
   BRW_RT_RAY_FLAG_CULL_OPAQUE = 0x40,
   BRW_RT_RAY_FLAG_CULL_NON_OPAQUE = 0x80,
   BRW_RT_RAY_FLAG_SKIP_TRIANGLES = 0x100,
   BRW_RT_RAY_FLAG_SKIP_AABBS = 0x200,
};
|
||||
|
||||
/* Layout of the ray-tracing scratch memory area; computed by
 * brw_rt_compute_scratch_layout().
 */
struct brw_rt_scratch_layout {
   /** Number of stack IDs per DSS */
   uint32_t stack_ids_per_dss;

   /** Start offset (in bytes) of the hardware MemRay stack */
   uint32_t ray_stack_start;

   /** Stride (in bytes) of the hardware MemRay stack */
   uint32_t ray_stack_stride;

   /** Start offset (in bytes) of the SW stacks */
   uint64_t sw_stack_start;

   /** Size (in bytes) of the SW stack for a single shader invocation */
   uint32_t sw_stack_size;

   /** Total size (in bytes) of the RT scratch memory area */
   uint64_t total_size;
};
|
||||
|
||||
/** Parameters passed to the raygen trampoline shader
 *
 * This struct is carefully constructed to be 32B and must be passed to the
 * raygen trampoline shader as inline constant data.
 */
struct brw_rt_raygen_trampoline_params {
   /** The GPU address of the RT_DISPATCH_GLOBALS */
   uint64_t rt_disp_globals_addr;

   /** The GPU address of the BINDLESS_SHADER_RECORD for the raygen shader */
   uint64_t raygen_bsr_addr;

   /** 1 if this is an indirect dispatch, 0 otherwise */
   uint8_t is_indirect;

   /** The integer log2 of the local group size
    *
    * Ray-tracing shaders don't have a concept of local vs. global workgroup
    * size.  They only have a single 3D launch size.  The raygen trampoline
    * shader is always dispatched with a local workgroup size equal to the
    * SIMD width, but the shape of the local workgroup is determined at
    * dispatch time based on the shape of the launch and passed to the
    * trampoline via this field.  (There's no sense having a Z dimension on
    * the local workgroup if the launch is 2D.)
    *
    * We use the integer log2 of the size because there's no point in
    * non-power-of-two sizes and shifts are cheaper than division.
    */
   uint8_t local_group_size_log2[3];

   /** Padding bringing the struct up to 32B */
   uint32_t pad[3];
};
|
||||
|
||||
/** Size of the "hot zone" in bytes
 *
 * The hot zone is a SW-defined data structure which is a single uvec4
 * containing two pieces of information:
 *
 *  - hotzone.x: Stack offset (in bytes)
 *
 *    This is the offset (in bytes) into the per-thread scratch space at
 *    which the current shader's stack starts.  This is incremented by the
 *    calling shader prior to any shader call type instructions and gets
 *    decremented by the resume shader as part of completing the return
 *    operation.
 *
 *
 *  - hotzone.yzw: The launch ID associated with the current thread
 *
 *    Inside a bindless shader, the only information we have is the DSS ID
 *    from the hardware EU and a per-DSS stack ID.  In particular, the
 *    three-dimensional launch ID is lost the moment we leave the raygen
 *    trampoline.
 */
#define BRW_RT_SIZEOF_HOTZONE 16

/* From the BSpec "Address Computation for Memory Based Data Structures:
 * Ray and TraversalStack (Async Ray Tracing)":
 *
 *    sizeof(Ray) = 64B, sizeof(HitInfo) = 32B, sizeof(TravStack) = 32B.
 */
#define BRW_RT_SIZEOF_RAY 64
#define BRW_RT_SIZEOF_HIT_INFO 32
#define BRW_RT_SIZEOF_TRAV_STACK 32

/* From the BSpec:
 *
 *    syncStackSize = (maxBVHLevels % 2 == 1) ?
 *       (sizeof(HitInfo) * 2 +
 *        (sizeof(Ray) + sizeof(TravStack)) * maxBVHLevels + 32B) :
 *       (sizeof(HitInfo) * 2 +
 *        (sizeof(Ray) + sizeof(TravStack)) * maxBVHLevels);
 *
 * The select is just to align to 64B.
 */
#define BRW_RT_SIZEOF_RAY_QUERY \
   (BRW_RT_SIZEOF_HIT_INFO * 2 + \
    (BRW_RT_SIZEOF_RAY + BRW_RT_SIZEOF_TRAV_STACK) * BRW_RT_MAX_BVH_LEVELS + \
    (BRW_RT_MAX_BVH_LEVELS % 2 ? 32 : 0))

#define BRW_RT_SIZEOF_SHADOW_RAY_QUERY \
   (BRW_RT_SIZEOF_HIT_INFO * 2 + \
    (BRW_RT_SIZEOF_RAY + BRW_RT_SIZEOF_TRAV_STACK) * BRW_RT_MAX_BVH_LEVELS)

#define BRW_RT_SIZEOF_HW_STACK \
   (BRW_RT_SIZEOF_HIT_INFO * 2 + \
    BRW_RT_SIZEOF_RAY * BRW_RT_MAX_BVH_LEVELS + \
    BRW_RT_SIZEOF_TRAV_STACK * BRW_RT_MAX_BVH_LEVELS)

/* This is a mesa-defined region for hit attribute data */
#define BRW_RT_SIZEOF_HIT_ATTRIB_DATA 64
#define BRW_RT_OFFSETOF_HIT_ATTRIB_DATA BRW_RT_SIZEOF_HW_STACK

/* Per-stack-ID stride of the async HW stack: HW stack + hit attribute
 * region, rounded up to a cache line.
 */
#define BRW_RT_ASYNC_STACK_STRIDE \
   ALIGN_POT(BRW_RT_OFFSETOF_HIT_ATTRIB_DATA + \
             BRW_RT_SIZEOF_HIT_ATTRIB_DATA, 64)
|
||||
|
||||
static inline void
|
||||
brw_rt_compute_scratch_layout(struct brw_rt_scratch_layout *layout,
|
||||
const struct intel_device_info *devinfo,
|
||||
uint32_t stack_ids_per_dss,
|
||||
uint32_t sw_stack_size)
|
||||
{
|
||||
layout->stack_ids_per_dss = stack_ids_per_dss;
|
||||
|
||||
const uint32_t dss_count = intel_device_info_dual_subslice_id_bound(devinfo);
|
||||
const uint32_t num_stack_ids = dss_count * stack_ids_per_dss;
|
||||
|
||||
uint64_t size = 0;
|
||||
|
||||
/* The first thing in our scratch area is an array of "hot zones" which
|
||||
* store the stack offset as well as the launch IDs for each active
|
||||
* invocation.
|
||||
*/
|
||||
size += BRW_RT_SIZEOF_HOTZONE * num_stack_ids;
|
||||
|
||||
/* Next, we place the HW ray stacks */
|
||||
assert(size % 64 == 0); /* Cache-line aligned */
|
||||
assert(size < UINT32_MAX);
|
||||
layout->ray_stack_start = size;
|
||||
layout->ray_stack_stride = BRW_RT_ASYNC_STACK_STRIDE;
|
||||
size += num_stack_ids * layout->ray_stack_stride;
|
||||
|
||||
/* Finally, we place the SW stacks for the individual ray-tracing shader
|
||||
* invocations. We align these to 64B to ensure that we don't have any
|
||||
* shared cache lines which could hurt performance.
|
||||
*/
|
||||
assert(size % 64 == 0);
|
||||
layout->sw_stack_start = size;
|
||||
layout->sw_stack_size = ALIGN(sw_stack_size, 64);
|
||||
|
||||
/* Currently it's always the case that sw_stack_size is a power of
|
||||
* two, but power-of-two SW stack sizes are prone to causing
|
||||
* collisions in the hashing function used by the L3 to map memory
|
||||
* addresses to banks, which can cause stack accesses from most
|
||||
* DSSes to bottleneck on a single L3 bank. Fix it by padding the
|
||||
* SW stack by a single cacheline if it was a power of two.
|
||||
*/
|
||||
if (layout->sw_stack_size > 64 &&
|
||||
util_is_power_of_two_nonzero(layout->sw_stack_size))
|
||||
layout->sw_stack_size += 64;
|
||||
|
||||
size += num_stack_ids * layout->sw_stack_size;
|
||||
|
||||
layout->total_size = size;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
brw_rt_ray_queries_hw_stacks_size(const struct intel_device_info *devinfo)
|
||||
{
|
||||
/* Maximum slice/subslice/EU ID can be computed from the max_scratch_ids
|
||||
* which includes all the threads.
|
||||
*/
|
||||
uint32_t max_eu_id = devinfo->max_scratch_ids[MESA_SHADER_COMPUTE];
|
||||
uint32_t max_simd_size = 16; /* Cannot run in SIMD32 with ray queries */
|
||||
return max_eu_id * max_simd_size * BRW_RT_SIZEOF_RAY_QUERY;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
brw_rt_ray_queries_shadow_stack_size(const struct intel_device_info *devinfo)
|
||||
{
|
||||
/* Maximum slice/subslice/EU ID can be computed from the max_scratch_ids
|
||||
* which includes all the threads.
|
||||
*/
|
||||
uint32_t max_eu_id = devinfo->max_scratch_ids[MESA_SHADER_COMPUTE];
|
||||
uint32_t max_simd_size = 16; /* Cannot run in SIMD32 with ray queries */
|
||||
return max_eu_id * max_simd_size * BRW_RT_SIZEOF_SHADOW_RAY_QUERY;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
brw_rt_ray_queries_shadow_stacks_size(const struct intel_device_info *devinfo,
|
||||
uint32_t ray_queries)
|
||||
{
|
||||
/* Don't bother a shadow stack if we only have a single query. We can
|
||||
* directly write in the HW buffer.
|
||||
*/
|
||||
return (ray_queries > 1 ? ray_queries : 0) * brw_rt_ray_queries_shadow_stack_size(devinfo) +
|
||||
ray_queries * 4; /* Ctrl + Level data */
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* BRW_RT_H */
|
||||
|
|
@ -1,676 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2021 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "brw_compiler.h"
|
||||
#include "brw_kernel.h"
|
||||
#include "compiler/brw_disasm.h"
|
||||
#include "compiler/clc/clc.h"
|
||||
#include "compiler/glsl_types.h"
|
||||
#include "compiler/nir/nir_serialize.h"
|
||||
#include "dev/intel_debug.h"
|
||||
#include "util/build_id.h"
|
||||
#include "util/disk_cache.h"
|
||||
#include "util/macros.h"
|
||||
#include "util/mesa-sha1.h"
|
||||
#include "util/u_dynarray.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <getopt.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
/* Shader functions */
|
||||
#define SPIR_V_MAGIC_NUMBER 0x07230203
|
||||
|
||||
static struct disk_cache *
|
||||
get_disk_cache(struct brw_compiler *compiler)
|
||||
{
|
||||
#ifdef ENABLE_SHADER_CACHE
|
||||
char renderer[14];
|
||||
ASSERTED int len = snprintf(renderer, sizeof(renderer), "brw_clc_%04x",
|
||||
compiler->devinfo->pci_device_id);
|
||||
assert(len == sizeof(renderer) - 2);
|
||||
|
||||
const struct build_id_note *note =
|
||||
build_id_find_nhdr_for_addr(get_disk_cache);
|
||||
if (note == NULL) {
|
||||
fprintf(stderr, "Failed to find build-id\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
unsigned build_id_len = build_id_length(note);
|
||||
if (build_id_len < 20) {
|
||||
fprintf(stderr, "build-id too short. It needs to be a SHA\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
struct mesa_sha1 sha1_ctx;
|
||||
uint8_t sha1[20];
|
||||
_mesa_sha1_init(&sha1_ctx);
|
||||
_mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
|
||||
_mesa_sha1_final(&sha1_ctx, sha1);
|
||||
|
||||
char timestamp[41];
|
||||
_mesa_sha1_format(timestamp, sha1);
|
||||
|
||||
const uint64_t driver_flags = brw_get_compiler_config_value(compiler);
|
||||
|
||||
return disk_cache_create(renderer, timestamp, driver_flags);
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
compiler_log(void *data, unsigned *id, const char *fmt, ...)
|
||||
{
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
if (INTEL_DEBUG(DEBUG_CS))
|
||||
vfprintf(stderr, fmt, args);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
static void
|
||||
msg_callback(void *priv, const char *msg)
|
||||
{
|
||||
(void)priv;
|
||||
fprintf(stderr, "%s", msg);
|
||||
}
|
||||
|
||||
static void
|
||||
print_u32_data(FILE *fp, const char *prefix, const char *arr_name,
|
||||
const uint32_t *data, size_t len)
|
||||
{
|
||||
assert(len % 4 == 0);
|
||||
fprintf(fp, "static const uint32_t %s_%s[] = {", prefix, arr_name);
|
||||
for (unsigned i = 0; i < (len / 4); i++) {
|
||||
if (i % 4 == 0)
|
||||
fprintf(fp,"\n ");
|
||||
|
||||
fprintf(fp, " 0x%08" PRIx32 ",", data[i]);
|
||||
}
|
||||
fprintf(fp, "\n};\n");
|
||||
}
|
||||
|
||||
static void
|
||||
print_u8_data(FILE *fp, const char *prefix, const char *arr_name,
|
||||
const uint8_t *data, size_t len)
|
||||
{
|
||||
fprintf(fp, "static const uint8_t %s_%s[] = {", prefix, arr_name);
|
||||
for (unsigned i = 0; i < len; i++) {
|
||||
if (i % 16 == 0)
|
||||
fprintf(fp,"\n ");
|
||||
|
||||
fprintf(fp, " 0x%02" PRIx8 ",", data[i]);
|
||||
}
|
||||
fprintf(fp, "\n};\n");
|
||||
}
|
||||
|
||||
static const char *
|
||||
reloc_type_str(enum brw_shader_reloc_type type)
|
||||
{
|
||||
switch (type) {
|
||||
#define CASE(e) case e: return #e;
|
||||
CASE(BRW_SHADER_RELOC_TYPE_U32)
|
||||
CASE(BRW_SHADER_RELOC_TYPE_MOV_IMM)
|
||||
#undef CASE
|
||||
default:
|
||||
unreachable("Unknown relocation type");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_cs_prog_data_fields(FILE *fp, const char *prefix, const char *pad,
|
||||
const struct brw_cs_prog_data *cs_prog_data)
|
||||
{
|
||||
#define PROG_DATA_FIELD(fmt, field) \
|
||||
fprintf(fp, "%s." #field " = " fmt ",\n", pad, cs_prog_data->field)
|
||||
|
||||
#define PROG_DATA_BOOL_FIELD(field) \
|
||||
fprintf(fp, "%s." #field " = %s,\n", pad, \
|
||||
cs_prog_data->field ? "true" : "false")
|
||||
|
||||
PROG_DATA_FIELD("%u", base.nr_params);
|
||||
assert(cs_prog_data->base.stage == MESA_SHADER_COMPUTE);
|
||||
fprintf(fp, "%s.base.stage = MESA_SHADER_COMPUTE,\n", pad);
|
||||
assert(cs_prog_data->base.zero_push_reg == 0);
|
||||
assert(cs_prog_data->base.push_reg_mask_param == 0);
|
||||
PROG_DATA_FIELD("%u", base.curb_read_length);
|
||||
PROG_DATA_FIELD("%u", base.total_scratch);
|
||||
PROG_DATA_FIELD("%u", base.total_shared);
|
||||
PROG_DATA_FIELD("%u", base.program_size);
|
||||
PROG_DATA_FIELD("%u", base.const_data_size);
|
||||
PROG_DATA_FIELD("%u", base.const_data_offset);
|
||||
PROG_DATA_FIELD("%u", base.num_relocs);
|
||||
fprintf(fp, "%s.base.relocs = %s_relocs,\n", pad, prefix);
|
||||
assert(!cs_prog_data->base.has_ubo_pull);
|
||||
assert(cs_prog_data->base.dispatch_grf_start_reg == 0);
|
||||
assert(!cs_prog_data->base.use_alt_mode);
|
||||
assert(cs_prog_data->base.param == 0);
|
||||
PROG_DATA_BOOL_FIELD(base.uses_atomic_load_store);
|
||||
fprintf(fp, "%s.local_size = { %u, %u, %u },\n", pad,
|
||||
cs_prog_data->local_size[0],
|
||||
cs_prog_data->local_size[1],
|
||||
cs_prog_data->local_size[2]);
|
||||
fprintf(fp, "%s.prog_offset = { %u, %u, %u },\n", pad,
|
||||
cs_prog_data->prog_offset[0],
|
||||
cs_prog_data->prog_offset[1],
|
||||
cs_prog_data->prog_offset[2]);
|
||||
PROG_DATA_FIELD("%u", prog_mask);
|
||||
PROG_DATA_FIELD("%u", prog_spilled);
|
||||
PROG_DATA_BOOL_FIELD(uses_barrier);
|
||||
PROG_DATA_BOOL_FIELD(uses_num_work_groups);
|
||||
assert(!cs_prog_data->uses_inline_data);
|
||||
assert(!cs_prog_data->uses_btd_stack_ids);
|
||||
PROG_DATA_FIELD("%u", push.per_thread.dwords);
|
||||
PROG_DATA_FIELD("%u", push.per_thread.regs);
|
||||
PROG_DATA_FIELD("%u", push.per_thread.size);
|
||||
PROG_DATA_FIELD("%u", push.cross_thread.dwords);
|
||||
PROG_DATA_FIELD("%u", push.cross_thread.regs);
|
||||
PROG_DATA_FIELD("%u", push.cross_thread.size);
|
||||
|
||||
#undef PROG_DATA_FIELD
|
||||
#undef PROG_DATA_BOOL_FIELD
|
||||
}
|
||||
|
||||
static void
|
||||
print_kernel(FILE *fp, const char *prefix,
|
||||
const struct brw_kernel *kernel,
|
||||
const struct brw_isa_info *isa)
|
||||
{
|
||||
struct mesa_sha1 sha1_ctx;
|
||||
_mesa_sha1_init(&sha1_ctx);
|
||||
|
||||
#define SHA1_UPDATE_VALUE(val) \
|
||||
_mesa_sha1_update(&sha1_ctx, &val, sizeof(val))
|
||||
|
||||
fprintf(fp, "#include \"intel/compiler/brw_kernel.h\"\n");
|
||||
fprintf(fp, "\n");
|
||||
|
||||
fprintf(fp, "static const struct brw_shader_reloc %s_relocs[] = {\n",
|
||||
prefix);
|
||||
for (unsigned i = 0; i < kernel->prog_data.base.num_relocs; i++) {
|
||||
const struct brw_shader_reloc *reloc = &kernel->prog_data.base.relocs[i];
|
||||
fprintf(fp, " { %"PRIu32", %s, %"PRIu32", %"PRIu32" },\n",
|
||||
reloc->id, reloc_type_str(reloc->type),
|
||||
reloc->offset, reloc->delta);
|
||||
}
|
||||
fprintf(fp, "};\n");
|
||||
_mesa_sha1_update(&sha1_ctx, kernel->prog_data.base.relocs,
|
||||
kernel->prog_data.base.num_relocs *
|
||||
sizeof(kernel->prog_data.base.relocs[0]));
|
||||
|
||||
/* Get rid of the pointers before we hash */
|
||||
struct brw_cs_prog_data cs_prog_data = kernel->prog_data;
|
||||
cs_prog_data.base.relocs = NULL;
|
||||
assert(cs_prog_data.base.param == NULL);
|
||||
_mesa_sha1_update(&sha1_ctx, &cs_prog_data, sizeof(cs_prog_data));
|
||||
|
||||
SHA1_UPDATE_VALUE(kernel->args_size);
|
||||
SHA1_UPDATE_VALUE(kernel->arg_count);
|
||||
_mesa_sha1_update(&sha1_ctx, kernel->args,
|
||||
kernel->arg_count * sizeof(kernel->args[0]));
|
||||
|
||||
fprintf(fp, "static const struct brw_kernel_arg_desc %s_args[] = {\n",
|
||||
prefix);
|
||||
for (unsigned i = 0; i < kernel->arg_count; i++) {
|
||||
fprintf(fp, " { %d, %d },\n",
|
||||
kernel->args[i].offset, kernel->args[i].size);
|
||||
}
|
||||
fprintf(fp, "};\n\n");
|
||||
|
||||
_mesa_sha1_update(&sha1_ctx, kernel->code,
|
||||
kernel->prog_data.base.program_size);
|
||||
|
||||
fprintf(fp, "#if 0 /* BEGIN KERNEL ASSEMBLY */\n");
|
||||
fprintf(fp, "\n");
|
||||
brw_disassemble_with_errors(isa, kernel->code, 0, fp);
|
||||
fprintf(fp, "\n");
|
||||
fprintf(fp, "#endif /* END KERNEL ASSEMBLY */\n");
|
||||
print_u32_data(fp, prefix, "code", kernel->code,
|
||||
kernel->prog_data.base.program_size);
|
||||
|
||||
fprintf(fp, "static const struct brw_kernel %s = {\n", prefix);
|
||||
fprintf(fp, " .prog_data = {\n");
|
||||
print_cs_prog_data_fields(fp, prefix, " ", &kernel->prog_data);
|
||||
fprintf(fp, " },\n");
|
||||
fprintf(fp, " .args_size = %d,\n", (int)kernel->args_size);
|
||||
fprintf(fp, " .arg_count = %d,\n", (int)kernel->arg_count);
|
||||
fprintf(fp, " .args = %s_args,\n", prefix);
|
||||
fprintf(fp, " .code = %s_code,\n", prefix);
|
||||
fprintf(fp, "};\n");
|
||||
|
||||
unsigned char sha1[20];
|
||||
_mesa_sha1_final(&sha1_ctx, sha1);
|
||||
char sha1_str[41];
|
||||
_mesa_sha1_format(sha1_str, sha1);
|
||||
fprintf(fp, "const char *%s_sha1 = \"%s\";\n", prefix, sha1_str);
|
||||
}
|
||||
|
||||
static void
|
||||
print_usage(char *exec_name, FILE *f)
|
||||
{
|
||||
fprintf(f,
|
||||
"Usage: %s [options] -- [clang args]\n"
|
||||
"Options:\n"
|
||||
" -h --help Print this help.\n"
|
||||
" -e, --entrypoint <name> Specify the entry-point name.\n"
|
||||
" -L, --llvm17-wa Enable LLVM 17 workarounds for opaque pointers"
|
||||
" -p, --platform <name> Specify the target platform name.\n"
|
||||
" --prefix <prefix> Prefix for variable names in generated C code.\n"
|
||||
" -o, --out <filename> Specify the output filename.\n"
|
||||
" -i, --in <filename> Specify one input filename. Accepted multiple times.\n"
|
||||
" -s, --spv <filename> Specify the output filename for spirv.\n"
|
||||
" -n, --nir Specify whether to output serialized NIR instead of ISA.\n"
|
||||
" -t, --text <filename> Specify the output filename for the parsed text\n"
|
||||
" -v, --verbose Print more information during compilation.\n"
|
||||
" -M, --llvm-version Print LLVM version.\n"
|
||||
, exec_name);
|
||||
}
|
||||
|
||||
#define OPT_PREFIX 1000
|
||||
|
||||
struct intel_clc_params {
|
||||
char *entry_point;
|
||||
char *platform;
|
||||
char *outfile;
|
||||
char *spv_outfile;
|
||||
char *txt_outfile;
|
||||
char *prefix;
|
||||
|
||||
bool output_nir;
|
||||
bool print_info;
|
||||
bool llvm17_wa;
|
||||
|
||||
void *mem_ctx;
|
||||
|
||||
struct intel_device_info devinfo;
|
||||
};
|
||||
|
||||
#include "compiler/spirv/nir_spirv.h"
|
||||
|
||||
static int
|
||||
output_nir(const struct intel_clc_params *params, struct clc_binary *binary)
|
||||
{
|
||||
struct spirv_to_nir_options spirv_options = {
|
||||
.environment = NIR_SPIRV_OPENCL,
|
||||
.caps = {
|
||||
.address = true,
|
||||
.groups = true,
|
||||
.image_write_without_format = true,
|
||||
.int8 = true,
|
||||
.int16 = true,
|
||||
.int64 = true,
|
||||
.int64_atomics = true,
|
||||
.kernel = true,
|
||||
.linkage = true, /* We receive linked kernel from clc */
|
||||
.float_controls = true,
|
||||
.generic_pointers = true,
|
||||
.storage_8bit = true,
|
||||
.storage_16bit = true,
|
||||
.subgroup_arithmetic = true,
|
||||
.subgroup_basic = true,
|
||||
.subgroup_ballot = true,
|
||||
.subgroup_dispatch = true,
|
||||
.subgroup_quad = true,
|
||||
.subgroup_shuffle = true,
|
||||
.subgroup_vote = true,
|
||||
|
||||
.intel_subgroup_shuffle = true,
|
||||
.intel_subgroup_buffer_block_io = true,
|
||||
},
|
||||
.shared_addr_format = nir_address_format_62bit_generic,
|
||||
.global_addr_format = nir_address_format_62bit_generic,
|
||||
.temp_addr_format = nir_address_format_62bit_generic,
|
||||
.constant_addr_format = nir_address_format_64bit_global,
|
||||
.create_library = true,
|
||||
};
|
||||
|
||||
FILE *fp = params->outfile != NULL ?
|
||||
fopen(params->outfile, "w") : stdout;
|
||||
if (!fp) {
|
||||
fprintf(stderr, "Failed to open %s\n", params->outfile);
|
||||
return -1;
|
||||
}
|
||||
|
||||
spirv_library_to_nir_builder(fp, binary->data, binary->size / 4,
|
||||
&spirv_options);
|
||||
|
||||
nir_shader *nir = brw_nir_from_spirv(params->mem_ctx,
|
||||
binary->data, binary->size,
|
||||
params->llvm17_wa);
|
||||
if (!nir) {
|
||||
fprintf(stderr, "Failed to generate NIR out of SPIRV\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct blob blob;
|
||||
blob_init(&blob);
|
||||
nir_serialize(&blob, nir, false /* strip */);
|
||||
print_u8_data(fp, params->prefix, "nir", blob.data, blob.size);
|
||||
blob_finish(&blob);
|
||||
|
||||
if (params->outfile)
|
||||
fclose(fp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
output_isa(const struct intel_clc_params *params, struct clc_binary *binary)
|
||||
{
|
||||
struct brw_kernel kernel = {};
|
||||
char *error_str;
|
||||
|
||||
struct brw_isa_info _isa, *isa = &_isa;
|
||||
brw_init_isa_info(isa, ¶ms->devinfo);
|
||||
|
||||
struct brw_compiler *compiler = brw_compiler_create(params->mem_ctx,
|
||||
¶ms->devinfo);
|
||||
compiler->shader_debug_log = compiler_log;
|
||||
compiler->shader_perf_log = compiler_log;
|
||||
struct disk_cache *disk_cache = get_disk_cache(compiler);
|
||||
|
||||
if (!brw_kernel_from_spirv(compiler, disk_cache, &kernel, NULL, params->mem_ctx,
|
||||
binary->data, binary->size,
|
||||
params->entry_point, &error_str)) {
|
||||
fprintf(stderr, "Compile failed: %s\n", error_str);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (params->print_info) {
|
||||
fprintf(stdout, "kernel info:\n");
|
||||
fprintf(stdout, " uses_barrier : %u\n", kernel.prog_data.uses_barrier);
|
||||
fprintf(stdout, " uses_num_work_groups : %u\n", kernel.prog_data.uses_num_work_groups);
|
||||
fprintf(stdout, " uses_inline_data : %u\n", kernel.prog_data.uses_inline_data);
|
||||
fprintf(stdout, " local_size : %ux%ux%u\n",
|
||||
kernel.prog_data.local_size[0],
|
||||
kernel.prog_data.local_size[1],
|
||||
kernel.prog_data.local_size[2]);
|
||||
fprintf(stdout, " curb_read_length : %u\n", kernel.prog_data.base.curb_read_length);
|
||||
fprintf(stdout, " total_scratch : %u\n", kernel.prog_data.base.total_scratch);
|
||||
fprintf(stdout, " total_shared : %u\n", kernel.prog_data.base.total_shared);
|
||||
fprintf(stdout, " program_size : %u\n", kernel.prog_data.base.program_size);
|
||||
fprintf(stdout, " const_data_size : %u\n", kernel.prog_data.base.const_data_size);
|
||||
fprintf(stdout, " uses_atomic_load_store : %u\n", kernel.prog_data.base.uses_atomic_load_store);
|
||||
fprintf(stdout, " dispatch_grf_start_reg : %u\n", kernel.prog_data.base.dispatch_grf_start_reg);
|
||||
}
|
||||
|
||||
char *prefix = params->prefix;
|
||||
char prefix_tmp[256];
|
||||
if (prefix == NULL) {
|
||||
bool is_pt_5 = (params->devinfo.verx10 % 10) == 5;
|
||||
snprintf(prefix_tmp, sizeof(prefix_tmp), "gfx%d%s_clc_%s",
|
||||
params->devinfo.ver, is_pt_5 ? "5" : "", params->entry_point);
|
||||
prefix = prefix_tmp;
|
||||
}
|
||||
|
||||
if (params->outfile != NULL) {
|
||||
FILE *fp = fopen(params->outfile, "w");
|
||||
print_kernel(fp, prefix, &kernel, isa);
|
||||
fclose(fp);
|
||||
} else {
|
||||
print_kernel(stdout, prefix, &kernel, isa);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
print_llvm_version(FILE *out)
|
||||
{
|
||||
fprintf(out, "%s\n", MESA_LLVM_VERSION_STRING);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int exit_code = 0;
|
||||
|
||||
process_intel_debug_variable();
|
||||
|
||||
static struct option long_options[] ={
|
||||
{"help", no_argument, 0, 'h'},
|
||||
{"entrypoint", required_argument, 0, 'e'},
|
||||
{"platform", required_argument, 0, 'p'},
|
||||
{"prefix", required_argument, 0, OPT_PREFIX},
|
||||
{"in", required_argument, 0, 'i'},
|
||||
{"out", required_argument, 0, 'o'},
|
||||
{"spv", required_argument, 0, 's'},
|
||||
{"text", required_argument, 0, 't'},
|
||||
{"nir", no_argument, 0, 'n'},
|
||||
{"llvm17-wa", no_argument, 0, 'L'},
|
||||
{"llvm-version", no_argument, 0, 'M'},
|
||||
{"verbose", no_argument, 0, 'v'},
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
|
||||
struct intel_clc_params params = {};
|
||||
|
||||
struct util_dynarray clang_args;
|
||||
struct util_dynarray input_files;
|
||||
|
||||
struct clc_binary spirv_obj = {0};
|
||||
struct clc_parsed_spirv parsed_spirv_data = {0};
|
||||
struct disk_cache *disk_cache = NULL;
|
||||
|
||||
params.mem_ctx = ralloc_context(NULL);
|
||||
|
||||
util_dynarray_init(&clang_args, params.mem_ctx);
|
||||
util_dynarray_init(&input_files, params.mem_ctx);
|
||||
|
||||
int ch;
|
||||
while ((ch = getopt_long(argc, argv, "he:p:s:t:i:no:MLv", long_options, NULL)) != -1)
|
||||
{
|
||||
switch (ch)
|
||||
{
|
||||
case 'h':
|
||||
print_usage(argv[0], stdout);
|
||||
goto end;
|
||||
case 'e':
|
||||
params.entry_point = optarg;
|
||||
break;
|
||||
case 'p':
|
||||
params.platform = optarg;
|
||||
break;
|
||||
case 'o':
|
||||
params.outfile = optarg;
|
||||
break;
|
||||
case 'i':
|
||||
util_dynarray_append(&input_files, char *, optarg);
|
||||
break;
|
||||
case 'n':
|
||||
params.output_nir = true;
|
||||
break;
|
||||
case 's':
|
||||
params.spv_outfile = optarg;
|
||||
break;
|
||||
case 't':
|
||||
params.txt_outfile = optarg;
|
||||
break;
|
||||
case 'v':
|
||||
params.print_info = true;
|
||||
break;
|
||||
case 'L':
|
||||
params.llvm17_wa = true;
|
||||
break;
|
||||
case 'M':
|
||||
print_llvm_version(stdout);
|
||||
return EXIT_SUCCESS;
|
||||
case OPT_PREFIX:
|
||||
params.prefix = optarg;
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Unrecognized option \"%s\".\n", optarg);
|
||||
print_usage(argv[0], stderr);
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = optind; i < argc; i++) {
|
||||
util_dynarray_append(&clang_args, char *, argv[i]);
|
||||
}
|
||||
|
||||
if (util_dynarray_num_elements(&input_files, char *) == 0) {
|
||||
fprintf(stderr, "No input file(s).\n");
|
||||
print_usage(argv[0], stderr);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
struct clc_logger logger = {
|
||||
.error = msg_callback,
|
||||
.warning = msg_callback,
|
||||
};
|
||||
|
||||
size_t total_size = 0;
|
||||
char *all_inputs = NULL;
|
||||
util_dynarray_foreach(&input_files, char *, infile) {
|
||||
int fd = open(*infile, O_RDONLY);
|
||||
if (fd < 0) {
|
||||
fprintf(stderr, "Failed to open %s\n", *infile);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
off_t len = lseek(fd, 0, SEEK_END);
|
||||
size_t new_size = total_size + len;
|
||||
all_inputs = reralloc_size(params.mem_ctx, all_inputs, new_size + 1);
|
||||
if (!all_inputs) {
|
||||
fprintf(stderr, "Failed to allocate memory\n");
|
||||
goto fail;
|
||||
}
|
||||
lseek(fd, 0, SEEK_SET);
|
||||
read(fd, all_inputs + total_size, len);
|
||||
close(fd);
|
||||
total_size = new_size;
|
||||
all_inputs[total_size] = '\0';
|
||||
}
|
||||
|
||||
if (params.txt_outfile) {
|
||||
FILE *fp = fopen(params.txt_outfile, "w");
|
||||
fwrite(all_inputs, total_size, 1, fp);
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
const char *allowed_spirv_extensions[] = {
|
||||
"SPV_EXT_shader_atomic_float_add",
|
||||
"SPV_EXT_shader_atomic_float_min_max",
|
||||
"SPV_KHR_float_controls",
|
||||
"SPV_INTEL_subgroups",
|
||||
NULL,
|
||||
};
|
||||
|
||||
struct clc_compile_args clc_args = {
|
||||
.source = {
|
||||
.name = "intel_clc_files",
|
||||
.value = all_inputs,
|
||||
},
|
||||
.features = {
|
||||
.fp16 = true,
|
||||
.intel_subgroups = true,
|
||||
.subgroups = true,
|
||||
.subgroups_ifp = true,
|
||||
},
|
||||
.args = util_dynarray_begin(&clang_args),
|
||||
.num_args = util_dynarray_num_elements(&clang_args, char *),
|
||||
.allowed_spirv_extensions = allowed_spirv_extensions,
|
||||
};
|
||||
|
||||
if (!clc_compile_c_to_spirv(&clc_args, &logger, &spirv_obj)) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (params.spv_outfile) {
|
||||
FILE *fp = fopen(params.spv_outfile, "w");
|
||||
fwrite(spirv_obj.data, spirv_obj.size, 1, fp);
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
glsl_type_singleton_init_or_ref();
|
||||
|
||||
if (params.output_nir) {
|
||||
exit_code = output_nir(¶ms, &spirv_obj);
|
||||
} else {
|
||||
if (params.platform == NULL) {
|
||||
fprintf(stderr, "No target platform name specified.\n");
|
||||
print_usage(argv[0], stderr);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
int pci_id = intel_device_name_to_pci_device_id(params.platform);
|
||||
if (pci_id < 0) {
|
||||
fprintf(stderr, "Invalid target platform name: %s\n", params.platform);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (!intel_get_device_info_from_pci_id(pci_id, ¶ms.devinfo)) {
|
||||
fprintf(stderr, "Failed to get device information.\n");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (params.devinfo.verx10 < 125) {
|
||||
fprintf(stderr, "Platform currently not supported.\n");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (params.entry_point == NULL) {
|
||||
fprintf(stderr, "No entry-point name specified.\n");
|
||||
print_usage(argv[0], stderr);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
struct clc_parsed_spirv parsed_spirv_data;
|
||||
if (!clc_parse_spirv(&spirv_obj, &logger, &parsed_spirv_data))
|
||||
goto fail;
|
||||
|
||||
const struct clc_kernel_info *kernel_info = NULL;
|
||||
for (unsigned i = 0; i < parsed_spirv_data.num_kernels; i++) {
|
||||
if (strcmp(parsed_spirv_data.kernels[i].name, params.entry_point) == 0) {
|
||||
kernel_info = &parsed_spirv_data.kernels[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (kernel_info == NULL) {
|
||||
fprintf(stderr, "Kernel entrypoint %s not found\n", params.entry_point);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
exit_code = output_isa(¶ms, &spirv_obj);
|
||||
}
|
||||
|
||||
glsl_type_singleton_decref();
|
||||
|
||||
goto end;
|
||||
|
||||
fail:
|
||||
exit_code = 1;
|
||||
|
||||
end:
|
||||
disk_cache_destroy(disk_cache);
|
||||
clc_free_parsed_spirv(&parsed_spirv_data);
|
||||
clc_free_spirv(&spirv_obj);
|
||||
ralloc_free(params.mem_ctx);
|
||||
|
||||
return exit_code;
|
||||
}
|
||||
|
|
@ -65,7 +65,6 @@ libintel_compiler_elk_files = files(
|
|||
'brw_fs_reg_allocate.cpp',
|
||||
'brw_fs_register_coalesce.cpp',
|
||||
'brw_fs_saturate_propagation.cpp',
|
||||
'brw_fs_scoreboard.cpp',
|
||||
'brw_fs_sel_peephole.cpp',
|
||||
'brw_fs_thread_payload.cpp',
|
||||
'brw_fs_validate.cpp',
|
||||
|
|
@ -81,23 +80,14 @@ libintel_compiler_elk_files = files(
|
|||
'brw_ir_vec4.h',
|
||||
'brw_isa_info.h',
|
||||
'brw_lower_logical_sends.cpp',
|
||||
'brw_mesh.cpp',
|
||||
'brw_nir.h',
|
||||
'brw_nir.c',
|
||||
'brw_nir_analyze_boolean_resolves.c',
|
||||
'brw_nir_analyze_ubo_ranges.c',
|
||||
'brw_nir_attribute_workarounds.c',
|
||||
'brw_nir_lower_cooperative_matrix.c',
|
||||
'brw_nir_lower_cs_intrinsics.c',
|
||||
'brw_nir_lower_alpha_to_coverage.c',
|
||||
'brw_nir_lower_intersection_shader.c',
|
||||
'brw_nir_lower_ray_queries.c',
|
||||
'brw_nir_lower_rt_intrinsics.c',
|
||||
'brw_nir_lower_shader_calls.c',
|
||||
'brw_nir_lower_storage_image.c',
|
||||
'brw_nir_rt.h',
|
||||
'brw_nir_rt.c',
|
||||
'brw_nir_rt_builder.h',
|
||||
'brw_packed_float.c',
|
||||
'brw_predicated_break.cpp',
|
||||
'brw_prim.h',
|
||||
|
|
@ -105,7 +95,6 @@ libintel_compiler_elk_files = files(
|
|||
'brw_reg.h',
|
||||
'brw_reg_type.c',
|
||||
'brw_reg_type.h',
|
||||
'brw_rt.h',
|
||||
'brw_schedule_instructions.cpp',
|
||||
'brw_shader.cpp',
|
||||
'brw_shader.h',
|
||||
|
|
@ -173,7 +162,6 @@ if with_tests
|
|||
'test_fs_combine_constants.cpp',
|
||||
'test_fs_copy_propagation.cpp',
|
||||
'test_fs_saturate_propagation.cpp',
|
||||
'test_fs_scoreboard.cpp',
|
||||
'test_simd_selection.cpp',
|
||||
'test_vec4_cmod_propagation.cpp',
|
||||
'test_vec4_copy_propagation.cpp',
|
||||
|
|
@ -228,10 +216,6 @@ asm_testcases = [
|
|||
['ivb', 'gfx7'],
|
||||
['hsw', 'gfx7.5'],
|
||||
['bdw', 'gfx8'],
|
||||
['skl', 'gfx9'],
|
||||
['icl', 'gfx11'],
|
||||
['tgl', 'gfx12'],
|
||||
['dg2', 'gfx12.5'],
|
||||
]
|
||||
|
||||
test_runner = find_program('tests/run-test.py')
|
||||
|
|
|
|||
|
|
@ -1,893 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2019 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include "brw_fs.h"
|
||||
#include "brw_fs_builder.h"
|
||||
#include "brw_cfg.h"
|
||||
|
||||
using namespace brw;
|
||||
|
||||
class scoreboard_test : public ::testing::Test {
|
||||
protected:
|
||||
scoreboard_test();
|
||||
~scoreboard_test() override;
|
||||
|
||||
struct brw_compiler *compiler;
|
||||
struct brw_compile_params params;
|
||||
struct intel_device_info *devinfo;
|
||||
void *ctx;
|
||||
struct brw_wm_prog_data *prog_data;
|
||||
struct gl_shader_program *shader_prog;
|
||||
fs_visitor *v;
|
||||
fs_builder bld;
|
||||
};
|
||||
|
||||
scoreboard_test::scoreboard_test()
|
||||
: bld(NULL, 0)
|
||||
{
|
||||
ctx = ralloc_context(NULL);
|
||||
compiler = rzalloc(ctx, struct brw_compiler);
|
||||
devinfo = rzalloc(ctx, struct intel_device_info);
|
||||
devinfo->ver = 12;
|
||||
devinfo->verx10 = devinfo->ver * 10;
|
||||
|
||||
compiler->devinfo = devinfo;
|
||||
brw_init_isa_info(&compiler->isa, devinfo);
|
||||
|
||||
params = {};
|
||||
params.mem_ctx = ctx;
|
||||
|
||||
prog_data = ralloc(ctx, struct brw_wm_prog_data);
|
||||
nir_shader *shader =
|
||||
nir_shader_create(ctx, MESA_SHADER_FRAGMENT, NULL, NULL);
|
||||
|
||||
v = new fs_visitor(compiler, ¶ms, NULL, &prog_data->base, shader, 8,
|
||||
false, false);
|
||||
|
||||
bld = fs_builder(v).at_end();
|
||||
}
|
||||
|
||||
scoreboard_test::~scoreboard_test()
|
||||
{
|
||||
delete v;
|
||||
v = NULL;
|
||||
|
||||
ralloc_free(ctx);
|
||||
ctx = NULL;
|
||||
}
|
||||
|
||||
static fs_inst *
|
||||
instruction(bblock_t *block, int num)
|
||||
{
|
||||
fs_inst *inst = (fs_inst *)block->start();
|
||||
for (int i = 0; i < num; i++) {
|
||||
inst = (fs_inst *)inst->next;
|
||||
}
|
||||
return inst;
|
||||
}
|
||||
|
||||
static void
|
||||
lower_scoreboard(fs_visitor *v)
|
||||
{
|
||||
const bool print = getenv("TEST_DEBUG");
|
||||
|
||||
if (print) {
|
||||
fprintf(stderr, "= Before =\n");
|
||||
v->cfg->dump();
|
||||
}
|
||||
|
||||
v->lower_scoreboard();
|
||||
|
||||
if (print) {
|
||||
fprintf(stderr, "\n= After =\n");
|
||||
v->cfg->dump();
|
||||
}
|
||||
}
|
||||
|
||||
fs_inst *
|
||||
emit_SEND(const fs_builder &bld, const fs_reg &dst,
|
||||
const fs_reg &desc, const fs_reg &payload)
|
||||
{
|
||||
fs_inst *inst = bld.emit(SHADER_OPCODE_SEND, dst, desc, desc, payload);
|
||||
inst->mlen = 1;
|
||||
return inst;
|
||||
}
|
||||
|
||||
static tgl_swsb
|
||||
tgl_swsb_testcase(unsigned regdist, unsigned sbid, enum tgl_sbid_mode mode)
|
||||
{
|
||||
tgl_swsb swsb = tgl_swsb_sbid(mode, sbid);
|
||||
swsb.regdist = regdist;
|
||||
return swsb;
|
||||
}
|
||||
|
||||
bool operator ==(const tgl_swsb &a, const tgl_swsb &b)
|
||||
{
|
||||
return a.mode == b.mode &&
|
||||
a.regdist == b.regdist &&
|
||||
(a.mode == TGL_SBID_NULL || a.sbid == b.sbid);
|
||||
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, const tgl_swsb &swsb) {
|
||||
if (swsb.regdist)
|
||||
os << "@" << swsb.regdist;
|
||||
|
||||
if (swsb.mode) {
|
||||
if (swsb.regdist)
|
||||
os << " ";
|
||||
os << "$" << swsb.sbid;
|
||||
if (swsb.mode & TGL_SBID_DST)
|
||||
os << ".dst";
|
||||
if (swsb.mode & TGL_SBID_SRC)
|
||||
os << ".src";
|
||||
}
|
||||
|
||||
return os;
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, RAW_inorder_inorder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
fs_reg y = v->vgrf(glsl_int_type());
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.MUL( y, g[3], g[4]);
|
||||
bld.AND(g[5], x, y);
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_regdist(1));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, RAW_inorder_outoforder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.MUL( g[3], g[4], g[5]);
|
||||
emit_SEND(bld, g[6], g[7], x);
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_testcase(2, 0, TGL_SBID_SET));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, RAW_outoforder_inorder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
fs_reg y = v->vgrf(glsl_int_type());
|
||||
emit_SEND(bld, x, g[1], g[2]);
|
||||
bld.MUL( y, g[3], g[4]);
|
||||
bld.AND( g[5], x, y);
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_sbid(TGL_SBID_SET, 0));
|
||||
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_testcase(1, 0, TGL_SBID_DST));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, RAW_outoforder_outoforder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
/* The second SEND depends on the first, and would need to refer to two
|
||||
* SBIDs. Since it is not possible we expect a SYNC instruction to be
|
||||
* added.
|
||||
*/
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
emit_SEND(bld, x, g[1], g[2]);
|
||||
emit_SEND(bld, g[3], x, g[4])->sfid++;
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(1, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_sbid(TGL_SBID_SET, 0));
|
||||
|
||||
fs_inst *sync = instruction(block0, 1);
|
||||
EXPECT_EQ(sync->opcode, BRW_OPCODE_SYNC);
|
||||
EXPECT_EQ(sync->sched, tgl_swsb_sbid(TGL_SBID_DST, 0));
|
||||
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_sbid(TGL_SBID_SET, 1));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, WAR_inorder_inorder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.ADD(g[1], x, g[2]);
|
||||
bld.MUL(g[3], g[4], g[5]);
|
||||
bld.AND( x, g[6], g[7]);
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_null());
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, WAR_inorder_outoforder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.ADD( g[1], x, g[2]);
|
||||
bld.MUL( g[3], g[4], g[5]);
|
||||
emit_SEND(bld, x, g[6], g[7]);
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_testcase(2, 0, TGL_SBID_SET));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, WAR_outoforder_inorder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
emit_SEND(bld, g[1], g[2], x);
|
||||
bld.MUL( g[4], g[5], g[6]);
|
||||
bld.AND( x, g[7], g[8]);
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_sbid(TGL_SBID_SET, 0));
|
||||
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_sbid(TGL_SBID_SRC, 0));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, WAR_outoforder_outoforder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
emit_SEND(bld, g[1], g[2], x);
|
||||
emit_SEND(bld, x, g[3], g[4])->sfid++;
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(1, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_sbid(TGL_SBID_SET, 0));
|
||||
|
||||
fs_inst *sync = instruction(block0, 1);
|
||||
EXPECT_EQ(sync->opcode, BRW_OPCODE_SYNC);
|
||||
EXPECT_EQ(sync->sched, tgl_swsb_sbid(TGL_SBID_SRC, 0));
|
||||
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_sbid(TGL_SBID_SET, 1));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, WAW_inorder_inorder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.MUL(g[3], g[4], g[5]);
|
||||
bld.AND( x, g[6], g[7]);
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
|
||||
|
||||
/* NOTE: We only need this RegDist if a long instruction is followed by a
|
||||
* short one. The pass is currently conservative about this and adding the
|
||||
* annotation.
|
||||
*/
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_regdist(2));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, WAW_inorder_outoforder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.MUL( g[3], g[4], g[5]);
|
||||
emit_SEND(bld, x, g[6], g[7]);
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_testcase(2, 0, TGL_SBID_SET));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, WAW_outoforder_inorder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
emit_SEND(bld, x, g[1], g[2]);
|
||||
bld.MUL( g[3], g[4], g[5]);
|
||||
bld.AND( x, g[6], g[7]);
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_sbid(TGL_SBID_SET, 0));
|
||||
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_sbid(TGL_SBID_DST, 0));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, WAW_outoforder_outoforder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
emit_SEND(bld, x, g[1], g[2]);
|
||||
emit_SEND(bld, x, g[3], g[4])->sfid++;
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(1, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_sbid(TGL_SBID_SET, 0));
|
||||
|
||||
fs_inst *sync = instruction(block0, 1);
|
||||
EXPECT_EQ(sync->opcode, BRW_OPCODE_SYNC);
|
||||
EXPECT_EQ(sync->sched, tgl_swsb_sbid(TGL_SBID_DST, 0));
|
||||
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_sbid(TGL_SBID_SET, 1));
|
||||
}
|
||||
|
||||
|
||||
TEST_F(scoreboard_test, loop1)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_DO);
|
||||
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_WHILE)->predicate = BRW_PREDICATE_NORMAL;
|
||||
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
bblock_t *body = v->cfg->blocks[2];
|
||||
fs_inst *add = instruction(body, 0);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(1));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[3];
|
||||
fs_inst *mul = instruction(last_block, 0);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(1));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, loop2)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
bld.XOR(g[3], g[1], g[2]);
|
||||
bld.XOR(g[4], g[1], g[2]);
|
||||
bld.XOR(g[5], g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_DO);
|
||||
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_WHILE)->predicate = BRW_PREDICATE_NORMAL;
|
||||
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
/* Now the write in ADD has the tightest RegDist for both ADD and MUL. */
|
||||
|
||||
bblock_t *body = v->cfg->blocks[2];
|
||||
fs_inst *add = instruction(body, 0);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(2));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[3];
|
||||
fs_inst *mul = instruction(last_block, 0);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(2));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, loop3)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_DO);
|
||||
|
||||
/* For the ADD in the loop body this extra distance will always apply. */
|
||||
bld.XOR(g[3], g[1], g[2]);
|
||||
bld.XOR(g[4], g[1], g[2]);
|
||||
bld.XOR(g[5], g[1], g[2]);
|
||||
bld.XOR(g[6], g[1], g[2]);
|
||||
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_WHILE)->predicate = BRW_PREDICATE_NORMAL;
|
||||
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
bblock_t *body = v->cfg->blocks[2];
|
||||
fs_inst *add = instruction(body, 4);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(5));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[3];
|
||||
fs_inst *mul = instruction(last_block, 0);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(1));
|
||||
}
|
||||
|
||||
|
||||
TEST_F(scoreboard_test, conditional1)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_IF);
|
||||
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_ENDIF);
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
bblock_t *body = v->cfg->blocks[1];
|
||||
fs_inst *add = instruction(body, 0);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(2));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[2];
|
||||
fs_inst *mul = instruction(last_block, 1);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(2));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, conditional2)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
bld.XOR(g[3], g[1], g[2]);
|
||||
bld.XOR(g[4], g[1], g[2]);
|
||||
bld.XOR(g[5], g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_IF);
|
||||
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_ENDIF);
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
bblock_t *body = v->cfg->blocks[1];
|
||||
fs_inst *add = instruction(body, 0);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(5));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[2];
|
||||
fs_inst *mul = instruction(last_block, 1);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(2));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, conditional3)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_IF);
|
||||
|
||||
bld.XOR(g[3], g[1], g[2]);
|
||||
bld.XOR(g[4], g[1], g[2]);
|
||||
bld.XOR(g[5], g[1], g[2]);
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_ENDIF);
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
bblock_t *body = v->cfg->blocks[1];
|
||||
fs_inst *add = instruction(body, 3);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(5));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[2];
|
||||
fs_inst *mul = instruction(last_block, 1);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(2));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, conditional4)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_IF);
|
||||
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.XOR(g[3], g[1], g[2]);
|
||||
bld.XOR(g[4], g[1], g[2]);
|
||||
bld.XOR(g[5], g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_ENDIF);
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
bblock_t *body = v->cfg->blocks[1];
|
||||
fs_inst *add = instruction(body, 0);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(2));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[2];
|
||||
fs_inst *mul = instruction(last_block, 1);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(3));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, conditional5)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_IF);
|
||||
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_ELSE);
|
||||
|
||||
bld.ROL( x, g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_ENDIF);
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
bblock_t *then_body = v->cfg->blocks[1];
|
||||
fs_inst *add = instruction(then_body, 0);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(2));
|
||||
|
||||
bblock_t *else_body = v->cfg->blocks[2];
|
||||
fs_inst *rol = instruction(else_body, 0);
|
||||
EXPECT_EQ(rol->opcode, BRW_OPCODE_ROL);
|
||||
EXPECT_EQ(rol->sched, tgl_swsb_regdist(2));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[3];
|
||||
fs_inst *mul = instruction(last_block, 1);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(2));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, conditional6)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_IF);
|
||||
|
||||
bld.XOR(g[3], g[1], g[2]);
|
||||
bld.XOR(g[4], g[1], g[2]);
|
||||
bld.XOR(g[5], g[1], g[2]);
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_ELSE);
|
||||
|
||||
bld.XOR(g[6], g[1], g[2]);
|
||||
bld.XOR(g[7], g[1], g[2]);
|
||||
bld.XOR(g[8], g[1], g[2]);
|
||||
bld.XOR(g[9], g[1], g[2]);
|
||||
bld.ROL( x, g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_ENDIF);
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
bblock_t *then_body = v->cfg->blocks[1];
|
||||
fs_inst *add = instruction(then_body, 3);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(5));
|
||||
|
||||
bblock_t *else_body = v->cfg->blocks[2];
|
||||
fs_inst *rol = instruction(else_body, 4);
|
||||
EXPECT_EQ(rol->opcode, BRW_OPCODE_ROL);
|
||||
EXPECT_EQ(rol->sched, tgl_swsb_regdist(6));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[3];
|
||||
fs_inst *mul = instruction(last_block, 1);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(2));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, conditional7)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_IF);
|
||||
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.XOR(g[3], g[1], g[2]);
|
||||
bld.XOR(g[4], g[1], g[2]);
|
||||
bld.XOR(g[5], g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_ELSE);
|
||||
|
||||
bld.ROL( x, g[1], g[2]);
|
||||
bld.XOR(g[6], g[1], g[2]);
|
||||
bld.XOR(g[7], g[1], g[2]);
|
||||
bld.XOR(g[8], g[1], g[2]);
|
||||
bld.XOR(g[9], g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_ENDIF);
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
bblock_t *then_body = v->cfg->blocks[1];
|
||||
fs_inst *add = instruction(then_body, 0);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(2));
|
||||
|
||||
bblock_t *else_body = v->cfg->blocks[2];
|
||||
fs_inst *rol = instruction(else_body, 0);
|
||||
EXPECT_EQ(rol->opcode, BRW_OPCODE_ROL);
|
||||
EXPECT_EQ(rol->sched, tgl_swsb_regdist(2));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[3];
|
||||
fs_inst *mul = instruction(last_block, 1);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(6));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, conditional8)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
bld.XOR(g[3], g[1], g[2]);
|
||||
bld.XOR(g[4], g[1], g[2]);
|
||||
bld.XOR(g[5], g[1], g[2]);
|
||||
bld.XOR(g[6], g[1], g[2]);
|
||||
bld.XOR(g[7], g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_IF);
|
||||
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_ELSE);
|
||||
|
||||
bld.ROL( x, g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_ENDIF);
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
bblock_t *then_body = v->cfg->blocks[1];
|
||||
fs_inst *add = instruction(then_body, 0);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(7));
|
||||
|
||||
/* Note that the ROL will have RegDist 2 and not 7, illustrating the
|
||||
* physical CFG edge between the then-block and the else-block.
|
||||
*/
|
||||
bblock_t *else_body = v->cfg->blocks[2];
|
||||
fs_inst *rol = instruction(else_body, 0);
|
||||
EXPECT_EQ(rol->opcode, BRW_OPCODE_ROL);
|
||||
EXPECT_EQ(rol->sched, tgl_swsb_regdist(2));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[3];
|
||||
fs_inst *mul = instruction(last_block, 1);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(2));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, gfx125_RaR_over_different_pipes)
|
||||
{
|
||||
devinfo->verx10 = 125;
|
||||
brw_init_isa_info(&compiler->isa, devinfo);
|
||||
|
||||
fs_reg a = v->vgrf(glsl_int_type());
|
||||
fs_reg b = v->vgrf(glsl_int_type());
|
||||
fs_reg f = v->vgrf(glsl_float_type());
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
|
||||
bld.ADD(f, x, x);
|
||||
bld.ADD(a, x, x);
|
||||
bld.ADD(x, b, b);
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_regdist(1));
|
||||
}
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
and(1) cr0<1>UD cr0<0,1,0>UD 0xfffffb7fUD { align1 1N switch };
|
||||
and(1) cr0<1>UD cr0<0,1,0>UD 0xffffff7fUD { align1 1N switch };
|
||||
and(1) cr0<1>UD cr0<0,1,0>UD 0xffffffcfUD { align1 1N switch };
|
||||
and(1) cr0<1>UD cr0<0,1,0>UD 0xfffffbffUD { align1 1N switch };
|
||||
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000400UD { align1 1N switch };
|
||||
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000030UD { align1 1N switch };
|
||||
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000080UD { align1 1N switch };
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
05 80 00 00 00 00 00 30 00 10 00 06 7f fb ff ff
|
||||
05 80 00 00 00 00 00 30 00 10 00 06 7f ff ff ff
|
||||
05 80 00 00 00 00 00 30 00 10 00 06 cf ff ff ff
|
||||
05 80 00 00 00 00 00 30 00 10 00 06 ff fb ff ff
|
||||
06 80 00 00 00 00 00 30 00 10 00 06 00 04 00 00
|
||||
06 80 00 00 00 00 00 30 00 10 00 06 30 00 00 00
|
||||
06 80 00 00 00 00 00 30 00 10 00 06 80 00 00 00
|
||||
|
|
@ -1 +0,0 @@
|
|||
rol(16) g3<1>UD g2<0,1,0>UD g2.1<0,1,0>UD { align1 1H };
|
||||
|
|
@ -1 +0,0 @@
|
|||
0f 00 80 00 08 02 60 20 40 00 00 02 44 00 00 00
|
||||
|
|
@ -1 +0,0 @@
|
|||
ror(16) g3<1>UD g2<0,1,0>UD g2.1<0,1,0>UD { align1 1H };
|
||||
|
|
@ -1 +0,0 @@
|
|||
0e 00 80 00 08 02 60 20 40 00 00 02 44 00 00 00
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
add3(8) g118<1>D -g117<8,8,1>D g114<8,8,1>D g115<1,1,1>D { align1 1Q I@2 };
|
||||
add3(16) g55<1>D g50<8,8,1>D g46<8,8,1>D -g53<1,1,1>D { align1 1H @2 $5.dst };
|
||||
add3(16) g111<1>D -g40<8,8,1>D -g88<8,8,1>D g111<1,1,1>D { align1 1H I@1 };
|
||||
add3(16) g49<1>D 0x0008UW g47<8,8,1>D g26<1,1,1>D { align1 1H I@4 };
|
||||
add3(16) g55<1>D 0x0008UW g53<8,8,1>D g65<1,1,1>D { align1 2H I@3 };
|
||||
add3(8) g57<1>D g52<8,8,1>D (abs)g48<8,8,1>D (abs)g59<1,1,1>D { align1 1Q I@4 };
|
||||
add3(16) g51<1>D g63<8,8,1>D -g122<8,8,1>D (abs)g27<1,1,1>D { align1 1H I@7 };
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
52 1a 03 00 68 2e 04 76 05 75 0e 0e 05 72 05 73
|
||||
52 a5 04 00 68 0e 04 37 05 32 2e 0e 05 2e 05 35
|
||||
52 19 04 00 68 2e 04 6f 05 28 8e 0e 05 58 05 6f
|
||||
52 1c 04 00 60 41 04 31 08 00 0e 0e 05 2f 05 1a
|
||||
52 1b 24 00 60 41 04 37 08 00 0e 0e 05 35 05 41
|
||||
52 1c 03 00 68 0e 04 39 05 34 5e 0e 05 30 05 3b
|
||||
52 1f 04 00 68 0e 04 33 05 3f 9e 0e 05 7a 05 1b
|
||||
|
|
@ -1,30 +0,0 @@
|
|||
(+f0.0.any8h) send(1) g57UD g58UD nullUD 0x6210c500 0x02000000
|
||||
ugm MsgDesc: ( load, a32, d32, V8, transpose, L1STATE_L3MOCS dst_len = 1, src0_len = 1, src1_len = 0 bti ) BTI 2 base_offset 0 { align1 WE_all 1N $5 };
|
||||
(+f0.0.any8h) send(1) g28UD g29UD nullUD 0x6210c500 0x02000000
|
||||
ugm MsgDesc: ( load, a32, d32, V8, transpose, L1STATE_L3MOCS dst_len = 1, src0_len = 1, src1_len = 0 bti ) BTI 2 base_offset 0 { align1 WE_all 1N $2 };
|
||||
(+f0.0.any32h) send(1) g57UD g58UD nullUD 0x6210c500 0x02000000
|
||||
ugm MsgDesc: ( load, a32, d32, V8, transpose, L1STATE_L3MOCS dst_len = 1, src0_len = 1, src1_len = 0 bti ) BTI 2 base_offset 0 { align1 WE_all 1N $0 };
|
||||
send(8) nullUD g79UD g10UD 0x6200f506 0x04000100
|
||||
ugm MsgDesc: ( store_cmask, a32, d32, xyzw, L1STATE_L3MOCS dst_len = 0, src0_len = 1, src1_len = 4 bti ) BTI 4 base_offset 0 { align1 1Q $0 };
|
||||
send(16) nullUD g9UD g7UD 0x44000504 a0.1<0>UD
|
||||
ugm MsgDesc: ( store, a32, d32, V1, L1STATE_L3MOCS dst_len = 0, src0_len = 2, src1_len = 0 ss ) surface_state_index 0 { align1 1H @1 $0 };
|
||||
send(1) g4UD g0UD nullUD 0x0210151f 0x00000000
|
||||
tgm MsgDesc: ( fence, a32, tile, evict, normal_routing dst_len = 1, src0_len = 1, src1_len = 0 flat ) base_offset 0 { align1 WE_all 1N $3 };
|
||||
send(8) nullUD g36UD g37UD 0x02000b04 0x00000040
|
||||
slm MsgDesc: ( store, a32, d16u32, V1, L1STATE_L3MOCS dst_len = 0, src0_len = 1, src1_len = 1 flat ) base_offset 0 { align1 1Q $1 };
|
||||
send(8) nullUD g34UD g35UD 0x02000b04 0x00000040
|
||||
slm MsgDesc: ( store, a32, d16u32, V1, L1STATE_L3MOCS dst_len = 0, src0_len = 1, src1_len = 1 flat ) base_offset 0 { align1 1Q $0 };
|
||||
send(8) nullUD g6UD g7UD 0x0200f506 0x00000100
|
||||
slm MsgDesc: ( store_cmask, a32, d32, xyzw, L1STATE_L3MOCS dst_len = 0, src0_len = 1, src1_len = 4 flat ) base_offset 0 { align1 1Q $6 };
|
||||
send(16) nullUD g82UD g91UD 0x04040519 0x00000080
|
||||
slm MsgDesc: ( atomic_or, a32, d32, V1, L1UC_L3WB dst_len = 0, src0_len = 2, src1_len = 2 flat ) base_offset 0 { align1 2H $0 };
|
||||
send(1) g10UD g0UD nullUD 0x0210011f 0x00000000
|
||||
slm MsgDesc: ( fence, a32, threadgroup, none, normal_routing dst_len = 1, src0_len = 1, src1_len = 0 flat ) base_offset 0 { align1 WE_all 1N $1 };
|
||||
send(1) g23UD g117UD nullUD 0x2210c500 a0.1<0>UD
|
||||
ugm MsgDesc: ( load, a32, d32, V8, transpose, L1STATE_L3MOCS dst_len = 1, src0_len = 1, bss ) src1_len = 0 ex_bso surface_state_index 0 { align1 WE_all 1N @1 $10 };
|
||||
send(8) nullUD g14UD g24UD 0x040350fc a0.1<0>UD
|
||||
dp data 1 MsgDesc: (DC typed surface write, Surface = 252, SIMD16, Mask = 0x0) src1_len = 4 ex_bso mlen 2 rlen 0 { align1 1Q @1 $5 };
|
||||
send(8) nullUD g51UD g52UD 0x02000000 0x00000040
|
||||
rt accel MsgDesc: SIMD8, mlen 1 ex_mlen 1 rlen 0 { align1 1Q $2 };
|
||||
send(16) nullUD g88UD g98UD 0x02000100 0x00000080
|
||||
rt accel MsgDesc: SIMD16, mlen 1 ex_mlen 2 rlen 0 { align1 1H $6 };
|
||||
|
|
@ -1,15 +0,0 @@
|
|||
31 45 00 88 00 00 0c 39 8e 3a 00 fa 00 00 30 04
|
||||
31 42 00 88 00 00 0c 1c 8e 1d 00 fa 00 00 30 04
|
||||
31 40 00 8c 00 00 0c 39 8e 3a 00 fa 00 00 30 04
|
||||
31 40 03 00 00 00 00 00 8c 4f 0c fa 25 0a 3c 04
|
||||
31 90 04 00 00 01 02 00 14 09 08 fa 04 07 00 04
|
||||
31 43 00 80 00 00 0c 04 0c 00 3e da 00 00 04 00
|
||||
31 41 03 00 00 00 00 00 0c 24 08 e6 0c 25 02 00
|
||||
31 40 03 00 00 00 00 00 0c 22 08 e6 0c 23 02 00
|
||||
31 46 03 00 00 00 00 00 0c 06 0c ea 24 07 3c 00
|
||||
31 40 24 00 00 00 00 00 14 52 32 ea 14 5b 00 01
|
||||
31 41 00 80 00 00 0c 0a 0c 00 3e e2 00 00 00 00
|
||||
31 9a 00 80 80 01 0e 17 8c 75 00 fa 00 00 30 00
|
||||
31 95 03 00 80 01 02 00 14 0e f8 c1 24 18 d4 00
|
||||
31 42 03 00 00 00 00 00 0c 33 00 80 0c 34 00 00
|
||||
31 46 04 00 00 00 00 00 0c 58 00 82 14 62 00 00
|
||||
|
|
@ -1,23 +0,0 @@
|
|||
mul(8) g37<1>D g99<8,8,1>D g36<16,8,2>UW { align1 1Q I@1 };
|
||||
mul(8) g37<1>D g99<8,8,1>D g36<16,8,2>UW { align1 1Q I@2 };
|
||||
mul(8) g37<1>D g99<8,8,1>D g36<16,8,2>UW { align1 1Q I@3 };
|
||||
mul(8) g37<1>D g99<8,8,1>D g36<16,8,2>UW { align1 1Q I@4 };
|
||||
mul(8) g37<1>D g99<8,8,1>D g36<16,8,2>UW { align1 1Q I@5 };
|
||||
mul(8) g37<1>D g99<8,8,1>D g36<16,8,2>UW { align1 1Q I@6 };
|
||||
mul(8) g37<1>D g99<8,8,1>D g36<16,8,2>UW { align1 1Q I@7 };
|
||||
|
||||
mov(8) g36<1>UD g35<8,8,1>F { align1 1Q F@1 };
|
||||
mov(8) g36<1>UD g35<8,8,1>F { align1 1Q F@2 };
|
||||
mov(8) g36<1>UD g35<8,8,1>F { align1 1Q F@3 };
|
||||
mov(8) g36<1>UD g35<8,8,1>F { align1 1Q F@4 };
|
||||
mov(8) g36<1>UD g35<8,8,1>F { align1 1Q F@5 };
|
||||
mov(8) g36<1>UD g35<8,8,1>F { align1 1Q F@6 };
|
||||
mov(8) g36<1>UD g35<8,8,1>F { align1 1Q F@7 };
|
||||
|
||||
add(1) a0<1>UD a0<0,1,0>UD 0x00000800UD { align1 WE_all 1N A@1 };
|
||||
add(1) a0<1>UD a0<0,1,0>UD 0x00000800UD { align1 WE_all 1N A@2 };
|
||||
add(1) a0<1>UD a0<0,1,0>UD 0x00000800UD { align1 WE_all 1N A@3 };
|
||||
add(1) a0<1>UD a0<0,1,0>UD 0x00000800UD { align1 WE_all 1N A@4 };
|
||||
add(1) a0<1>UD a0<0,1,0>UD 0x00000800UD { align1 WE_all 1N A@5 };
|
||||
add(1) a0<1>UD a0<0,1,0>UD 0x00000800UD { align1 WE_all 1N A@6 };
|
||||
add(1) a0<1>UD a0<0,1,0>UD 0x00000800UD { align1 WE_all 1N A@7 };
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
41 19 03 00 60 06 05 25 05 63 46 01 06 24 56 00
|
||||
41 1a 03 00 60 06 05 25 05 63 46 01 06 24 56 00
|
||||
41 1b 03 00 60 06 05 25 05 63 46 01 06 24 56 00
|
||||
41 1c 03 00 60 06 05 25 05 63 46 01 06 24 56 00
|
||||
41 1d 03 00 60 06 05 25 05 63 46 01 06 24 56 00
|
||||
41 1e 03 00 60 06 05 25 05 63 46 01 06 24 56 00
|
||||
41 1f 03 00 60 06 05 25 05 63 46 01 06 24 56 00
|
||||
61 11 03 00 20 0a 05 24 05 23 46 00 00 00 00 00
|
||||
61 12 03 00 20 0a 05 24 05 23 46 00 00 00 00 00
|
||||
61 13 03 00 20 0a 05 24 05 23 46 00 00 00 00 00
|
||||
61 14 03 00 20 0a 05 24 05 23 46 00 00 00 00 00
|
||||
61 15 03 00 20 0a 05 24 05 23 46 00 00 00 00 00
|
||||
61 16 03 00 20 0a 05 24 05 23 46 00 00 00 00 00
|
||||
61 17 03 00 20 0a 05 24 05 23 46 00 00 00 00 00
|
||||
40 09 00 80 20 82 01 10 00 10 00 02 00 08 00 00
|
||||
40 0a 00 80 20 82 01 10 00 10 00 02 00 08 00 00
|
||||
40 0b 00 80 20 82 01 10 00 10 00 02 00 08 00 00
|
||||
40 0c 00 80 20 82 01 10 00 10 00 02 00 08 00 00
|
||||
40 0d 00 80 20 82 01 10 00 10 00 02 00 08 00 00
|
||||
40 0e 00 80 20 82 01 10 00 10 00 02 00 08 00 00
|
||||
40 0f 00 80 20 82 01 10 00 10 00 02 00 08 00 00
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
dp4a(8) g10<1>D g2<8,8,1>D g6<8,8,1>D g7<1,1,1>D { align1 1Q @1 };
|
||||
dp4a(8) g10<1>D g2<8,8,1>D g6<8,8,1>D g7<1,1,1>UD { align1 1Q @1 };
|
||||
dp4a(8) g10<1>D g2<8,8,1>D g8<8,8,1>D g9<1,1,1>D { align1 1Q @1 };
|
||||
dp4a(8) g10<1>D g2<8,8,1>D g8<8,8,1>D g9<1,1,1>UD { align1 1Q @1 };
|
||||
dp4a(8) g10<1>UD g2<8,8,1>UD g6<8,8,1>UD g7<1,1,1>UD { align1 1Q @1 };
|
||||
dp4a(8) g10<1>UD g2<8,8,1>UD g8<8,8,1>UD g9<1,1,1>UD { align1 1Q @1 };
|
||||
dp4a(8) g5<1>D g2<8,8,1>D g3<8,8,1>D g4<1,1,1>D { align1 1Q @3 $0.dst };
|
||||
dp4a(8) g5<1>D g2<8,8,1>D g3<8,8,1>D g4<1,1,1>UD { align1 1Q @3 $0.dst };
|
||||
dp4a(8) g5<1>UD g2<8,8,1>UD g3<8,8,1>UD g4<1,1,1>UD { align1 1Q @3 $0.dst };
|
||||
dp4a(8) g6<1>D g2<8,8,1>D g3<8,8,1>D g4<1,1,1>D { align1 1Q @4 $1.dst };
|
||||
dp4a(8) g6<1>D g2<8,8,1>D g3<8,8,1>D g4<1,1,1>UD { align1 1Q @4 $1.dst };
|
||||
dp4a(8) g6<1>D g2<8,8,1>D g4<8,8,1>D g5<1,1,1>D { align1 1Q @4 $0.dst };
|
||||
dp4a(8) g6<1>D g2<8,8,1>D g4<8,8,1>D g5<1,1,1>UD { align1 1Q @4 $0.dst };
|
||||
dp4a(8) g6<1>UD g2<8,8,1>UD g3<8,8,1>UD g4<1,1,1>UD { align1 1Q @4 $1.dst };
|
||||
dp4a(8) g6<1>UD g2<8,8,1>UD g4<8,8,1>UD g5<1,1,1>UD { align1 1Q @4 $0.dst };
|
||||
dp4a(8) g7<1>D g2<8,8,1>D g5<8,8,1>D g6<1,1,1>D { align1 1Q @1 };
|
||||
dp4a(8) g7<1>D g2<8,8,1>D g5<8,8,1>D g6<1,1,1>UD { align1 1Q @1 };
|
||||
dp4a(8) g7<1>UD g2<8,8,1>UD g5<8,8,1>UD g6<1,1,1>UD { align1 1Q @1 };
|
||||
dp4a(8) g8<1>D g2<8,8,1>D g4<8,8,1>D g5<1,1,1>D { align1 1Q @3 $0.dst };
|
||||
dp4a(8) g8<1>D g2<8,8,1>D g4<8,8,1>D g5<1,1,1>D { align1 1Q @4 $0.dst };
|
||||
dp4a(8) g8<1>D g2<8,8,1>D g4<8,8,1>D g5<1,1,1>UD { align1 1Q @3 $0.dst };
|
||||
dp4a(8) g8<1>D g2<8,8,1>D g4<8,8,1>D g5<1,1,1>UD { align1 1Q @4 $0.dst };
|
||||
dp4a(8) g8<1>D g2<8,8,1>D g6<8,8,1>D g7<1,1,1>D { align1 1Q @1 };
|
||||
dp4a(8) g8<1>D g2<8,8,1>D g6<8,8,1>D g7<1,1,1>UD { align1 1Q @1 };
|
||||
dp4a(8) g8<1>UD g2<8,8,1>UD g4<8,8,1>UD g5<1,1,1>UD { align1 1Q @3 $0.dst };
|
||||
dp4a(8) g8<1>UD g2<8,8,1>UD g4<8,8,1>UD g5<1,1,1>UD { align1 1Q @4 $0.dst };
|
||||
dp4a(8) g8<1>UD g2<8,8,1>UD g6<8,8,1>UD g7<1,1,1>UD { align1 1Q @1 };
|
||||
dp4a.sat(8) g10<1>D g5<8,8,1>D g6<8,8,1>D g7<1,1,1>D { align1 1Q @1 $2.dst };
|
||||
dp4a.sat(8) g10<1>D g5<8,8,1>D g6<8,8,1>D g7<1,1,1>UD { align1 1Q @1 $2.dst };
|
||||
dp4a.sat(8) g10<1>UD g5<8,8,1>UD g6<8,8,1>UD g7<1,1,1>UD { align1 1Q @1 $2.dst };
|
||||
dp4a.sat(8) g8<1>D g5<8,8,1>D g3<8,8,1>D g4<1,1,1>D { align1 1Q $2.dst };
|
||||
dp4a.sat(8) g8<1>D g5<8,8,1>D g3<8,8,1>D g4<1,1,1>UD { align1 1Q $2.dst };
|
||||
dp4a.sat(8) g8<1>UD g5<8,8,1>UD g3<8,8,1>UD g4<1,1,1>UD { align1 1Q $2.dst };
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
58 01 03 00 68 0e 04 0a 05 02 0e 0e 05 06 05 07
|
||||
58 01 03 00 68 0e 04 0a 05 02 0a 0e 05 06 05 07
|
||||
58 01 03 00 68 0e 04 0a 05 02 0e 0e 05 08 05 09
|
||||
58 01 03 00 68 0e 04 0a 05 02 0a 0e 05 08 05 09
|
||||
58 01 03 00 28 0a 04 0a 05 02 0a 0a 05 06 05 07
|
||||
58 01 03 00 28 0a 04 0a 05 02 0a 0a 05 08 05 09
|
||||
58 b0 03 00 68 0e 04 05 05 02 0e 0e 05 03 05 04
|
||||
58 b0 03 00 68 0e 04 05 05 02 0a 0e 05 03 05 04
|
||||
58 b0 03 00 28 0a 04 05 05 02 0a 0a 05 03 05 04
|
||||
58 c1 03 00 68 0e 04 06 05 02 0e 0e 05 03 05 04
|
||||
58 c1 03 00 68 0e 04 06 05 02 0a 0e 05 03 05 04
|
||||
58 c0 03 00 68 0e 04 06 05 02 0e 0e 05 04 05 05
|
||||
58 c0 03 00 68 0e 04 06 05 02 0a 0e 05 04 05 05
|
||||
58 c1 03 00 28 0a 04 06 05 02 0a 0a 05 03 05 04
|
||||
58 c0 03 00 28 0a 04 06 05 02 0a 0a 05 04 05 05
|
||||
58 01 03 00 68 0e 04 07 05 02 0e 0e 05 05 05 06
|
||||
58 01 03 00 68 0e 04 07 05 02 0a 0e 05 05 05 06
|
||||
58 01 03 00 28 0a 04 07 05 02 0a 0a 05 05 05 06
|
||||
58 b0 03 00 68 0e 04 08 05 02 0e 0e 05 04 05 05
|
||||
58 c0 03 00 68 0e 04 08 05 02 0e 0e 05 04 05 05
|
||||
58 b0 03 00 68 0e 04 08 05 02 0a 0e 05 04 05 05
|
||||
58 c0 03 00 68 0e 04 08 05 02 0a 0e 05 04 05 05
|
||||
58 01 03 00 68 0e 04 08 05 02 0e 0e 05 06 05 07
|
||||
58 01 03 00 68 0e 04 08 05 02 0a 0e 05 06 05 07
|
||||
58 b0 03 00 28 0a 04 08 05 02 0a 0a 05 04 05 05
|
||||
58 c0 03 00 28 0a 04 08 05 02 0a 0a 05 04 05 05
|
||||
58 01 03 00 28 0a 04 08 05 02 0a 0a 05 06 05 07
|
||||
58 92 03 00 6c 0e 04 0a 05 05 0e 0e 05 06 05 07
|
||||
58 92 03 00 6c 0e 04 0a 05 05 0a 0e 05 06 05 07
|
||||
58 92 03 00 2c 0a 04 0a 05 05 0a 0a 05 06 05 07
|
||||
58 22 03 00 6c 0e 04 08 05 05 0e 0e 05 03 05 04
|
||||
58 22 03 00 6c 0e 04 08 05 05 0a 0e 05 03 05 04
|
||||
58 22 03 00 2c 0a 04 08 05 05 0a 0a 05 03 05 04
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
send(16) g113UD g12UD nullUD a0<0>UD 0x00000000
|
||||
dp data 1 MsgDesc: indirect ex_mlen 0 { align1 1H @1 $6 };
|
||||
(+f1.0) send(16) nullUD g15UD g17UD a0<0>UD 0x00000080
|
||||
dp data 1 MsgDesc: indirect ex_mlen 2 { align1 1H @1 $4 };
|
||||
send(8) g104UD g119UD nullUD 0x04116e13 0x00000000
|
||||
dp data 1 MsgDesc: (DC typed surface read, Surface = 19, SIMD8, Mask = 0xe) mlen 2 ex_mlen 0 rlen 1 { align1 2Q $8 };
|
||||
send(8) nullUD g92UD g117UD 0x020350fc a0.1<0>UD
|
||||
dp data 1 MsgDesc: (DC typed surface write, Surface = 252, SIMD16, Mask = 0x0) mlen 1 rlen 0 { align1 1Q @1 $8 };
|
||||
(+f0.0.any8h) send(8) g55UD g118UD nullUD 0x02184201 0x00000000
|
||||
data MsgDesc: (DC unaligned OWORD block read, bti 1, 2) mlen 1 ex_mlen 0 rlen 1 { align1 WE_all 1Q @3 $9 };
|
||||
send(8) nullUD g126UD nullUD 0x02000000 0x00000000
|
||||
thread_spawner MsgDesc: mlen 1 ex_mlen 0 rlen 0 { align1 WE_all 1Q @1 EOT };
|
||||
send(8) g18UD g24UD nullUD 0x04115e10 0x00000000
|
||||
dp data 1 MsgDesc: (DC typed surface read, Surface = 16, SIMD16, Mask = 0xe) mlen 2 ex_mlen 0 rlen 1 { align1 1Q $1 };
|
||||
send(8) g19UD g28UD nullUD 0x04116e10 0x00000000
|
||||
dp data 1 MsgDesc: (DC typed surface read, Surface = 16, SIMD8, Mask = 0xe) mlen 2 ex_mlen 0 rlen 1 { align1 2Q @7 $2 };
|
||||
send(16) g50UD g36UD nullUD a0<0>UD 0x00000000
|
||||
sampler MsgDesc: indirect ex_mlen 0 { align1 1H @1 $3 };
|
||||
send(8) nullUD g25UD g21UD 0x02035001 0x00000100
|
||||
dp data 1 MsgDesc: (DC typed surface write, Surface = 1, SIMD16, Mask = 0x0) mlen 1 ex_mlen 4 rlen 0 { align1 1Q $9 };
|
||||
send(8) g5UD g25UD nullUD 0x02415001 0x00000000
|
||||
dp data 1 MsgDesc: (DC typed surface read, Surface = 1, SIMD16, Mask = 0x0) mlen 1 ex_mlen 0 rlen 4 { align1 1Q $10 };
|
||||
send(8) g27UD g35UD nullUD 0x04146efd 0x00000000
|
||||
dp data 1 MsgDesc: (DC A64 untyped surface read, Surface = 253, SIMD8, Mask = 0xe) mlen 2 ex_mlen 0 rlen 1 { align1 1Q @1 $0 };
|
||||
send(8) nullUD g36UD g38UD 0x04035001 0x00000100
|
||||
dp data 1 MsgDesc: (DC typed surface write, Surface = 1, SIMD16, Mask = 0x0) mlen 2 ex_mlen 4 rlen 0 { align1 1Q @1 $1 };
|
||||
send(8) nullUD g126UD g118UD 0x02080007 0x00000200
|
||||
urb MsgDesc: offset 0 SIMD8 write mlen 1 ex_mlen 8 rlen 0 { align1 1Q @1 EOT };
|
||||
send(8) g14UD g37UD nullUD 0x02110401 0x00000000
|
||||
data MsgDesc: (DC byte scattered read, bti 1, 4) mlen 1 ex_mlen 0 rlen 1 { align1 1Q @1 $0 };
|
||||
send(1) g100UD g0UD nullUD 0x0219e000 0x00000000
|
||||
data MsgDesc: (DC mfence, bti 0, 32) mlen 1 ex_mlen 0 rlen 1 { align1 WE_all 1N $1 };
|
||||
send(1) g15UD g0UD nullUD 0x0219e000 0x00000000
|
||||
data MsgDesc: (DC mfence, bti 0, 32) mlen 1 ex_mlen 0 rlen 1 { align1 WE_all 1N $5 };
|
||||
|
||||
sendc(16) nullUD g119UD nullUD 0x10031000 0x00000000
|
||||
render MsgDesc: RT write SIMD16 LastRT Surface = 0 mlen 8 ex_mlen 0 rlen 0 { align1 1H @1 EOT };
|
||||
sendc(8) nullUD g125UD g123UD 0x04031400 0x00000080
|
||||
render MsgDesc: RT write SIMD8 LastRT Surface = 0 mlen 2 ex_mlen 2 rlen 0 { align1 1Q @1 EOT };
|
||||
sendc(16) nullUD g119UD nullUD 0x10031000 0x00000000
|
||||
render MsgDesc: RT write SIMD16 LastRT Surface = 0 mlen 8 ex_mlen 0 rlen 0 { align1 1H @1 EOT };
|
||||
sendc(16) nullUD g123UD g119UD 0x08031000 0x00000100
|
||||
render MsgDesc: RT write SIMD16 LastRT Surface = 0 mlen 4 ex_mlen 4 rlen 0 { align1 1H @1 EOT };
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
31 96 04 00 00 00 05 71 04 0c 00 c0 00 00 00 00
|
||||
31 94 84 01 00 00 01 00 04 0f 00 c0 14 11 00 00
|
||||
31 48 13 00 00 00 0c 68 14 77 26 cc 00 00 5a 00
|
||||
31 98 03 00 00 01 02 00 0c 5c f8 c1 04 75 d4 00
|
||||
31 b9 03 88 00 00 0c 37 0c 76 02 a4 00 00 10 02
|
||||
31 01 03 80 04 00 00 00 0c 7e 00 70 00 00 00 00
|
||||
31 41 03 00 00 00 0c 12 14 18 20 cc 00 00 56 00
|
||||
31 f2 13 00 00 00 0c 13 14 1c 20 cc 00 00 5a 00
|
||||
31 93 04 00 00 00 05 32 04 24 00 20 00 00 00 00
|
||||
31 49 03 00 00 00 00 00 0c 19 02 c0 24 15 d4 00
|
||||
31 4a 03 00 00 00 24 05 0c 19 02 c0 00 00 54 00
|
||||
31 90 03 00 00 00 0c 1b 14 23 fa cd 00 00 1a 01
|
||||
31 91 03 00 00 00 00 00 14 24 02 c0 24 26 d4 00
|
||||
31 01 03 00 04 00 00 00 0c 7e 0e 60 44 76 00 02
|
||||
31 90 03 00 00 00 0c 0e 0c 25 02 a8 00 00 40 00
|
||||
31 41 00 80 00 00 0c 64 0c 00 00 a0 00 00 78 02
|
||||
31 45 00 80 00 00 0c 0f 0c 00 00 a0 00 00 78 02
|
||||
32 01 04 00 04 00 00 00 44 77 00 50 00 00 c4 00
|
||||
32 01 03 00 04 00 00 00 14 7d 00 58 14 7b c4 00
|
||||
32 01 04 00 04 00 00 00 44 77 00 50 00 00 c4 00
|
||||
32 01 04 00 04 00 00 00 24 7b 00 50 24 77 c4 00
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
cmp.l.f0.0(8) g55<1>UD g54<8,8,1>UD 0x00000290UD { align1 1Q @1 };
|
||||
mov(16) g6<1>D g20<8,8,1>W { align1 2H @2 };
|
||||
add(16) g122<1>F g98<8,8,1>F (abs)g102<8,8,1>F { align1 1H @3 };
|
||||
shl(8) g75<1>D g122<8,8,1>D 0x00000002UD { align1 1Q @4 };
|
||||
sel.l(4) g90.4<1>D g90.3<0,1,0>D g90.4<4,4,1>D { align1 WE_all 1N @5 };
|
||||
and(16) g58<1>UD g16<8,8,1>UD g56<8,8,1>UD { align1 1H @6 };
|
||||
or.nz.f0.0(16) null<1>UD g105<8,8,1>UD g103<8,8,1>UD { align1 1H @7 };
|
||||
|
||||
math cos(16) g17<1>F g15<8,8,1>F null<8,8,1>F { align1 1H @1 $0 };
|
||||
math exp(16) g1<1>F g29<8,8,1>F null<8,8,1>F { align1 1H @5 $2 };
|
||||
math sqrt(8) g9<1>HF g6<8,8,1>HF null<8,8,1>F { align1 1Q @1 $3 };
|
||||
math intdiv(8) g103<1>D g101<8,8,1>D g35<8,8,1>D { align1 1Q @4 $4 };
|
||||
math intmod(8) g101<1>D g97<8,8,1>D g76<8,8,1>D { align1 2Q @2 $5 };
|
||||
math inv(16) g10<1>F g8<8,8,1>F null<8,8,1>F { align1 2H @2 $6 };
|
||||
math log(16) g102<1>F g100<8,8,1>F null<8,8,1>F { align1 2H @1 $7 };
|
||||
math rsq(16) g76<1>F g74<8,8,1>F null<8,8,1>F { align1 1H @7 $8 };
|
||||
math sin(16) g123<1>F g121<8,8,1>F null<8,8,1>F { align1 1H @4 $9 };
|
||||
math sqrt(16) g43<1>F g47<8,8,1>F null<8,8,1>F { align1 2H @7 $10 };
|
||||
math cos(8) g103<1>HF g98<8,8,1>HF null<8,8,1>F { align1 1Q @3 $11 };
|
||||
math exp(8) g54<1>HF g52<8,8,1>HF null<8,8,1>F { align1 1Q @1 $12 };
|
||||
math intdiv(8) g35<1>D g31<8,8,1>D g33<8,8,1>D { align1 4Q @2 $13 };
|
||||
math intmod(8) g101<1>D g97<8,8,1>D g99<8,8,1>D { align1 2Q @4 $14 };
|
||||
math inv(8) g102<1>HF g92<8,8,1>HF null<8,8,1>F { align1 1Q @6 $15 };
|
||||
|
||||
sel.ge(16) g7<1>UW g7<16,16,1>UW g89<16,8,2>UW { align1 1H @7 $0.dst };
|
||||
mov(16) a0<1>UW 0x03e0UW { align1 WE_all 1H @3 $1.dst };
|
||||
add(16) g100<1>D g102<8,8,1>D -2114D { align1 1H @3 $2.dst };
|
||||
add(16) g100<1>D g105<8,8,1>D (abs)g18<8,8,1>D { align1 1H @3 $3.dst };
|
||||
add(16) g36<1>D g36<8,8,1>D g106<8,8,1>D { align1 1H @7 $4.dst };
|
||||
and(16) g49<1>UD g45<8,8,1>UD g47<8,8,1>UD { align1 1H @3 $5.dst };
|
||||
asr(16) g102<2>W g41<16,8,2>W g28<8,8,1>UD { align1 2H @6 $6.dst };
|
||||
cmp.l.f0.0(8) g97<1>F (abs)g96<8,8,1>F 0x3d4ccccdF /* 0.05F */ { align1 1Q @3 $7.dst };
|
||||
cmp.nz.f0.0(8) g100<1>F g98<8,8,1>F g99<8,8,1>F { align1 1Q @1 $8.dst };
|
||||
(+f0.0) sel(8) g64<1>D -g15<8,8,1>D g15<8,8,1>D { align1 1Q @1 $9.dst };
|
||||
mov(16) g15<1>UD g13<8,8,1>D { align1 1H @1 $10.dst };
|
||||
mul(8) acc0<1>UD g10<8,4,2>UD g101<16,8,2>UW { align1 1Q @7 $11.dst };
|
||||
or(16) g51<1>UW g51<16,16,1>UW g75<16,8,2>UW { align1 1H @7 $12.dst };
|
||||
sel.ge(16) g28<1>W g28<16,16,1>W g92<16,8,2>W { align1 2H @7 $13.dst };
|
||||
xor(16) g10<1>UD g10<8,8,1>UD g100<8,8,1>UD { align1 1H @7 $14.dst };
|
||||
and(16) g39<1>UD g35<8,8,1>UD g37<8,8,1>UD { align1 2H @5 $15.dst };
|
||||
|
|
@ -1,38 +0,0 @@
|
|||
70 01 03 00 20 82 05 37 05 36 46 52 90 02 00 00
|
||||
61 02 24 00 60 05 05 06 05 14 46 00 00 00 00 00
|
||||
40 03 04 00 a0 0a 05 7a 05 62 46 0a 05 66 46 01
|
||||
69 04 03 00 60 86 05 4b 05 7a 46 02 02 00 00 00
|
||||
62 05 02 80 60 06 85 5a 64 5a 00 56 85 5a 34 00
|
||||
65 06 04 00 20 02 05 3a 05 10 46 02 05 38 46 00
|
||||
66 07 04 00 20 02 01 00 05 69 46 22 05 67 46 00
|
||||
38 90 04 00 a0 0a 05 11 05 0f 46 7a 01 00 46 00
|
||||
38 d2 04 00 a0 0a 05 01 05 1d 46 3a 01 00 46 00
|
||||
38 93 03 00 90 09 05 09 05 06 46 4a 01 00 46 00
|
||||
38 c4 03 00 60 06 05 67 05 65 46 c6 05 23 46 00
|
||||
38 a5 13 00 60 06 05 65 05 61 46 d6 05 4c 46 00
|
||||
38 a6 24 00 a0 0a 05 0a 05 08 46 1a 01 00 46 00
|
||||
38 97 24 00 a0 0a 05 66 05 64 46 2a 01 00 46 00
|
||||
38 f8 04 00 a0 0a 05 4c 05 4a 46 5a 01 00 46 00
|
||||
38 c9 04 00 a0 0a 05 7b 05 79 46 6a 01 00 46 00
|
||||
38 fa 24 00 a0 0a 05 2b 05 2f 46 4a 01 00 46 00
|
||||
38 bb 03 00 90 09 05 67 05 62 46 7a 01 00 46 00
|
||||
38 9c 03 00 90 09 05 36 05 34 46 3a 01 00 46 00
|
||||
38 ad 33 00 60 06 05 23 05 1f 46 c6 05 21 46 00
|
||||
38 ce 13 00 60 06 05 65 05 61 46 d6 05 63 46 00
|
||||
38 ef 03 00 90 09 05 66 05 5c 46 1a 01 00 46 00
|
||||
62 f0 04 00 10 01 05 07 05 07 58 41 06 59 56 00
|
||||
61 b1 04 80 10 41 01 10 00 00 00 00 e0 03 e0 03
|
||||
40 b2 04 00 60 86 05 64 05 66 46 06 be f7 ff ff
|
||||
40 b3 04 00 60 06 05 64 05 69 46 06 05 12 46 01
|
||||
40 f4 04 00 60 06 05 24 05 24 46 06 05 6a 46 00
|
||||
65 b5 04 00 20 02 05 31 05 2d 46 02 05 2f 46 00
|
||||
6c e6 24 00 50 05 06 66 06 29 56 02 05 1c 46 00
|
||||
70 b7 03 00 a0 9a 05 61 05 60 46 5a cd cc 4c 3d
|
||||
70 98 03 00 a0 0a 05 64 05 62 46 2a 05 63 46 00
|
||||
62 99 03 01 60 26 05 40 05 0f 46 06 05 0f 46 00
|
||||
61 9a 04 00 20 06 05 0f 05 0d 46 00 00 00 00 00
|
||||
41 fb 03 00 20 02 01 20 06 0a 44 01 06 65 56 00
|
||||
66 fc 04 00 10 01 05 33 05 33 58 01 06 4b 56 00
|
||||
62 fd 24 00 50 05 05 1c 05 1c 58 45 06 5c 56 00
|
||||
67 fe 04 00 20 02 05 0a 05 0a 46 02 05 64 46 00
|
||||
65 df 24 00 20 02 05 27 05 23 46 02 05 25 46 00
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
sync nop(16) null<0,1,0>UB { align1 WE_all 1H @1 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 1N @1 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 1N @2 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 1N @3 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 1N @4 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 1N @5 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 1N @6 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 1N @7 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 3N @1 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 3N @2 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 3N @3 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 3N @4 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 3N @5 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 3N @6 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 3N @7 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 5N @1 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 5N @2 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 5N @3 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 5N @4 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 5N @5 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 5N @6 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 5N @7 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 7N @1 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 7N @2 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 7N @3 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 7N @4 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 7N @5 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 7N @6 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 7N @7 };
|
||||
sync nop(32) null<0,1,0>UB { align1 WE_all @1 };
|
||||
sync nop(8) null<0,1,0>UB { align1 WE_all 1Q @1 };
|
||||
sync allwr(16) null<0,1,0>UB { align1 1H };
|
||||
sync allwr(8) null<0,1,0>UB { align1 1Q };
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
01 01 04 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 01 00 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 02 00 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 03 00 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 04 00 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 05 00 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 06 00 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 07 00 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 01 10 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 02 10 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 03 10 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 04 10 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 05 10 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 06 10 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 07 10 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 01 20 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 02 20 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 03 20 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 04 20 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 05 20 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 06 20 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 07 20 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 01 30 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 02 30 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 03 30 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 04 30 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 05 30 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 06 30 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 07 30 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 01 05 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 01 03 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 00 04 00 00 00 00 00 00 00 00 30 00 00 00 00
|
||||
01 00 03 00 00 00 00 00 00 00 00 30 00 00 00 00
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
add(8) g124<1>F g7<8,8,1>D 1D { align1 1Q };
|
||||
add(16) g120<1>F g11<8,8,1>D 1D { align1 1H };
|
||||
add(16) g4<1>F g1<0,1,0>F -g1.4<0,1,0>F { align1 1H };
|
||||
add(8) g3.8<1>UW g3<8,8,1>UW 0x0008UW { align1 WE_all 1Q };
|
||||
add(16) g3<1>D g18<8,8,1>D g12<8,8,1>D { align1 1H };
|
||||
add(16) g6<1>UW g1.4<1,4,0>UW 0x11001010V { align1 WE_all 1H };
|
||||
add(32) g10<1>UW g1.4<1,4,0>UW 0x11001010V { align1 WE_all };
|
||||
add(8) g2<1>D g96<8,8,1>D -1023D { align1 1Q };
|
||||
add(8) g4<1>F g5.6<0,1,0>F g7.2<0,1,0>F { align1 1Q };
|
||||
add(8) g53<1>DF g49<4,4,1>DF g51<4,4,1>DF { align1 1Q };
|
||||
add.sat(16) g5<1>UD g3<8,8,1>UD 0x00000001UD { align1 1H };
|
||||
add(1) g125.3<1>UD g0.3<0,1,0>UD g7<0,1,0>UD { align1 WE_all 1N };
|
||||
add(8) a0<1>UW g34<16,8,2>UW 0x0080UW { align1 1Q };
|
||||
add(8) g8<1>DF g2<0,1,0>DF g3.2<0,1,0>DF { align1 2Q };
|
||||
add(16) a0<1>UW g3<16,8,2>UW 0x0040UW { align1 1H };
|
||||
add.sat.le.f0.0(8) g125<1>F -g6<8,8,1>F 0x3f000000F /* 0.5F */ { align1 1Q };
|
||||
add.z.f0.0(8) g8<1>F g2<0,1,0>F -g2.4<0,1,0>F { align1 1Q };
|
||||
add.z.f0.0(16) g3<1>F g2<0,1,0>F -g2.1<0,1,0>F { align1 1H };
|
||||
add(8) g3<1>UD g2<8,8,1>UD 0xffffffffUD { align1 1Q };
|
||||
(+f0.0) add(8) g15<1>D -g15<8,8,1>D 31D { align1 1Q };
|
||||
add(1) a0<1>UD a0<0,1,0>UD 0x00000200UD { align1 WE_all 1N };
|
||||
add.sat(8) g124<1>F g7<8,8,1>F -g6<8,8,1>F { align1 1Q };
|
||||
add(8) g8<1>UD g6<8,8,1>D 0x00000001UD { align1 1Q };
|
||||
add(16) g11<1>UD g9<8,8,1>D 0x00000001UD { align1 1H };
|
||||
(+f0.0) add(16) g8<1>D -g8<8,8,1>D 31D { align1 1H };
|
||||
add.sat(16) g126<1>F g2<0,1,0>F g2.4<0,1,0>F { align1 1H };
|
||||
add.sat(8) g124<1>F g17<8,8,1>D 1D { align1 1Q };
|
||||
add(16) g114<1>D g118<8,8,1>D g116<8,8,1>D { align1 2H };
|
||||
add.z.f0.0(16) null<1>D g120<8,8,1>D 1D { align1 1H };
|
||||
add.z.f0.0(16) null<1>D g116<8,8,1>D 1D { align1 2H };
|
||||
add.z.f0.0(8) g3<1>D g5<8,8,1>D g4<8,8,1>D { align1 1Q };
|
||||
add(16) g20<1>UD g17<8,8,1>UD 1D { align1 1H };
|
||||
add(8) g7<1>F -g6<4>.xyxyF g6<4>.zwzwF { align16 1Q };
|
||||
add(16) g9<1>F -g7<4>.xyxyF g7<4>.zwzwF { align16 1H };
|
||||
add(8) g7<1>UD g2<8,8,1>UD -g6<8,8,1>UD { align1 WE_all 1Q };
|
||||
add.le.f0.0(16) g1<1>D g3.1<0,1,0>D -g6<8,8,1>D { align1 1H };
|
||||
add.sat(8) g10<1>UD g9<8,8,1>UD 0x00000001UD { align1 1Q };
|
||||
add(1) g14<1>UD g14<0,1,0>UD 0x00000001UD { align1 WE_all 3N };
|
||||
add(8) g25<1>Q g22<4,4,1>Q -g24<4,4,1>Q { align1 1Q };
|
||||
add(8) g12<1>Q g5<4,4,1>Q -g11<4,4,1>Q { align1 2Q };
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
40 00 60 00 e8 0a 80 2f e0 00 8d 0e 01 00 00 00
|
||||
40 00 80 00 e8 0a 00 2f 60 01 8d 0e 01 00 00 00
|
||||
40 00 80 00 e8 3a 80 20 20 00 00 3a 30 40 00 00
|
||||
40 00 60 00 4c 12 70 20 60 00 8d 16 08 00 08 00
|
||||
40 00 80 00 28 0a 60 20 40 02 8d 0a 80 01 8d 00
|
||||
40 00 80 00 4c 12 c0 20 28 00 28 36 10 10 00 11
|
||||
40 00 a0 00 4c 12 40 21 28 00 28 36 10 10 00 11
|
||||
40 00 60 00 28 0a 40 20 00 0c 8d 0e 01 fc ff ff
|
||||
40 00 60 00 e8 3a 80 20 b8 00 00 3a e8 00 00 00
|
||||
40 00 60 00 c8 32 a0 26 20 06 69 32 60 06 69 00
|
||||
40 00 80 80 08 02 a0 20 60 00 8d 06 01 00 00 00
|
||||
40 00 00 00 0c 02 ac 2f 0c 00 00 02 e0 00 00 00
|
||||
40 00 60 00 40 12 00 22 40 04 ae 16 80 00 80 00
|
||||
40 10 60 00 c8 32 00 21 40 00 00 32 70 00 00 00
|
||||
40 00 80 00 40 12 00 22 60 00 ae 16 40 00 40 00
|
||||
40 00 60 86 e8 3a a0 2f c0 40 8d 3e 00 00 00 3f
|
||||
40 00 60 01 e8 3a 00 21 40 00 00 3a 50 40 00 00
|
||||
40 00 80 01 e8 3a 60 20 40 00 00 3a 44 40 00 00
|
||||
40 00 60 00 08 02 60 20 40 00 8d 06 ff ff ff ff
|
||||
40 00 61 00 28 0a e0 21 e0 41 8d 0e 1f 00 00 00
|
||||
40 00 00 00 04 00 00 22 00 02 00 06 00 02 00 00
|
||||
40 00 60 80 e8 3a 80 2f e0 00 8d 3a c0 40 8d 00
|
||||
40 00 60 00 08 0a 00 21 c0 00 8d 06 01 00 00 00
|
||||
40 00 80 00 08 0a 60 21 20 01 8d 06 01 00 00 00
|
||||
40 00 81 00 28 0a 00 21 00 41 8d 0e 1f 00 00 00
|
||||
40 00 80 80 e8 3a c0 2f 40 00 00 3a 50 00 00 00
|
||||
40 00 60 80 e8 0a 80 2f 20 02 8d 0e 01 00 00 00
|
||||
40 20 80 00 28 0a 40 2e c0 0e 8d 0a 80 0e 8d 00
|
||||
40 00 80 01 20 0a 00 20 00 0f 8d 0e 01 00 00 00
|
||||
40 20 80 01 20 0a 00 20 80 0e 8d 0e 01 00 00 00
|
||||
40 00 60 01 28 0a 60 20 a0 00 8d 0a 80 00 8d 00
|
||||
40 00 80 00 08 02 80 22 20 02 8d 0e 01 00 00 00
|
||||
40 01 60 00 e8 3a ef 20 c4 40 64 3a ce 00 6e 00
|
||||
40 01 80 00 e8 3a 2f 21 e4 40 64 3a ee 00 6e 00
|
||||
40 00 60 00 0c 02 e0 20 40 00 8d 02 c0 40 8d 00
|
||||
40 00 80 06 28 0a 20 20 64 00 00 0a c0 40 8d 00
|
||||
40 00 60 80 08 02 40 21 20 01 8d 06 01 00 00 00
|
||||
40 10 00 00 0c 02 c0 21 c0 01 00 06 01 00 00 00
|
||||
40 00 60 00 28 4b 20 23 c0 02 69 4a 00 43 69 00
|
||||
40 10 60 00 28 4b 80 21 a0 00 69 4a 60 41 69 00
|
||||
|
|
@ -1,29 +0,0 @@
|
|||
and(8) g3<1>UD g2<0,1,0>UD ~g2.2<0,1,0>D { align1 1Q };
|
||||
and(16) g3<1>UD g2<0,1,0>UD ~g2.2<0,1,0>D { align1 1H };
|
||||
and(8) g8<1>UD g0.1<0,1,0>UW 0x07ffUW { align1 1Q };
|
||||
and(16) g18<1>UD g0.1<0,1,0>UW 0x07ffUW { align1 1H };
|
||||
and(1) g7<1>UD g5<0,1,0>UD 0x000000f0UD { align1 WE_all 1N };
|
||||
and.nz.f0.0(8) null<1>UD g36<8,8,1>UD g37<8,8,1>UD { align1 1Q };
|
||||
and.nz.f0.0(16) null<1>UD g70<8,8,1>UD g72<8,8,1>UD { align1 1H };
|
||||
and.z.f0.0(16) g21<1>UD g19<8,8,1>UD g17<8,8,1>UD { align1 1H };
|
||||
and(8) g61<1>UD g79<8,8,1>UD g32.1<8,4,2>UD { align1 2Q };
|
||||
and(8) g96<1>D ~g94<8,8,1>D ~g95<8,8,1>D { align1 1Q };
|
||||
and(16) g24<1>D ~g20<8,8,1>D ~g22<8,8,1>D { align1 1H };
|
||||
and(1) a0<1>UD g4<0,1,0>UD 0x000000ffUD { align1 WE_all 1N };
|
||||
and(16) g118<1>UD g114<8,8,1>UD 0x0000003fUD { align1 2H };
|
||||
and(1) g4<1>UD g20<0,1,0>UD 0x000000ffUD { align1 WE_all 3N };
|
||||
and.z.f0.0(8) null<1>D g13<8,8,1>UD 0x0000001fUD { align1 1Q };
|
||||
and(8) g21<1>UD g15<8,8,1>UD 0x00000003UD { align1 WE_all 1Q };
|
||||
and.z.f0.0(8) null<1>UD g20<8,8,1>UD 0x00000001UD { align1 1Q };
|
||||
and.z.f0.0(16) null<1>UD g45<8,8,1>UD 0x00000001UD { align1 1H };
|
||||
and(8) g4<1>UW g3<8,8,1>UW 0xfffcUW { align1 1Q };
|
||||
and(16) g13<1>UW g19<16,8,2>UW 0xfffcUW { align1 1H };
|
||||
and.nz.f0.0(8) null<1>UD ~g2.2<0,1,0>D g9<8,8,1>UD { align1 1Q };
|
||||
and(8) g18<1>UD ~g2.2<0,1,0>D g7<8,8,1>UD { align1 1Q };
|
||||
and.nz.f0.0(16) null<1>UD ~g2.2<0,1,0>D g14<8,8,1>UD { align1 1H };
|
||||
and(16) g30<1>UD ~g2.2<0,1,0>D g10<8,8,1>UD { align1 1H };
|
||||
and.nz.f0.0(8) g10<1>UD g9<8,8,1>UD 0x00000001UD { align1 1Q };
|
||||
and.nz.f0.0(16) g16<1>UD g14<8,8,1>UD 0x00000001UD { align1 1H };
|
||||
and.z.f0.0(8) g9<1>UD g8<8,8,1>UD 0x00000003UD { align1 1Q };
|
||||
and(8) g12<1>UQ g9<4,4,1>UQ g11<4,4,1>UQ { align1 1Q };
|
||||
and(8) g26<1>UQ g18<4,4,1>UQ g22<4,4,1>UQ { align1 2Q };
|
||||
|
|
@ -1,29 +0,0 @@
|
|||
05 00 60 00 08 02 60 20 40 00 00 0a 48 40 00 00
|
||||
05 00 80 00 08 02 60 20 40 00 00 0a 48 40 00 00
|
||||
05 00 60 00 08 12 00 21 02 00 00 16 ff 07 ff 07
|
||||
05 00 80 00 08 12 40 22 02 00 00 16 ff 07 ff 07
|
||||
05 00 00 00 0c 02 e0 20 a0 00 00 06 f0 00 00 00
|
||||
05 00 60 02 00 02 00 20 80 04 8d 02 a0 04 8d 00
|
||||
05 00 80 02 00 02 00 20 c0 08 8d 02 00 09 8d 00
|
||||
05 00 80 01 08 02 a0 22 60 02 8d 02 20 02 8d 00
|
||||
05 10 60 00 08 02 a0 27 e0 09 8d 02 04 04 8a 00
|
||||
05 00 60 00 28 0a 00 2c c0 4b 8d 0a e0 4b 8d 00
|
||||
05 00 80 00 28 0a 00 23 80 42 8d 0a c0 42 8d 00
|
||||
05 00 00 00 04 02 00 22 80 00 00 06 ff 00 00 00
|
||||
05 20 80 00 08 02 c0 2e 40 0e 8d 06 3f 00 00 00
|
||||
05 10 00 00 0c 02 80 20 80 02 00 06 ff 00 00 00
|
||||
05 00 60 01 20 02 00 20 a0 01 8d 06 1f 00 00 00
|
||||
05 00 60 00 0c 02 a0 22 e0 01 8d 06 03 00 00 00
|
||||
05 00 60 01 00 02 00 20 80 02 8d 06 01 00 00 00
|
||||
05 00 80 01 00 02 00 20 a0 05 8d 06 01 00 00 00
|
||||
05 00 60 00 48 12 80 20 60 00 8d 16 fc ff fc ff
|
||||
05 00 80 00 48 12 a0 21 60 02 ae 16 fc ff fc ff
|
||||
05 00 60 02 00 0a 00 20 48 40 00 02 20 01 8d 00
|
||||
05 00 60 00 08 0a 40 22 48 40 00 02 e0 00 8d 00
|
||||
05 00 80 02 00 0a 00 20 48 40 00 02 c0 01 8d 00
|
||||
05 00 80 00 08 0a c0 23 48 40 00 02 40 01 8d 00
|
||||
05 00 60 02 08 02 40 21 20 01 8d 06 01 00 00 00
|
||||
05 00 80 02 08 02 00 22 c0 01 8d 06 01 00 00 00
|
||||
05 00 60 01 08 02 20 21 00 01 8d 06 03 00 00 00
|
||||
05 00 60 00 08 43 80 21 20 01 69 42 60 01 69 00
|
||||
05 10 60 00 08 43 40 23 40 02 69 42 c0 02 69 00
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
asr(8) g19<1>D g7<8,8,1>D 0x00000001UD { align1 1Q };
|
||||
asr(16) g20<1>D g2.7<0,1,0>D 0x0000001fUD { align1 1H };
|
||||
asr.nz.f0.0(8) null<1>D -g0<0,1,0>W 15D { align1 1Q };
|
||||
asr.nz.f0.0(16) null<1>D -g0<0,1,0>W 15D { align1 1H };
|
||||
asr(8) g2<1>D -g0<0,1,0>W 15D { align1 1Q };
|
||||
asr(16) g2<1>D -g0<0,1,0>W 15D { align1 1H };
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
0c 00 60 00 28 0a 60 22 e0 00 8d 06 01 00 00 00
|
||||
0c 00 80 00 28 0a 80 22 5c 00 00 06 1f 00 00 00
|
||||
0c 00 60 02 20 1a 00 20 00 40 00 0e 0f 00 00 00
|
||||
0c 00 80 02 20 1a 00 20 00 40 00 0e 0f 00 00 00
|
||||
0c 00 60 00 28 1a 40 20 00 40 00 0e 0f 00 00 00
|
||||
0c 00 80 00 28 1a 40 20 00 40 00 0e 0f 00 00 00
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
bfe(8) g96<1>UD g89<4,4,1>UD g30<4,4,1>UD g91<4,4,1>UD { align16 1Q };
|
||||
bfe(16) g13<1>UD g44<4,4,1>UD g115<4,4,1>UD g126<4,4,1>UD { align16 1H };
|
||||
bfe(8) g18<1>D g17<4,4,1>D g16<4,4,1>D g49<4,4,1>D { align16 1Q };
|
||||
bfe(16) g13<1>D g11<4,4,1>D g42<4,4,1>D g5<4,4,1>D { align16 1H };
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
18 01 60 00 00 90 1e 60 c8 91 05 39 3c 20 c7 16
|
||||
18 01 80 00 00 90 1e 0d c8 c1 02 39 e6 20 87 1f
|
||||
18 01 60 00 00 48 1e 12 c8 11 01 39 20 20 47 0c
|
||||
18 01 80 00 00 48 1e 0d c8 b1 00 39 54 20 47 01
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
bfi1(8) g20<1>UD g19<8,8,1>D g18<8,8,1>D { align1 1Q };
|
||||
bfi1(16) g16<1>UD g14<8,8,1>D g12<8,8,1>D { align1 1H };
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
19 00 60 00 08 0a 80 22 60 02 8d 0a 40 02 8d 00
|
||||
19 00 80 00 08 0a 00 22 c0 01 8d 0a 80 01 8d 00
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
bfi2(8) g31<1>UD g88<4,4,1>UD g90<4,4,1>UD g91<4,4,1>UD { align16 1Q };
|
||||
bfi2(16) g5<1>UD g42<4,4,1>UD g40<4,4,1>UD g126<4,4,1>UD { align16 1H };
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
1a 01 60 00 00 90 1e 1f c8 81 05 39 b4 20 c7 16
|
||||
1a 01 80 00 00 90 1e 05 c8 a1 02 39 50 20 87 1f
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
bfrev(8) g5<1>UD g5<8,8,1>UD { align1 1Q };
|
||||
bfrev(16) g6<1>UD g8<8,8,1>UD { align1 1H };
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
17 00 60 00 08 02 a0 20 a0 00 8d 00 00 00 00 00
|
||||
17 00 80 00 08 02 c0 20 00 01 8d 00 00 00 00 00
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
break(8) JIP: LABEL0 UIP: LABEL1 { align1 1Q };
|
||||
break(16) JIP: LABEL0 UIP: LABEL1 { align1 1H };
|
||||
LABEL0:
|
||||
(+f0.0) break(8) JIP: LABEL1 UIP: LABEL1 { align1 1Q };
|
||||
(+f0.0) break(16) JIP: LABEL1 UIP: LABEL1 { align1 1H };
|
||||
LABEL1:
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
28 00 60 00 20 0e 00 20 40 00 00 00 20 00 00 00
|
||||
28 00 80 00 20 0e 00 20 30 00 00 00 10 00 00 00
|
||||
28 00 61 00 20 0e 00 20 20 00 00 00 20 00 00 00
|
||||
28 00 81 00 20 0e 00 20 10 00 00 00 10 00 00 00
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
cbit(8) g9<1>UD g31<8,8,1>UD { align1 1Q };
|
||||
cbit(16) g6<1>UD g8<8,8,1>UD { align1 1H };
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
4d 00 60 00 08 02 20 21 e0 03 8d 00 00 00 00 00
|
||||
4d 00 80 00 08 02 c0 20 00 01 8d 00 00 00 00 00
|
||||
|
|
@ -1,104 +0,0 @@
|
|||
cmp.z.f0.0(8) null<1>F g20<8,8,1>F 0xbf800000F /* -1F */ { align1 1Q };
|
||||
cmp.nz.f0.0(8) g59<1>DF g2.1<0,1,0>DF g59<4,4,1>DF { align1 1Q };
|
||||
cmp.nz.f0.0(8) g49<1>F g47<8,8,1>F g14.1<0,1,0>F { align1 1Q };
|
||||
cmp.nz.f0.0(8) null<1>D g7<8,8,1>D 0D { align1 1Q };
|
||||
cmp.z.f0.0(8) g5<1>D g4<8,8,1>D g2.5<0,1,0>D { align1 1Q };
|
||||
cmp.z.f0.0(16) g7<1>D g5<8,8,1>D g2.5<0,1,0>D { align1 1H };
|
||||
cmp.l.f0.0(16) g28<1>F g26<8,8,1>F g24<8,8,1>F { align1 1H };
|
||||
cmp.ge.f0.0(16) g30<1>F g26<8,8,1>F g24<8,8,1>F { align1 1H };
|
||||
cmp.nz.f0.0(8) g43<1>D g42<8,8,1>D g2.1<0,1,0>D { align1 1Q };
|
||||
cmp.z.f0.0(8) g86<1>DF (abs)g6.2<0,1,0>DF g68<4,4,1>DF { align1 1Q };
|
||||
cmp.le.f0.0(8) g108<1>D g106<8,8,1>D 0D { align1 1Q };
|
||||
cmp.nz.f0.0(8) null<1>DF g6.2<0,1,0>DF g66<4,4,1>DF { align1 1Q };
|
||||
cmp.l.f0.0(8) g5<1>DF g36<4,4,1>DF g53<4,4,1>DF { align1 1Q };
|
||||
cmp.ge.f0.0(8) g18<1>DF g36<4,4,1>DF g53<4,4,1>DF { align1 1Q };
|
||||
cmp.z.f0.0(8) g34<1>DF (abs)g106<4,4,1>DF g52<4,4,1>DF { align1 2Q };
|
||||
cmp.le.f0.0(16) g35<1>D g21<8,8,1>D 0D { align1 1H };
|
||||
cmp.nz.f0.0(8) null<1>DF g106<4,4,1>DF g50<4,4,1>DF { align1 2Q };
|
||||
cmp.nz.f0.0(8) g113<1>DF g3.1<0,1,0>DF g59<4,4,1>DF { align1 2Q };
|
||||
cmp.l.f0.0(8) null<1>UD g12<8,8,1>UD 0x00000004UD { align1 1Q };
|
||||
cmp.l.f0.0(8) g53<1>F g52<8,8,1>F g51<8,8,1>F { align1 1Q };
|
||||
cmp.ge.f0.0(8) g55<1>F g52<8,8,1>F g51<8,8,1>F { align1 1Q };
|
||||
cmp.ge.f0.0(8) g15<1>D (abs)g12<8,8,1>D 1D { align1 1Q };
|
||||
cmp.l.f0.0(8) null<1>D g6<0,1,0>D 2D { align1 1Q };
|
||||
(+f0.1) cmp.z.f0.1(8) null<1>D g8<8,8,1>D 0D { align1 1Q };
|
||||
cmp.nz.f0.0(16) g11<1>D g9<8,8,1>D 3D { align1 1H };
|
||||
(+f0.1) cmp.z.f0.1(16) null<1>D g11<8,8,1>D 0D { align1 1H };
|
||||
cmp.z.f0.0(8) null<1>D g22<8,8,1>D 1D { align1 1Q };
|
||||
cmp.z.f0.0(16) null<1>D g47<8,8,1>D 1D { align1 1H };
|
||||
cmp.ge.f0.0(8) g30<1>UD g29<8,8,1>UD g5.7<0,1,0>UD { align1 1Q };
|
||||
cmp.l.f0.0(8) g31<1>UD g29<8,8,1>UD g5.3<0,1,0>UD { align1 1Q };
|
||||
cmp.ge.f0.0(16) g50<1>UD g48<8,8,1>UD g7.7<0,1,0>UD { align1 1H };
|
||||
cmp.l.f0.0(16) g52<1>UD g48<8,8,1>UD g7.3<0,1,0>UD { align1 1H };
|
||||
cmp.nz.f0.0(16) g9<1>F g2.5<0,1,0>F g1.1<0,1,0>F { align1 1H };
|
||||
cmp.ge.f0.0(8) null<1>D g38<8,8,1>D 32D { align1 1Q };
|
||||
cmp.ge.f0.0(8) null<1>DF g21<4,4,1>DF g13<4,4,1>DF { align1 1Q };
|
||||
cmp.ge.f0.0(16) g3<1>D g1.1<0,1,0>D g1<0,1,0>D { align1 1H };
|
||||
cmp.l.f0.0(16) g5<1>D g1.1<0,1,0>D g1<0,1,0>D { align1 1H };
|
||||
cmp.z.f0.0(8) g25<1>F g4.3<0,1,0>F g4.1<0,1,0>F { align1 1Q };
|
||||
cmp.l.f0.0(8) g33<1>D g5<0,1,0>D 1D { align1 1Q };
|
||||
cmp.l.f0.0(8) g43<1>DF g39<4,4,1>DF g37<4,4,1>DF { align1 2Q };
|
||||
cmp.ge.f0.0(8) g46<1>DF g39<4,4,1>DF g37<4,4,1>DF { align1 2Q };
|
||||
cmp.l.f0.0(16) null<1>D g6<0,1,0>D 1D { align1 1H };
|
||||
cmp.z.f0.0(16) g62<1>F g12<8,8,1>F g6.3<0,1,0>F { align1 1H };
|
||||
cmp.nz.f0.0(8) null<1>F g2<0,1,0>F 0x0F /* 0F */ { align1 1Q };
|
||||
cmp.nz.f0.0(16) null<1>F g2<0,1,0>F 0x0F /* 0F */ { align1 1H };
|
||||
cmp.ge.f0.0(16) null<1>UD g46<8,8,1>UD 0x00000040UD { align1 1H };
|
||||
cmp.z.f0.0(16) null<1>F g14<8,8,1>F g6.1<0,1,0>F { align1 1H };
|
||||
cmp.nz.f0.0(16) null<1>D g6<0,1,0>D 0D { align1 1H };
|
||||
cmp.l.f0.0(16) null<1>UD g39<8,8,1>UD 0x00000004UD { align1 1H };
|
||||
cmp.le.f0.0(8) null<1>F g2<8,8,1>F 0x3f000000F /* 0.5F */ { align1 1Q };
|
||||
cmp.le.f0.0(16) null<1>F g2<8,8,1>F 0x3f000000F /* 0.5F */ { align1 1H };
|
||||
cmp.le.f0.0(8) g20<1>F g5.3<0,1,0>F 0x0F /* 0F */ { align1 1Q };
|
||||
cmp.ge.f0.0(8) null<1>F (abs)g26<8,8,1>F 0x5d5e0b6bF /* 1e+18F */ { align1 1Q };
|
||||
cmp.g.f0.0(8) g80<1>F (abs)g44<8,8,1>F 0x3f800000F /* 1F */ { align1 1Q };
|
||||
cmp.ge.f0.0(16) null<1>D g67<8,8,1>D 32D { align1 1H };
|
||||
cmp.g.f0.0(8) null<1>F g124<8,8,1>F 0x0F /* 0F */ { align1 1Q };
|
||||
cmp.z.f0.0(8) g4<1>F g13<8,4,2>F g2.5<0,1,0>F { align1 2Q };
|
||||
cmp.g.f0.0(16) null<1>F g120<8,8,1>F 0x0F /* 0F */ { align1 1H };
|
||||
cmp.g.f0.0(16) g2<1>F (abs)g17<8,8,1>F 0x3f800000F /* 1F */ { align1 1H };
|
||||
cmp.l.f0.0(8) null<1>DF (abs)g5<0,1,0>DF g20<4,4,1>DF { align1 1Q };
|
||||
cmp.nz.f0.0(8) g29<1>D g22.1<8,4,2>D g3.2<0,1,0>D { align1 2Q };
|
||||
cmp.l.f0.0(8) null<1>DF g11<4,4,1>DF g8<4,4,1>DF { align1 2Q };
|
||||
cmp.nz.f0.0(8) g73<1>F g6.1<0,1,0>F g14<8,4,2>F { align1 2Q };
|
||||
cmp.g.f0.0(8) g7<1>D g2<0,1,0>D 0D { align1 1Q };
|
||||
cmp.l.f0.0(8) null<1>F g4.4<0,1,0>F 0x0F /* 0F */ { align1 1Q };
|
||||
cmp.l.f0.0(16) null<1>F g6.4<0,1,0>F 0x0F /* 0F */ { align1 1H };
|
||||
cmp.le.f0.0(8) null<1>D g2<8,8,1>D 50D { align1 1Q };
|
||||
cmp.le.f0.0(16) null<1>D g2<8,8,1>D 50D { align1 1H };
|
||||
cmp.ge.f0.0(16) null<1>F g35<8,8,1>F 0x3f000000F /* 0.5F */ { align1 1H };
|
||||
cmp.le.f0.0(8) g4<1>UD g2<0,1,0>UD 0x00000001UD { align1 1Q };
|
||||
cmp.g.f0.0(8) g5<1>UD g2<0,1,0>UD 0x00000001UD { align1 1Q };
|
||||
cmp.le.f0.0(16) g5<1>UD g2<0,1,0>UD 0x00000001UD { align1 1H };
|
||||
cmp.g.f0.0(16) g7<1>UD g2<0,1,0>UD 0x00000001UD { align1 1H };
|
||||
cmp.le.f0.0(16) g121<1>F g27<8,8,1>F 0x461c3f9aF /* 9999.9F */ { align1 1H };
|
||||
cmp.z.f0.0(8) g5<1>D g14<8,4,2>D g3.1<0,1,0>D { align1 2Q };
|
||||
cmp.g.f0.0(8) null<1>D g5.2<0,1,0>D 31D { align1 1Q };
|
||||
cmp.g.f0.0(8) null<1>UD g4.2<0,1,0>UD 0x0000001fUD { align1 1Q };
|
||||
(+f0.1) cmp.nz.f0.1(8) null<1>UW g0<8,8,1>UW g0<8,8,1>UW { align1 1Q };
|
||||
(+f0.1) cmp.nz.f0.1(16) null<1>UW g0<8,8,1>UW g0<8,8,1>UW { align1 1H };
|
||||
cmp.z.f0.0(16) null<1>D g1<8,8,1>D 1024D { align1 2H };
|
||||
cmp.l.f0.0(16) null<1>D g118<8,8,1>D 32D { align1 2H };
|
||||
cmp.nz.f0.0(8) null<1>UD g3<8,8,1>UD 0x00000000UD { align1 1Q };
|
||||
cmp.nz.f0.0(16) null<1>UD g3<8,8,1>UD 0x00000000UD { align1 1H };
|
||||
cmp.g.f0.0(16) null<1>D g2.1<0,1,0>D 0D { align1 1H };
|
||||
cmp.nz.f0.0(8) null<1>Q g6<4,4,1>Q g3<4,4,1>Q { align1 1Q };
|
||||
cmp.z.f0.0(8) g8<1>Q g5<4,4,1>Q g3<4,4,1>Q { align1 1Q };
|
||||
cmp.nz.f0.0(8) g2<1>Q g5<4,4,1>Q g3<4,4,1>Q { align1 1Q };
|
||||
cmp.nz.f0.0(8) null<1>Q g9<4,4,1>Q g4<4,4,1>Q { align1 2Q };
|
||||
cmp.z.f0.0(8) g17<1>Q g11<4,4,1>Q g4<4,4,1>Q { align1 2Q };
|
||||
cmp.nz.f0.0(8) g20<1>Q g11<4,4,1>Q g4<4,4,1>Q { align1 2Q };
|
||||
cmp.z.f0.0(8) null<1>UD g5<8,8,1>UD 0x00000000UD { align1 1Q };
|
||||
cmp.z.f0.0(16) null<1>UD g15<8,8,1>UD 0x00000000UD { align1 1H };
|
||||
cmp.g.f0.0(16) g1<1>D g8<8,8,1>D 0D { align1 1H };
|
||||
cmp.ge.f0.0(8) null<1>UD g10<8,8,1>UD g8<8,8,1>UD { align1 1Q };
|
||||
cmp.ge.f0.0(8) null<1>DF g37<4,4,1>DF g26<4,4,1>DF { align1 2Q };
|
||||
cmp.l.f0.0(8) null<1>Q g20<4,4,1>Q g25<4,4,1>Q { align1 1Q };
|
||||
cmp.l.f0.0(8) null<1>Q g2<4,4,1>Q g12<4,4,1>Q { align1 2Q };
|
||||
cmp.ge.f0.0(8) null<1>Q g20<4,4,1>Q g27<4,4,1>Q { align1 1Q };
|
||||
cmp.ge.f0.0(8) null<1>Q g2<4,4,1>Q g8<4,4,1>Q { align1 2Q };
|
||||
cmp.le.f0.0(8) null<1>UD g18<8,8,1>UD 0x000000ffUD { align1 1Q };
|
||||
cmp.le.f0.0(16) null<1>UD g32<8,8,1>UD 0x000000ffUD { align1 1H };
|
||||
cmp.z.f0.0(8) null<1>Q g12<4,4,1>Q g7<4,4,1>Q { align1 1Q };
|
||||
cmp.z.f0.0(8) null<1>Q g26<4,4,1>Q g12<4,4,1>Q { align1 2Q };
|
||||
cmp.g.f0.0(16) null<1>UD g4.2<0,1,0>UD 0x0000001fUD { align1 1H };
|
||||
|
|
@ -1,104 +0,0 @@
|
|||
10 00 60 01 e0 3a 00 20 80 02 8d 3e 00 00 80 bf
|
||||
10 00 60 02 c8 32 60 27 48 00 00 32 60 07 69 00
|
||||
10 00 60 02 e8 3a 20 26 e0 05 8d 3a c4 01 00 00
|
||||
10 00 60 02 20 0a 00 20 e0 00 8d 0e 00 00 00 00
|
||||
10 00 60 01 28 0a a0 20 80 00 8d 0a 54 00 00 00
|
||||
10 00 80 01 28 0a e0 20 a0 00 8d 0a 54 00 00 00
|
||||
10 00 80 05 e8 3a 80 23 40 03 8d 3a 00 03 8d 00
|
||||
10 00 80 04 e8 3a c0 23 40 03 8d 3a 00 03 8d 00
|
||||
10 00 60 02 28 0a 60 25 40 05 8d 0a 44 00 00 00
|
||||
10 00 60 01 c8 32 c0 2a d0 20 00 32 80 08 69 00
|
||||
10 00 60 06 28 0a 80 2d 40 0d 8d 0e 00 00 00 00
|
||||
10 00 60 02 c0 32 00 20 d0 00 00 32 40 08 69 00
|
||||
10 00 60 05 c8 32 a0 20 80 04 69 32 a0 06 69 00
|
||||
10 00 60 04 c8 32 40 22 80 04 69 32 a0 06 69 00
|
||||
10 10 60 01 c8 32 40 24 40 2d 69 32 80 06 69 00
|
||||
10 00 80 06 28 0a 60 24 a0 02 8d 0e 00 00 00 00
|
||||
10 10 60 02 c0 32 00 20 40 0d 69 32 40 06 69 00
|
||||
10 10 60 02 c8 32 20 2e 68 00 00 32 60 07 69 00
|
||||
10 00 60 05 00 02 00 20 80 01 8d 06 04 00 00 00
|
||||
10 00 60 05 e8 3a a0 26 80 06 8d 3a 60 06 8d 00
|
||||
10 00 60 04 e8 3a e0 26 80 06 8d 3a 60 06 8d 00
|
||||
10 00 60 04 28 0a e0 21 80 21 8d 0e 01 00 00 00
|
||||
10 00 60 05 20 0a 00 20 c0 00 00 0e 02 00 00 00
|
||||
10 00 61 01 21 0a 00 20 00 01 8d 0e 00 00 00 00
|
||||
10 00 80 02 28 0a 60 21 20 01 8d 0e 03 00 00 00
|
||||
10 00 81 01 21 0a 00 20 60 01 8d 0e 00 00 00 00
|
||||
10 00 60 01 20 0a 00 20 c0 02 8d 0e 01 00 00 00
|
||||
10 00 80 01 20 0a 00 20 e0 05 8d 0e 01 00 00 00
|
||||
10 00 60 04 08 02 c0 23 a0 03 8d 02 bc 00 00 00
|
||||
10 00 60 05 08 02 e0 23 a0 03 8d 02 ac 00 00 00
|
||||
10 00 80 04 08 02 40 26 00 06 8d 02 fc 00 00 00
|
||||
10 00 80 05 08 02 80 26 00 06 8d 02 ec 00 00 00
|
||||
10 00 80 02 e8 3a 20 21 54 00 00 3a 24 00 00 00
|
||||
10 00 60 04 20 0a 00 20 c0 04 8d 0e 20 00 00 00
|
||||
10 00 60 04 c0 32 00 20 a0 02 69 32 a0 01 69 00
|
||||
10 00 80 04 28 0a 60 20 24 00 00 0a 20 00 00 00
|
||||
10 00 80 05 28 0a a0 20 24 00 00 0a 20 00 00 00
|
||||
10 00 60 01 e8 3a 20 23 8c 00 00 3a 84 00 00 00
|
||||
10 00 60 05 28 0a 20 24 a0 00 00 0e 01 00 00 00
|
||||
10 10 60 05 c8 32 60 25 e0 04 69 32 a0 04 69 00
|
||||
10 10 60 04 c8 32 c0 25 e0 04 69 32 a0 04 69 00
|
||||
10 00 80 05 20 0a 00 20 c0 00 00 0e 01 00 00 00
|
||||
10 00 80 01 e8 3a c0 27 80 01 8d 3a cc 00 00 00
|
||||
10 00 60 02 e0 3a 00 20 40 00 00 3e 00 00 00 00
|
||||
10 00 80 02 e0 3a 00 20 40 00 00 3e 00 00 00 00
|
||||
10 00 80 04 00 02 00 20 c0 05 8d 06 40 00 00 00
|
||||
10 00 80 01 e0 3a 00 20 c0 01 8d 3a c4 00 00 00
|
||||
10 00 80 02 20 0a 00 20 c0 00 00 0e 00 00 00 00
|
||||
10 00 80 05 00 02 00 20 e0 04 8d 06 04 00 00 00
|
||||
10 00 60 06 e0 3a 00 20 40 00 8d 3e 00 00 00 3f
|
||||
10 00 80 06 e0 3a 00 20 40 00 8d 3e 00 00 00 3f
|
||||
10 00 60 06 e8 3a 80 22 ac 00 00 3e 00 00 00 00
|
||||
10 00 60 04 e0 3a 00 20 40 23 8d 3e 6b 0b 5e 5d
|
||||
10 00 60 03 e8 3a 00 2a 80 25 8d 3e 00 00 80 3f
|
||||
10 00 80 04 20 0a 00 20 60 08 8d 0e 20 00 00 00
|
||||
10 00 60 03 e0 3a 00 20 80 0f 8d 3e 00 00 00 00
|
||||
10 10 60 01 e8 3a 80 20 a0 01 8a 3a 54 00 00 00
|
||||
10 00 80 03 e0 3a 00 20 00 0f 8d 3e 00 00 00 00
|
||||
10 00 80 03 e8 3a 40 20 20 22 8d 3e 00 00 80 3f
|
||||
10 00 60 05 c0 32 00 20 a0 20 00 32 80 02 69 00
|
||||
10 10 60 02 28 0a a0 23 c4 02 8a 0a 68 00 00 00
|
||||
10 10 60 05 c0 32 00 20 60 01 69 32 00 01 69 00
|
||||
10 10 60 02 e8 3a 20 29 c4 00 00 3a c0 01 8a 00
|
||||
10 00 60 03 28 0a e0 20 40 00 00 0e 00 00 00 00
|
||||
10 00 60 05 e0 3a 00 20 90 00 00 3e 00 00 00 00
|
||||
10 00 80 05 e0 3a 00 20 d0 00 00 3e 00 00 00 00
|
||||
10 00 60 06 20 0a 00 20 40 00 8d 0e 32 00 00 00
|
||||
10 00 80 06 20 0a 00 20 40 00 8d 0e 32 00 00 00
|
||||
10 00 80 04 e0 3a 00 20 60 04 8d 3e 00 00 00 3f
|
||||
10 00 60 06 08 02 80 20 40 00 00 06 01 00 00 00
|
||||
10 00 60 03 08 02 a0 20 40 00 00 06 01 00 00 00
|
||||
10 00 80 06 08 02 a0 20 40 00 00 06 01 00 00 00
|
||||
10 00 80 03 08 02 e0 20 40 00 00 06 01 00 00 00
|
||||
10 00 80 06 e8 3a 20 2f 60 03 8d 3e 9a 3f 1c 46
|
||||
10 10 60 01 28 0a a0 20 c0 01 8a 0a 64 00 00 00
|
||||
10 00 60 03 20 0a 00 20 a8 00 00 0e 1f 00 00 00
|
||||
10 00 60 03 00 02 00 20 88 00 00 06 1f 00 00 00
|
||||
10 00 61 02 41 12 00 20 00 00 8d 12 00 00 8d 00
|
||||
10 00 81 02 41 12 00 20 00 00 8d 12 00 00 8d 00
|
||||
10 20 80 01 20 0a 00 20 20 00 8d 0e 00 04 00 00
|
||||
10 20 80 05 20 0a 00 20 c0 0e 8d 0e 20 00 00 00
|
||||
10 00 60 02 00 02 00 20 60 00 8d 06 00 00 00 00
|
||||
10 00 80 02 00 02 00 20 60 00 8d 06 00 00 00 00
|
||||
10 00 80 03 20 0a 00 20 44 00 00 0e 00 00 00 00
|
||||
10 00 60 02 20 4b 00 20 c0 00 69 4a 60 00 69 00
|
||||
10 00 60 01 28 4b 00 21 a0 00 69 4a 60 00 69 00
|
||||
10 00 60 02 28 4b 40 20 a0 00 69 4a 60 00 69 00
|
||||
10 10 60 02 20 4b 00 20 20 01 69 4a 80 00 69 00
|
||||
10 10 60 01 28 4b 20 22 60 01 69 4a 80 00 69 00
|
||||
10 10 60 02 28 4b 80 22 60 01 69 4a 80 00 69 00
|
||||
10 00 60 01 00 02 00 20 a0 00 8d 06 00 00 00 00
|
||||
10 00 80 01 00 02 00 20 e0 01 8d 06 00 00 00 00
|
||||
10 00 80 03 28 0a 20 20 00 01 8d 0e 00 00 00 00
|
||||
10 00 60 04 00 02 00 20 40 01 8d 02 00 01 8d 00
|
||||
10 10 60 04 c0 32 00 20 a0 04 69 32 40 03 69 00
|
||||
10 00 60 05 20 4b 00 20 80 02 69 4a 20 03 69 00
|
||||
10 10 60 05 20 4b 00 20 40 00 69 4a 80 01 69 00
|
||||
10 00 60 04 20 4b 00 20 80 02 69 4a 60 03 69 00
|
||||
10 10 60 04 20 4b 00 20 40 00 69 4a 00 01 69 00
|
||||
10 00 60 06 00 02 00 20 40 02 8d 06 ff 00 00 00
|
||||
10 00 80 06 00 02 00 20 00 04 8d 06 ff 00 00 00
|
||||
10 00 60 01 20 4b 00 20 80 01 69 4a e0 00 69 00
|
||||
10 10 60 01 20 4b 00 20 40 03 69 4a 80 01 69 00
|
||||
10 00 80 03 00 02 00 20 88 00 00 06 1f 00 00 00
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
cont(8) JIP: LABEL0 UIP: LABEL1 { align1 1Q };
|
||||
LABEL0:
|
||||
cont(16) JIP: LABEL1 UIP: LABEL1 { align1 1H };
|
||||
LABEL1:
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
29 00 60 00 00 0e 00 34 20 00 00 00 10 00 00 00
|
||||
29 00 80 00 00 0e 00 34 10 00 00 00 10 00 00 00
|
||||
|
|
@ -1,14 +0,0 @@
|
|||
and(1) cr0<1>UD cr0<0,1,0>UD 0xfffffb3fUD { align1 1N switch };
|
||||
and(1) cr0<1>UD cr0<0,1,0>UD 0xffffff3fUD { align1 1N switch };
|
||||
and(1) cr0<1>UD cr0<0,1,0>UD 0xfffffb7fUD { align1 1N switch };
|
||||
and(1) cr0<1>UD cr0<0,1,0>UD 0xffffff7fUD { align1 1N switch };
|
||||
and(1) cr0<1>UD cr0<0,1,0>UD 0xfffffbbfUD { align1 1N switch };
|
||||
and(1) cr0<1>UD cr0<0,1,0>UD 0xffffffbfUD { align1 1N switch };
|
||||
and(1) cr0<1>UD cr0<0,1,0>UD 0xffffffcfUD { align1 1N switch };
|
||||
and(1) cr0<1>UD cr0<0,1,0>UD 0xfffffbffUD { align1 1N switch };
|
||||
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000400UD { align1 1N switch };
|
||||
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000030UD { align1 1N switch };
|
||||
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000040UD { align1 1N switch };
|
||||
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000440UD { align1 1N switch };
|
||||
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000080UD { align1 1N switch };
|
||||
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000480UD { align1 1N switch };
|
||||
|
|
@ -1,14 +0,0 @@
|
|||
05 80 00 00 00 00 00 30 00 10 00 06 3f fb ff ff
|
||||
05 80 00 00 00 00 00 30 00 10 00 06 3f ff ff ff
|
||||
05 80 00 00 00 00 00 30 00 10 00 06 7f fb ff ff
|
||||
05 80 00 00 00 00 00 30 00 10 00 06 7f ff ff ff
|
||||
05 80 00 00 00 00 00 30 00 10 00 06 bf fb ff ff
|
||||
05 80 00 00 00 00 00 30 00 10 00 06 bf ff ff ff
|
||||
05 80 00 00 00 00 00 30 00 10 00 06 cf ff ff ff
|
||||
05 80 00 00 00 00 00 30 00 10 00 06 ff fb ff ff
|
||||
06 80 00 00 00 00 00 30 00 10 00 06 00 04 00 00
|
||||
06 80 00 00 00 00 00 30 00 10 00 06 30 00 00 00
|
||||
06 80 00 00 00 00 00 30 00 10 00 06 40 00 00 00
|
||||
06 80 00 00 00 00 00 30 00 10 00 06 40 04 00 00
|
||||
06 80 00 00 00 00 00 30 00 10 00 06 80 00 00 00
|
||||
06 80 00 00 00 00 00 30 00 10 00 06 80 04 00 00
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
csel.nz(8) g15<1>F g11<4,4,1>F (abs)g11<4,4,1>F g11<4,4,1>F { align16 1Q };
|
||||
csel.nz(16) g14<1>F g8<4,4,1>F (abs)g8<4,4,1>F g8<4,4,1>F { align16 1H };
|
||||
csel.le(8) g21<1>F (abs)g5.3<0,1,0>F g5.0<0,1,0>F g5.3<0,1,0>F { align16 1Q };
|
||||
csel.l(8) g107<1>F -g101<4,4,1>F g101<4,4,1>F g104<4,4,1>F { align16 1Q };
|
||||
csel.le(8) g21<1>F g5.0<0,1,0>F (abs)g5.1<0,1,0>F g5.1<0,1,0>F { align16 1Q };
|
||||
csel.l(8) g127<1>F g2<4,4,1>F g8<4,4,1>F g4.0<0,1,0>F { align16 1Q };
|
||||
csel.l(16) g126<1>F g2<4,4,1>F g13<4,4,1>F g6.0<0,1,0>F { align16 1H };
|
||||
csel.le(16) g13<1>F (abs)g73<4,4,1>F g58<4,4,1>F g73<4,4,1>F { align16 1H };
|
||||
csel.le(16) g15<1>F g58<4,4,1>F (abs)g73<4,4,1>F g73<4,4,1>F { align16 1H };
|
||||
csel.l(16) g69<1>F -g65<4,4,1>F g65<4,4,1>F g67<4,4,1>F { align16 1H };
|
||||
csel.sat.g(8) g125<1>F g2.3<0,1,0>F g2.2<0,1,0>F g2.0<0,1,0>F { align16 1Q };
|
||||
csel.g(8) g125<1>F g2.3<0,1,0>F g2.2<0,1,0>F g2.0<0,1,0>F { align16 1Q };
|
||||
csel.g(16) g122<1>F g2.3<0,1,0>F g2.2<0,1,0>F g2.0<0,1,0>F { align16 1H };
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
12 01 60 02 80 00 1e 0f c8 b1 00 39 16 20 c7 02
|
||||
12 01 80 02 80 00 1e 0e c8 81 00 39 10 20 07 02
|
||||
12 01 60 06 20 00 1e 15 01 56 20 00 0a 04 58 01
|
||||
12 01 60 05 40 00 1e 6b c8 51 06 39 ca 20 07 1a
|
||||
12 01 60 06 80 00 1e 15 01 50 20 40 0a 04 48 01
|
||||
12 01 60 05 00 00 1e 7f c8 21 00 39 10 04 00 01
|
||||
12 01 80 05 00 00 1e 7e c8 21 00 39 1a 04 80 01
|
||||
12 01 80 06 20 00 1e 0d c8 91 04 39 74 20 47 12
|
||||
12 01 80 06 80 00 1e 0f c8 a1 03 39 92 20 47 12
|
||||
12 01 80 05 40 00 1e 45 c8 11 04 39 82 20 c7 10
|
||||
12 01 60 83 00 00 1e 7d 01 26 20 80 04 04 80 00
|
||||
12 01 60 03 00 00 1e 7d 01 26 20 80 04 04 80 00
|
||||
12 01 80 03 00 00 1e 7a 01 26 20 80 04 04 80 00
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
else(8) JIP: LABEL0 UIP: LABEL0 { align1 1Q };
|
||||
else(16) JIP: LABEL0 UIP: LABEL0 { align1 1H };
|
||||
else(32) JIP: LABEL0 UIP: LABEL0 { align1 };
|
||||
LABEL0:
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
24 00 60 00 20 0e 00 20 30 00 00 00 30 00 00 00
|
||||
24 00 80 00 20 0e 00 20 20 00 00 00 20 00 00 00
|
||||
24 00 a0 00 20 0e 00 20 10 00 00 00 10 00 00 00
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
endif(8) JIP: LABEL0 { align1 1Q };
|
||||
endif(16) JIP: LABEL0 { align1 1H };
|
||||
endif(32) JIP: LABEL0 { align1 };
|
||||
LABEL0:
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
25 00 60 00 00 0e 00 00 00 00 00 08 30 00 00 00
|
||||
25 00 80 00 00 0e 00 00 00 00 00 08 20 00 00 00
|
||||
25 00 a0 00 00 0e 00 00 00 00 00 08 10 00 00 00
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
fbh(8) g15<1>D g35<8,8,1>D { align1 1Q };
|
||||
fbh(16) g8<1>D g4<8,8,1>D { align1 1H };
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
4b 00 60 00 28 0a e0 21 60 04 8d 00 00 00 00 00
|
||||
4b 00 80 00 28 0a 00 21 80 00 8d 00 00 00 00 00
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
fbl(8) g5<1>UD g5<8,8,1>UD { align1 1Q };
|
||||
fbl(16) g6<1>UD g8<8,8,1>UD { align1 1H };
|
||||
fbl(1) g43<1>UD mask0<0,1,0>UD { align1 WE_all 1N };
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
4c 00 60 00 08 02 a0 20 a0 00 8d 00 00 00 00 00
|
||||
4c 00 80 00 08 02 c0 20 00 01 8d 00 00 00 00 00
|
||||
4c 00 00 00 0c 00 60 25 00 08 00 00 00 00 00 00
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
frc(8) g28<1>F g4<8,8,1>F { align1 1Q };
|
||||
frc(16) g3<1>F g1<0,1,0>F { align1 1H };
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
43 00 60 00 e8 3a 80 23 80 00 8d 00 00 00 00 00
|
||||
43 00 80 00 e8 3a 60 20 20 00 00 00 00 00 00 00
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
(-f0.1.any4h) halt(8) JIP: LABEL0 UIP: LABEL0 { align1 1Q };
|
||||
halt(8) JIP: LABEL1 UIP: LABEL1 { align1 1Q };
|
||||
LABEL1:
|
||||
(-f0.1.any4h) halt(16) JIP: LABEL0 UIP: LABEL0 { align1 1H };
|
||||
halt(16) JIP: LABEL0 UIP: LABEL0 { align1 1H };
|
||||
LABEL0:
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
2a 00 76 00 21 0e 00 20 40 00 00 00 40 00 00 00
|
||||
2a 00 60 00 20 0e 00 20 10 00 00 00 10 00 00 00
|
||||
2a 00 96 00 21 0e 00 20 20 00 00 00 20 00 00 00
|
||||
2a 00 80 00 20 0e 00 20 10 00 00 00 10 00 00 00
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
(+f0.0) if(8) JIP: LABEL0 UIP: LABEL1 { align1 1Q };
|
||||
(-f0.0) if(8) JIP: LABEL0 UIP: LABEL1 { align1 1Q };
|
||||
LABEL0:
|
||||
(-f0.0) if(16) JIP: LABEL1 UIP: LABEL1 { align1 1H };
|
||||
(+f0.0) if(16) JIP: LABEL1 UIP: LABEL1 { align1 1H };
|
||||
(+f0.0) if(32) JIP: LABEL1 UIP: LABEL1 { align1 };
|
||||
LABEL1:
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
22 00 61 00 20 0e 00 20 50 00 00 00 20 00 00 00
|
||||
22 00 71 00 20 0e 00 20 40 00 00 00 10 00 00 00
|
||||
22 00 91 00 20 0e 00 20 30 00 00 00 30 00 00 00
|
||||
22 00 81 00 20 0e 00 20 20 00 00 00 20 00 00 00
|
||||
22 00 a1 00 20 0e 00 20 10 00 00 00 10 00 00 00
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
lrp(8) g4<1>F g16<4,4,1>F g7.2<0,1,0>F g6.6<0,1,0>F { align16 1Q };
|
||||
lrp(16) g4<1>F g2.4<0,1,0>F g2.2<0,1,0>F g2.0<0,1,0>F { align16 1H };
|
||||
lrp.z.f0.0(8) g8<1>F g3.2<0,1,0>F g3.1<0,1,0>F g3.0<0,1,0>F { align16 1Q };
|
||||
lrp.sat(8) g7<1>F g10<4,4,1>F g13<4,4,1>F g16<4,4,1>F { align16 1Q };
|
||||
lrp.sat(16) g18<1>F g20<4,4,1>F g26<4,4,1>F g32<4,4,1>F { align16 1H };
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
5c 01 60 00 00 00 1e 04 c8 01 21 80 0e 04 b0 01
|
||||
5c 01 80 00 00 00 1e 04 01 28 20 80 04 04 80 00
|
||||
5c 01 60 01 00 00 1e 08 01 34 20 40 06 04 c0 00
|
||||
5c 01 60 80 00 00 1e 07 c8 a1 00 39 1a 20 07 04
|
||||
5c 01 80 80 00 00 1e 12 c8 41 01 39 34 20 07 08
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
lzd(8) g25<1>UD g3.1<0,1,0>UD { align1 1Q };
|
||||
lzd(16) g27<1>UD g3.1<0,1,0>UD { align1 1H };
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
4a 00 60 00 08 02 20 23 64 00 00 00 00 00 00 00
|
||||
4a 00 80 00 08 02 60 23 64 00 00 00 00 00 00 00
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
mach(8) g19<1>UD g17<8,8,1>UD 0xaaaaaaabUD { align1 1Q AccWrEnable };
|
||||
mach(8) g23<1>D g17<8,8,1>D 1431655766D { align1 1Q AccWrEnable };
|
||||
mach(8) g42<1>UD g39<8,8,1>UD 0xaaaaaaabUD { align1 2Q AccWrEnable };
|
||||
mach(8) g50<1>D g39<8,8,1>D 1431655766D { align1 2Q AccWrEnable };
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
49 00 60 10 08 02 60 22 20 02 8d 06 ab aa aa aa
|
||||
49 00 60 10 28 0a e0 22 20 02 8d 0e 56 55 55 55
|
||||
49 10 60 10 08 02 40 25 e0 04 8d 06 ab aa aa aa
|
||||
49 10 60 10 28 0a 40 26 e0 04 8d 0e 56 55 55 55
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
mad(8) g26<1>F g22<4,4,1>F g2.4<0,1,0>F g5<4,4,1>F { align16 1Q };
|
||||
mad(16) g14<1>F g12<4,4,1>F g4<4,4,1>F g4<4,4,1>F { align16 1H };
|
||||
mad(8) g64<1>DF g62<4,4,1>DF g40<4,4,1>DF g92<4,4,1>DF { align16 1Q };
|
||||
mad(8) g80<1>DF -g50<4,4,1>DF g24<4,4,1>DF g80<4,4,1>DF { align16 1Q };
|
||||
mad(8) g27<1>DF g48<4,4,1>DF g106<4,4,1>DF g25<4,4,1>DF { align16 2Q };
|
||||
mad(8) g13<1>F -g14.0<0,1,0>F g11<4,4,1>F g6<4,4,1>F { align16 1Q };
|
||||
mad(16) g29<1>F -g33.0<0,1,0>F g25<4,4,1>F g15<4,4,1>F { align16 1H };
|
||||
mad(8) g29<1>DF g23<4,4,1>DF g27<4,4,1>DF -g25<4,4,1>DF { align16 1Q };
|
||||
mad.le.f0.0(8) g5<1>F g3<4,4,1>F g4.2<0,1,0>F g64<4,4,1>F { align16 1Q };
|
||||
mad.le.f0.0(16) g7<1>F g4<4,4,1>F g6.2<0,1,0>F g16<4,4,1>F { align16 1H };
|
||||
mad(8) g32<1>F g31<4,4,1>F g2.3<0,1,0>F -g15<4,4,1>F { align16 1Q };
|
||||
mad(16) g56<1>F g54<4,4,1>F g2.3<0,1,0>F -g5<4,4,1>F { align16 1H };
|
||||
mad.sat(8) g12<1>F g4.1<0,1,0>F g4.0<0,1,0>F g8<4,4,1>F { align16 1Q };
|
||||
mad.sat(16) g18<1>F g6.1<0,1,0>F g6.0<0,1,0>F g10<4,4,1>F { align16 1H };
|
||||
mad(8) g86<1>F g88.6<0,1,0>F -g88.7<0,1,0>F g77<4,4,1>F { align16 1Q };
|
||||
mad(8) g85<1>DF g28<4,4,1>DF g83<4,4,1>DF -g81<4,4,1>DF { align16 2Q };
|
||||
mad(8) g11<1>F -g2.0<0,1,0>F g10<4,4,1>F (abs)g5.6<0,1,0>F { align16 1Q };
|
||||
mad(8) g15<1>F g2.1<0,1,0>F g11<4,4,1>F (abs)g5.6<0,1,0>F { align16 1Q };
|
||||
mad.l.f0.0(8) g2<1>F g22<4,4,1>F g5.7<0,1,0>F g6.3<0,1,0>F { align16 1Q };
|
||||
mad(8) g79<1>DF -g39<4,4,1>DF g21<4,4,1>DF g79<4,4,1>DF { align16 2Q };
|
||||
mad(8) g117<1>F -g116<4,4,1>F g9.0<0,1,0>F -g113<4,4,1>F { align16 1Q };
|
||||
mad.ge.f0.0(8) g13<1>F g28.0<0,1,0>F g9<4,4,1>F -g2.4<0,1,0>F { align16 1Q };
|
||||
mad.ge.f0.0(16) g23<1>F g17.0<0,1,0>F g6<4,4,1>F -g3.0<0,1,0>F { align16 1H };
|
||||
mad(8) g26<1>F g2.0<0,1,0>F -g2.1<0,1,0>F (abs)g5.6<0,1,0>F { align16 1Q };
|
||||
mad(8) g70<1>F -g13<4,4,1>F -g2.1<0,1,0>F -g47<4,4,1>F { align16 1Q };
|
||||
mad(16) g95<1>F -g93<4,4,1>F g85<4,4,1>F -g85<4,4,1>F { align16 1H };
|
||||
mad(16) g5<1>F -g21<4,4,1>F -g2.1<0,1,0>F -g85<4,4,1>F { align16 1H };
|
||||
mad(16) g56<1>F g6.4<0,1,0>F -g6.5<0,1,0>F g51<4,4,1>F { align16 1H };
|
||||
mad.sat(8) g124<1>F -g7<4,4,1>F g2.6<0,1,0>F g2.1<0,1,0>F { align16 1Q };
|
||||
mad(16) g71<1>F g55.0<0,1,0>F -g55.1<0,1,0>F (abs)g1.0<0,1,0>F { align16 1H };
|
||||
mad(16) g77<1>F -g55.2<0,1,0>F g71<4,4,1>F (abs)g1.0<0,1,0>F { align16 1H };
|
||||
mad(16) g37<1>F g55.3<0,1,0>F g77<4,4,1>F (abs)g1.0<0,1,0>F { align16 1H };
|
||||
mad(8) g43<1>DF g42<4,4,1>DF -g34<4,4,1>DF g7<4,4,1>DF { align16 1Q };
|
||||
mad(8) g3<1>DF g2<4,4,1>DF -g111<4,4,1>DF g39<4,4,1>DF { align16 2Q };
|
||||
mad(8) g12<1>F -g17<4,4,1>F (abs)g7<4,4,1>F g4.0<0,1,0>F { align16 1Q };
|
||||
mad(16) g27<1>F -g22<4,4,1>F (abs)g19<4,4,1>F g29.0<0,1,0>F { align16 1H };
|
||||
mad.sat(8) g125<1>F g9<4,4,1>F g6<4,4,1>F -g64.0<0,1,0>F { align16 1Q };
|
||||
mad.l.f0.0(16) g5<1>F g9<4,4,1>F g2.7<0,1,0>F g3.3<0,1,0>F { align16 1H };
|
||||
mad(8) g6<1>DF -g55<4,4,1>DF g2<4,4,1>DF -g47<4,4,1>DF { align16 1Q };
|
||||
mad.z.f0.0(8) g8<1>F g3.2<0,1,0>F g3.1<0,1,0>F g3.0<0,1,0>F { align16 1Q };
|
||||
mad(8) g63<1>DF -g48<4,4,1>DF g56<4,4,1>DF -g44<4,4,1>DF { align16 2Q };
|
||||
mad.nz.f0.0(8) g10<1>F -g12.0<0,1,0>F g7<4,4,1>F g10<4,4,1>F { align16 1Q };
|
||||
mad.nz.f0.0(16) g15<1>F -g33.0<0,1,0>F g9<4,4,1>F g17<4,4,1>F { align16 1H };
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
5b 01 60 00 00 00 1e 1a c8 61 21 00 05 20 47 01
|
||||
5b 01 80 00 00 00 1e 0e c8 c1 00 39 08 20 07 01
|
||||
5b 01 60 00 00 d8 1e 40 c8 e1 03 39 50 20 07 17
|
||||
5b 01 60 00 40 d8 1e 50 c8 21 03 39 30 20 07 14
|
||||
5b 11 60 00 00 d8 1e 1b c8 01 03 39 d4 20 47 06
|
||||
5b 01 60 00 40 00 1e 0d 01 e0 00 39 16 20 87 01
|
||||
5b 01 80 00 40 00 1e 1d 01 10 02 39 32 20 c7 03
|
||||
5b 01 60 00 00 dc 1e 1d c8 71 01 39 36 20 47 06
|
||||
5b 01 60 06 00 00 1e 05 c8 31 20 80 08 20 07 10
|
||||
5b 01 80 06 00 00 1e 07 c8 41 20 80 0c 20 07 04
|
||||
5b 01 60 00 00 04 1e 20 c8 f1 21 c0 04 20 c7 03
|
||||
5b 01 80 00 00 04 1e 38 c8 61 23 c0 04 20 47 01
|
||||
5b 01 60 80 00 00 1e 0c 01 42 20 00 08 20 07 02
|
||||
5b 01 80 80 00 00 1e 12 01 62 20 00 0c 20 87 02
|
||||
5b 01 60 00 00 01 1e 56 01 8c 25 c0 b1 20 47 13
|
||||
5b 11 60 00 00 dc 1e 55 c8 c1 01 39 a6 20 47 14
|
||||
5b 01 60 00 40 02 1e 0b 01 20 00 39 14 04 70 01
|
||||
5b 01 60 00 00 02 1e 0f 01 22 00 39 16 04 70 01
|
||||
5b 01 60 05 00 00 1e 02 c8 61 21 c0 0b 04 98 01
|
||||
5b 11 60 00 40 d8 1e 4f c8 71 02 39 2a 20 c7 13
|
||||
5b 01 60 00 40 04 1e 75 c8 41 27 00 12 20 47 1c
|
||||
5b 01 60 04 00 04 1e 0d 01 c0 01 39 12 04 a0 00
|
||||
5b 01 80 04 00 04 1e 17 01 10 01 39 0c 04 c0 00
|
||||
5b 01 60 00 00 03 1e 1a 01 20 20 40 04 04 70 01
|
||||
5b 01 60 00 40 05 1e 46 c8 d1 20 40 04 20 c7 0b
|
||||
5b 01 80 00 40 04 1e 5f c8 d1 05 39 aa 20 47 15
|
||||
5b 01 80 00 40 05 1e 05 c8 51 21 40 04 20 47 15
|
||||
5b 01 80 00 00 01 1e 38 01 68 20 40 0d 20 c7 0c
|
||||
5b 01 60 80 40 00 1e 7c c8 71 20 80 05 04 88 00
|
||||
5b 01 80 00 00 03 1e 47 01 70 23 40 6e 04 40 00
|
||||
5b 01 80 00 40 02 1e 4d 01 74 03 39 8e 04 40 00
|
||||
5b 01 80 00 00 02 1e 25 01 76 03 39 9a 04 40 00
|
||||
5b 01 60 00 00 d9 1e 2b c8 a1 02 39 44 20 c7 01
|
||||
5b 11 60 00 00 d9 1e 03 c8 21 00 39 de 20 c7 09
|
||||
5b 01 60 00 c0 00 1e 0c c8 11 01 39 0e 04 00 01
|
||||
5b 01 80 00 c0 00 1e 1b c8 61 01 39 26 04 40 07
|
||||
5b 01 60 80 00 04 1e 7d c8 91 00 39 0c 04 00 10
|
||||
5b 01 80 05 00 00 1e 05 c8 91 20 c0 05 04 d8 00
|
||||
5b 01 60 00 40 dc 1e 06 c8 71 03 39 04 20 c7 0b
|
||||
5b 01 60 01 00 00 1e 08 01 34 20 40 06 04 c0 00
|
||||
5b 11 60 00 40 dc 1e 3f c8 01 03 39 70 20 07 0b
|
||||
5b 01 60 02 40 00 1e 0a 01 c0 00 39 0e 20 87 02
|
||||
5b 01 80 02 40 00 1e 0f 01 10 02 39 12 20 47 04
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
math sqrt(16) g20<1>F g18<8,8,1>F null<8,8,1>F { align1 1H };
|
||||
math inv(8) g95<1>F g94<8,8,1>F null<8,8,1>F { align1 1Q };
|
||||
math inv(16) g10<1>F g8<8,8,1>F null<8,8,1>F { align1 1H };
|
||||
math intmod(8) g3<1>UD g1<0,1,0>UD g1.2<0,1,0>UD { align1 1Q };
|
||||
math intmod(8) g4<1>UD g1<0,1,0>UD g1.2<0,1,0>UD { align1 2Q };
|
||||
math sqrt(8) g24<1>F g23<8,8,1>F null<8,8,1>F { align1 1Q };
|
||||
math rsq(8) g5<1>F g2<8,8,1>F null<8,8,1>F { align1 1Q };
|
||||
math pow(8) g11<1>F g10<8,8,1>F 0x42fc6666F /* 126.2F */ { align1 1Q };
|
||||
math pow(16) g18<1>F g16<8,8,1>F 0x42fc6666F /* 126.2F */ { align1 1H };
|
||||
math log(8) g7<1>F g6<8,8,1>F null<8,8,1>F { align1 1Q };
|
||||
math log(16) g11<1>F g9<8,8,1>F null<8,8,1>F { align1 1H };
|
||||
math cos(8) g3<1>F g2<8,8,1>F null<8,8,1>F { align1 1Q };
|
||||
math cos(16) g4<1>F g2<8,8,1>F null<8,8,1>F { align1 1H };
|
||||
math intdiv(8) g4<1>UD g1<0,1,0>UD g1.4<0,1,0>UD { align1 1Q };
|
||||
math intdiv(8) g5<1>UD g1<0,1,0>UD g1.4<0,1,0>UD { align1 2Q };
|
||||
math intdiv(8) g24<1>D g4<0,1,0>D g2.2<0,1,0>D { align1 1Q };
|
||||
math sin(8) g10<1>F g9<8,8,1>F null<8,8,1>F { align1 1Q };
|
||||
math rsq(16) g68<1>F g66<8,8,1>F null<8,8,1>F { align1 1H };
|
||||
math exp(8) g124<1>F g10<8,8,1>F null<8,8,1>F { align1 1Q };
|
||||
math exp(16) g120<1>F g7<8,8,1>F null<8,8,1>F { align1 1H };
|
||||
math intdiv(8) g5<1>D g2<0,1,0>D g2.4<0,1,0>D { align1 2Q };
|
||||
math sin(16) g3<1>F g2<0,1,0>F null<8,8,1>F { align1 1H };
|
||||
math.sat pow(8) g3<1>F g2<0,1,0>F g2.4<0,1,0>F { align1 1Q };
|
||||
math.sat pow(16) g3<1>F g2<0,1,0>F g2.4<0,1,0>F { align1 1H };
|
||||
math.sat sqrt(8) g3<1>F g2<0,1,0>F null<8,8,1>F { align1 1Q };
|
||||
math.sat sqrt(16) g3<1>F g2<0,1,0>F null<8,8,1>F { align1 1H };
|
||||
math.sat exp(8) g3<1>F g2<0,1,0>F null<8,8,1>F { align1 1Q };
|
||||
math.sat exp(16) g3<1>F g2<0,1,0>F null<8,8,1>F { align1 1H };
|
||||
math.sat rsq(8) g127<1>F (abs)g7<8,8,1>F null<8,8,1>F { align1 1Q };
|
||||
math.sat inv(8) g124<1>F g2<0,1,0>F null<8,8,1>F { align1 1Q };
|
||||
math.sat log(8) g127<1>F g7<8,8,1>F null<8,8,1>F { align1 1Q };
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
38 00 80 04 e8 3a 80 22 40 02 8d 38 00 00 8d 00
|
||||
38 00 60 01 e8 3a e0 2b c0 0b 8d 38 00 00 8d 00
|
||||
38 00 80 01 e8 3a 40 21 00 01 8d 38 00 00 8d 00
|
||||
38 00 60 0d 08 02 60 20 20 00 00 02 28 00 00 00
|
||||
38 10 60 0d 08 02 80 20 20 00 00 02 28 00 00 00
|
||||
38 00 60 04 e8 3a 00 23 e0 02 8d 38 00 00 8d 00
|
||||
38 00 60 05 e8 3a a0 20 40 00 8d 38 00 00 8d 00
|
||||
38 00 60 0a e8 3a 60 21 40 01 8d 3e 66 66 fc 42
|
||||
38 00 80 0a e8 3a 40 22 00 02 8d 3e 66 66 fc 42
|
||||
38 00 60 02 e8 3a e0 20 c0 00 8d 38 00 00 8d 00
|
||||
38 00 80 02 e8 3a 60 21 20 01 8d 38 00 00 8d 00
|
||||
38 00 60 07 e8 3a 60 20 40 00 8d 38 00 00 8d 00
|
||||
38 00 80 07 e8 3a 80 20 40 00 8d 38 00 00 8d 00
|
||||
38 00 60 0c 08 02 80 20 20 00 00 02 30 00 00 00
|
||||
38 10 60 0c 08 02 a0 20 20 00 00 02 30 00 00 00
|
||||
38 00 60 0c 28 0a 00 23 80 00 00 0a 48 00 00 00
|
||||
38 00 60 06 e8 3a 40 21 20 01 8d 38 00 00 8d 00
|
||||
38 00 80 05 e8 3a 80 28 40 08 8d 38 00 00 8d 00
|
||||
38 00 60 03 e8 3a 80 2f 40 01 8d 38 00 00 8d 00
|
||||
38 00 80 03 e8 3a 00 2f e0 00 8d 38 00 00 8d 00
|
||||
38 10 60 0c 28 0a a0 20 40 00 00 0a 50 00 00 00
|
||||
38 00 80 06 e8 3a 60 20 40 00 00 38 00 00 8d 00
|
||||
38 00 60 8a e8 3a 60 20 40 00 00 3a 50 00 00 00
|
||||
38 00 80 8a e8 3a 60 20 40 00 00 3a 50 00 00 00
|
||||
38 00 60 84 e8 3a 60 20 40 00 00 38 00 00 8d 00
|
||||
38 00 80 84 e8 3a 60 20 40 00 00 38 00 00 8d 00
|
||||
38 00 60 83 e8 3a 60 20 40 00 00 38 00 00 8d 00
|
||||
38 00 80 83 e8 3a 60 20 40 00 00 38 00 00 8d 00
|
||||
38 00 60 85 e8 3a e0 2f e0 20 8d 38 00 00 8d 00
|
||||
38 00 60 81 e8 3a 80 2f 40 00 00 38 00 00 8d 00
|
||||
38 00 60 82 e8 3a e0 2f e0 00 8d 38 00 00 8d 00
|
||||
|
|
@ -1,139 +0,0 @@
|
|||
mov(8) g123<1>UD g1<8,8,1>UD { align1 WE_all 1Q };
|
||||
mov(8) g124<1>F 0x40c00000F /* 6F */ { align1 1Q };
|
||||
mov(8) g14<1>UD 0x00000000UD { align1 1Q };
|
||||
mov(8) g17<1>F g12<8,8,1>F { align1 1Q };
|
||||
mov.sat(8) g124<1>F g8<8,8,1>F { align1 1Q };
|
||||
mov(8) g61<2>D g22<8,8,1>D { align1 1Q };
|
||||
mov(8) g21<1>D g59<8,4,2>UD { align1 1Q };
|
||||
mov(8) g4<1>D -1D { align1 1Q };
|
||||
mov.nz.f0.0(8) null<1>D g4<8,8,1>D { align1 1Q };
|
||||
mov(1) g2.2<1>UD 0x00000000UD { align1 WE_all 1N };
|
||||
mov(4) g114<1>F g2.3<8,2,4>F { align1 WE_all 1N };
|
||||
mov(8) g126<1>F g4<8,8,1>D { align1 1Q };
|
||||
mov(16) g124<1>F g4<8,8,1>D { align1 1H };
|
||||
mov(16) g120<1>F g124<8,8,1>F { align1 1H };
|
||||
mov(16) g124<1>F 0x0F /* 0F */ { align1 1H };
|
||||
mov(16) g124<1>D 1065353216D { align1 1H };
|
||||
mov.nz.f0.0(16) null<1>D g2<0,1,0>D { align1 1H };
|
||||
mov(8) g3<1>UW 0x76543210V { align1 WE_all 1Q };
|
||||
mov(16) g20<1>UD g0.1<0,1,0>UD { align1 1H };
|
||||
mov(16) g6<1>D g3<8,8,1>UW { align1 1H };
|
||||
mov(8) g1<1>D g4<8,8,1>D { align1 2Q };
|
||||
mov(8) g5<1>D 0D { align1 2Q };
|
||||
mov(8) g2<1>F g6<8,4,1>UW { align1 1Q };
|
||||
mov(8) g7<1>D g2<8,8,1>F { align1 1Q };
|
||||
mov(16) g2<1>F g10<8,4,1>UW { align1 1H };
|
||||
mov(16) g11<1>D g2<8,8,1>F { align1 1H };
|
||||
mov(8) g80<1>DF g5<0,1,0>DF { align1 1Q };
|
||||
mov(8) g92<2>UD g6.4<0,1,0>UD { align1 1Q };
|
||||
mov(8) g62<1>Q 0xbff0000000000000Q { align1 1Q };
|
||||
mov(8) g92<2>F g92<4,4,1>DF { align1 1Q };
|
||||
mov(8) g92<1>DF g95<4,4,1>F { align1 1Q };
|
||||
mov(8) g106<1>DF g2<0,1,0>F { align1 2Q };
|
||||
mov(8) g48<1>Q 0xbff0000000000000Q { align1 2Q };
|
||||
mov(8) g127<1>UD g106.1<8,4,2>UD { align1 2Q };
|
||||
mov(8) g11<2>F g7<4,4,1>DF { align1 2Q };
|
||||
mov(8) g33<1>D g34<8,4,2>UD { align1 2Q };
|
||||
mov(8) g6<2>UD 0x00000000UD { align1 2Q };
|
||||
mov(8) g2<1>UW 0x76543210UV { align1 1Q };
|
||||
mov(8) g12<1>UD g2<8,8,1>UW { align1 1Q };
|
||||
mov(8) g7<1>UD 0x00080000UD { align1 WE_all 1Q };
|
||||
mov(1) g2<1>F 0x3e800000F /* 0.25F */ { align1 WE_all 1N };
|
||||
mov(8) g15<1>F g11<8,8,1>UD { align1 1Q };
|
||||
mov(1) f0.1<1>UW g1.14<0,1,0>UW { align1 WE_all 1N };
|
||||
mov(8) g18<1>UD g2<8,8,1>D { align1 1Q };
|
||||
mov(16) g18<1>UD g26<8,8,1>D { align1 1H };
|
||||
mov(16) g120<1>D g34<8,8,1>D { align1 1H };
|
||||
mov(8) g8<1>Q g13<4,4,1>Q { align1 1Q };
|
||||
mov(8) g21<1>UD g0<8,8,1>UD { align1 WE_all 2Q };
|
||||
mov(8) g23<1>F g6<0,1,0>F { align1 2Q };
|
||||
mov(1) g21.2<1>UD 0x000003f2UD { align1 WE_all 3N };
|
||||
mov.nz.f0.0(8) g19<1>D g3<8,4,2>UD { align1 1Q };
|
||||
mov(1) f1<1>UD g1.7<0,1,0>UD { align1 WE_all 1N };
|
||||
mov.sat(8) g126<1>F 0x0F /* 0F */ { align1 1Q };
|
||||
mov.sat(8) g124<1>F -g36<8,8,1>D { align1 1Q };
|
||||
mov(8) g41<1>F 0x0F /* 0F */ { align1 2Q };
|
||||
mov(8) g42<1>UD g11<8,8,1>D { align1 2Q };
|
||||
mov(16) g86<1>UD g88<8,8,1>UD { align1 WE_all 1H };
|
||||
mov.sat(16) g120<1>F g2<0,1,0>F { align1 1H };
|
||||
mov(16) g2<1>F g18<8,8,1>UD { align1 1H };
|
||||
mov(8) g4<1>UD 0x0F /* 0F */ { align1 1Q };
|
||||
mov(8) g8<1>DF g2<0,1,0>D { align1 1Q };
|
||||
mov(16) g8<1>UD 0x00000000UD { align1 1H };
|
||||
mov.nz.f0.0(8) g4<1>F -(abs)g2<0,1,0>F { align1 1Q };
|
||||
(+f0.0) mov(8) g4<1>F 0xbf800000F /* -1F */ { align1 1Q };
|
||||
mov.nz.f0.0(16) g4<1>F -(abs)g2<0,1,0>F { align1 1H };
|
||||
(+f0.0) mov(16) g4<1>F 0xbf800000F /* -1F */ { align1 1H };
|
||||
mov(1) f1<1>UD g1.7<0,1,0>UD { align1 WE_all 3N };
|
||||
mov(8) g32<1>DF g2<0,1,0>DF { align1 2Q };
|
||||
mov(8) g5<1>F g2<0,1,0>HF { align1 1Q };
|
||||
mov(16) g6<1>F g2<0,1,0>HF { align1 1H };
|
||||
mov(8) g7<1>UD g2<0,1,0>F { align1 1Q };
|
||||
mov(16) g15<1>UD g11<8,8,1>F { align1 1H };
|
||||
mov(16) g19<1>UD g15<16,8,2>UW { align1 1H };
|
||||
mov(1) g19<1>UD g[a0 64]<0,1,0>UD { align1 WE_all 1N };
|
||||
mov(16) g23<1>UD g21<32,8,4>UB { align1 1H };
|
||||
mov(8) g7<1>DF 0x0000000000000000DF /* 0DF */ { align1 1Q };
|
||||
mov(8) g5<1>F 0x0F /* 0F */ { align1 WE_all 1Q };
|
||||
mov(16) g4<1>UD 0x00000000UD { align1 WE_all 1H };
|
||||
mov(8) g5<2>UD g2<0,1,0>DF { align1 1Q };
|
||||
mov(8) g10<2>UD g2<0,1,0>DF { align1 2Q };
|
||||
mov(8) g3<1>DF g2<0,1,0>UD { align1 1Q };
|
||||
mov(8) g3<1>DF g2<0,1,0>UD { align1 2Q };
|
||||
mov(1) f0<1>UW 0x0000UW { align1 WE_all 1N };
|
||||
mov(1) g1<1>D 0D { align1 WE_all 1N };
|
||||
(+f0.0.any16h) mov(1) g1<1>D -1D { align1 WE_all 1N };
|
||||
mov(8) g9<1>F g2<0,1,0>W { align1 1Q };
|
||||
mov(8) g7<1>UQ g4<4,4,1>UQ { align1 1Q };
|
||||
mov(16) g11<1>UD 0x0F /* 0F */ { align1 1H };
|
||||
mov(8) g5<2>D g2<0,1,0>DF { align1 1Q };
|
||||
mov(8) g10<2>D g2<0,1,0>DF { align1 2Q };
|
||||
mov(1) f1<1>UW f0.1<0,1,0>UW { align1 WE_all 1N };
|
||||
mov(1) f1<1>UW f0.1<0,1,0>UW { align1 WE_all 3N };
|
||||
mov(16) g4<1>D 0D { align1 2H };
|
||||
mov(8) g14<1>UD g13<32,8,4>UB { align1 1Q };
|
||||
mov(16) g124<1>UD g15<8,8,1>UD { align1 2H };
|
||||
mov(16) g118<1>D g122<8,8,1>UW { align1 2H };
|
||||
mov(16) g101<1>UD 0x00000001UD { align1 2H };
|
||||
mov(1) g4<2>UW 0x00000000UD { align1 WE_all 1N };
|
||||
mov(8) g4<1>UD f0<0,1,0>UW { align1 1Q };
|
||||
mov(8) g8<1>D g2<8,8,1>UW { align1 1Q };
|
||||
mov(16) g4<1>UD f0<0,1,0>UW { align1 1H };
|
||||
mov(8) g3<1>DF -g2<0,1,0>D { align1 2Q };
|
||||
mov(8) g5<1>F g2<0,1,0>B { align1 1Q };
|
||||
mov(16) g6<1>F g2<0,1,0>B { align1 1H };
|
||||
mov(8) g4<1>DF 0x0000000000000000DF /* 0DF */ { align1 2Q };
|
||||
mov.nz.f0.0(8) g16<1>D g17<8,4,2>UD { align1 2Q };
|
||||
mov(8) g34<1>UW 0x76543210V { align1 1Q };
|
||||
mov(8) g8<1>UD 48D { align1 1Q };
|
||||
mov(16) g8<1>UD 0D { align1 1H };
|
||||
mov(8) g7<2>HF g2.1<0,1,0>F { align1 1Q };
|
||||
mov(1) g5<1>D g[a0 96]<0,1,0>D { align1 WE_all 1N };
|
||||
(+f0.0.any8h) mov(1) g2<1>D -1D { align1 WE_all 1N };
|
||||
mov(8) g9<1>UD 0D { align1 WE_all 1Q };
|
||||
mov(8) g2<2>UW g9<8,8,1>F { align1 1Q };
|
||||
mov(8) g3<1>UW g2<16,8,2>UW { align1 1Q };
|
||||
mov(8) g12<1>UW g8<16,8,2>UW { align1 WE_all 1Q };
|
||||
mov.sat(16) g13<1>F 0x3f800000F /* 1F */ { align1 1H };
|
||||
mov(16) g19<2>UW g17<8,8,1>F { align1 1H };
|
||||
mov(16) g4<1>UW g13<16,8,2>UW { align1 WE_all 1H };
|
||||
mov.nz.f0.0(8) null<1>D 0x00000000UD { align1 1Q };
|
||||
mov.nz.f0.0(16) null<1>D 0x00000000UD { align1 1H };
|
||||
mov(4) g3<1>UD tm0<4,4,1>UD { align1 WE_all 1N };
|
||||
(+f0.0.all16h) mov(1) g1<1>D -1D { align1 WE_all 1N };
|
||||
mov(8) g9<1>F g2<0,1,0>UB { align1 1Q };
|
||||
mov(16) g6<1>F g2<0,1,0>UB { align1 1H };
|
||||
mov(16) g10<2>HF g4<8,8,1>F { align1 1H };
|
||||
mov.z.f0.0(8) null<1>UD g2<8,8,1>UD { align1 1Q };
|
||||
mov.sat(8) g125<1>F g9<8,8,1>UD { align1 1Q };
|
||||
mov.z.f0.0(16) g1<1>UD g0.7<0,1,0>UD { align1 1H };
|
||||
mov.z.f0.0(8) g18<1>D g17<8,8,1>F { align1 1Q };
|
||||
mov(16) g35<1>F g15<16,8,2>W { align1 1H };
|
||||
mov(8) g23<1>Q g26<4,4,1>Q { align1 2Q };
|
||||
mov(8) g2<1>D 0x00000000UD { align1 1Q };
|
||||
mov(16) g2<1>D 0x00000000UD { align1 1H };
|
||||
(+f0.0.all8h) mov(1) g7<1>D -1D { align1 WE_all 1N };
|
||||
mov(8) g127<1>UB g2<0,1,0>UB { align1 WE_all 1Q };
|
||||
mov.z.f0.0(8) null<1>D g24<8,8,1>F { align1 1Q };
|
||||
mov.z.f0.0(16) null<1>D g76<8,8,1>F { align1 1H };
|
||||
mov(16) g7<1>D g2<16,8,2>B { align1 1H };
|
||||
|
|
@ -1,139 +0,0 @@
|
|||
01 00 60 00 0c 02 60 2f 20 00 8d 00 00 00 00 00
|
||||
01 00 60 00 e8 3e 80 2f 00 00 00 38 00 00 c0 40
|
||||
01 00 60 00 08 06 c0 21 00 00 00 00 00 00 00 00
|
||||
01 00 60 00 e8 3a 20 22 80 01 8d 00 00 00 00 00
|
||||
01 00 60 80 e8 3a 80 2f 00 01 8d 00 00 00 00 00
|
||||
01 00 60 00 28 0a a0 47 c0 02 8d 00 00 00 00 00
|
||||
01 00 60 00 28 02 a0 22 60 07 8a 00 00 00 00 00
|
||||
01 00 60 00 28 0e 80 20 00 00 00 08 ff ff ff ff
|
||||
01 00 60 02 20 0a 00 20 80 00 8d 00 00 00 00 00
|
||||
01 00 00 00 0c 06 48 20 00 00 00 00 00 00 00 00
|
||||
01 00 40 00 ec 3a 40 2e 4c 00 87 00 00 00 00 00
|
||||
01 00 60 00 e8 0a c0 2f 80 00 8d 00 00 00 00 00
|
||||
01 00 80 00 e8 0a 80 2f 80 00 8d 00 00 00 00 00
|
||||
01 00 80 00 e8 3a 00 2f 80 0f 8d 00 00 00 00 00
|
||||
01 00 80 00 e8 3e 80 2f 00 00 00 38 00 00 00 00
|
||||
01 00 80 00 28 0e 80 2f 00 00 00 08 00 00 80 3f
|
||||
01 00 80 02 20 0a 00 20 40 00 00 00 00 00 00 00
|
||||
01 00 60 00 4c 36 60 20 00 00 00 30 10 32 54 76
|
||||
01 00 80 00 08 02 80 22 04 00 00 00 00 00 00 00
|
||||
01 00 80 00 28 12 c0 20 60 00 8d 00 00 00 00 00
|
||||
01 10 60 00 28 0a 20 20 80 00 8d 00 00 00 00 00
|
||||
01 10 60 00 28 0e a0 20 00 00 00 08 00 00 00 00
|
||||
01 00 60 00 e8 12 40 20 c0 00 89 00 00 00 00 00
|
||||
01 00 60 00 28 3a e0 20 40 00 8d 00 00 00 00 00
|
||||
01 00 80 00 e8 12 40 20 40 01 89 00 00 00 00 00
|
||||
01 00 80 00 28 3a 60 21 40 00 8d 00 00 00 00 00
|
||||
01 00 60 00 c8 32 00 2a a0 00 00 00 00 00 00 00
|
||||
01 00 60 00 08 02 80 4b d0 00 00 00 00 00 00 00
|
||||
01 00 60 00 28 4f c0 27 00 00 00 00 00 00 f0 bf
|
||||
01 00 60 00 e8 32 80 4b 80 0b 69 00 00 00 00 00
|
||||
01 00 60 00 c8 3a 80 2b e0 0b 69 00 00 00 00 00
|
||||
01 10 60 00 c8 3a 40 2d 40 00 00 00 00 00 00 00
|
||||
01 10 60 00 28 4f 00 26 00 00 00 00 00 00 f0 bf
|
||||
01 10 60 00 08 02 e0 2f 44 0d 8a 00 00 00 00 00
|
||||
01 10 60 00 e8 32 60 41 e0 00 69 00 00 00 00 00
|
||||
01 10 60 00 28 02 20 24 40 04 8a 00 00 00 00 00
|
||||
01 10 60 00 08 06 c0 40 00 00 00 00 00 00 00 00
|
||||
01 00 60 00 48 26 40 20 00 00 00 20 10 32 54 76
|
||||
01 00 60 00 08 12 80 21 40 00 8d 00 00 00 00 00
|
||||
01 00 60 00 0c 06 e0 20 00 00 00 00 00 00 08 00
|
||||
01 00 00 00 ec 3e 40 20 00 00 00 38 00 00 80 3e
|
||||
01 00 60 00 e8 02 e0 21 60 01 8d 00 00 00 00 00
|
||||
01 00 00 00 44 12 02 26 3c 00 00 00 00 00 00 00
|
||||
01 00 60 00 08 0a 40 22 40 00 8d 00 00 00 00 00
|
||||
01 00 80 00 08 0a 40 22 40 03 8d 00 00 00 00 00
|
||||
01 00 80 00 28 0a 00 2f 40 04 8d 00 00 00 00 00
|
||||
01 00 60 00 28 4b 00 21 a0 01 69 00 00 00 00 00
|
||||
01 10 60 00 0c 02 a0 22 00 00 8d 00 00 00 00 00
|
||||
01 10 60 00 e8 3a e0 22 c0 00 00 00 00 00 00 00
|
||||
01 10 00 00 0c 06 a8 22 00 00 00 00 f2 03 00 00
|
||||
01 00 60 02 28 02 60 22 60 00 8a 00 00 00 00 00
|
||||
01 00 00 00 04 02 20 26 3c 00 00 00 00 00 00 00
|
||||
01 00 60 80 e8 3e c0 2f 00 00 00 38 00 00 00 00
|
||||
01 00 60 80 e8 0a 80 2f 80 44 8d 00 00 00 00 00
|
||||
01 10 60 00 e8 3e 20 25 00 00 00 38 00 00 00 00
|
||||
01 10 60 00 08 0a 40 25 60 01 8d 00 00 00 00 00
|
||||
01 00 80 00 0c 02 c0 2a 00 0b 8d 00 00 00 00 00
|
||||
01 00 80 80 e8 3a 00 2f 40 00 00 00 00 00 00 00
|
||||
01 00 80 00 e8 02 40 20 40 02 8d 00 00 00 00 00
|
||||
01 00 60 00 08 3e 80 20 00 00 00 38 00 00 00 00
|
||||
01 00 60 00 c8 0a 00 21 40 00 00 00 00 00 00 00
|
||||
01 00 80 00 08 06 00 21 00 00 00 00 00 00 00 00
|
||||
01 00 60 02 e8 3a 80 20 40 60 00 00 00 00 00 00
|
||||
01 00 61 00 e8 3e 80 20 00 00 00 38 00 00 80 bf
|
||||
01 00 80 02 e8 3a 80 20 40 60 00 00 00 00 00 00
|
||||
01 00 81 00 e8 3e 80 20 00 00 00 38 00 00 80 bf
|
||||
01 10 00 00 04 02 20 26 3c 00 00 00 00 00 00 00
|
||||
01 10 60 00 c8 32 00 24 40 00 00 00 00 00 00 00
|
||||
01 00 60 00 e8 52 a0 20 40 00 00 00 00 00 00 00
|
||||
01 00 80 00 e8 52 c0 20 40 00 00 00 00 00 00 00
|
||||
01 00 60 00 08 3a e0 20 40 00 00 00 00 00 00 00
|
||||
01 00 80 00 08 3a e0 21 60 01 8d 00 00 00 00 00
|
||||
01 00 80 00 08 12 60 22 e0 01 ae 00 00 00 00 00
|
||||
01 00 00 00 0c 02 60 22 40 80 00 00 00 00 00 00
|
||||
01 00 80 00 08 22 e0 22 a0 02 cf 00 00 00 00 00
|
||||
01 00 60 00 c8 56 e0 20 00 00 00 00 00 00 00 00
|
||||
01 00 60 00 ec 3e a0 20 00 00 00 38 00 00 00 00
|
||||
01 00 80 00 0c 06 80 20 00 00 00 00 00 00 00 00
|
||||
01 00 60 00 08 32 a0 40 40 00 00 00 00 00 00 00
|
||||
01 10 60 00 08 32 40 41 40 00 00 00 00 00 00 00
|
||||
01 00 60 00 c8 02 60 20 40 00 00 00 00 00 00 00
|
||||
01 10 60 00 c8 02 60 20 40 00 00 00 00 00 00 00
|
||||
01 00 00 00 44 16 00 26 00 00 00 10 00 00 00 00
|
||||
01 00 00 00 2c 0e 20 20 00 00 00 08 00 00 00 00
|
||||
01 00 0a 00 2c 0e 20 20 00 00 00 08 ff ff ff ff
|
||||
01 00 60 00 e8 1a 20 21 40 00 00 00 00 00 00 00
|
||||
01 00 60 00 08 43 e0 20 80 00 69 00 00 00 00 00
|
||||
01 00 80 00 08 3e 60 21 00 00 00 38 00 00 00 00
|
||||
01 00 60 00 28 32 a0 40 40 00 00 00 00 00 00 00
|
||||
01 10 60 00 28 32 40 41 40 00 00 00 00 00 00 00
|
||||
01 00 00 00 44 10 20 26 02 06 00 00 00 00 00 00
|
||||
01 10 00 00 44 10 20 26 02 06 00 00 00 00 00 00
|
||||
01 20 80 00 28 0e 80 20 00 00 00 08 00 00 00 00
|
||||
01 00 60 00 08 22 c0 21 a0 01 cf 00 00 00 00 00
|
||||
01 20 80 00 08 02 80 2f e0 01 8d 00 00 00 00 00
|
||||
01 20 80 00 28 12 c0 2e 40 0f 8d 00 00 00 00 00
|
||||
01 20 80 00 08 06 a0 2c 00 00 00 00 01 00 00 00
|
||||
01 00 00 00 4c 06 80 40 00 00 00 00 00 00 00 00
|
||||
01 00 60 00 08 10 80 20 00 06 00 00 00 00 00 00
|
||||
01 00 60 00 28 12 00 21 40 00 8d 00 00 00 00 00
|
||||
01 00 80 00 08 10 80 20 00 06 00 00 00 00 00 00
|
||||
01 10 60 00 c8 0a 60 20 40 40 00 00 00 00 00 00
|
||||
01 00 60 00 e8 2a a0 20 40 00 00 00 00 00 00 00
|
||||
01 00 80 00 e8 2a c0 20 40 00 00 00 00 00 00 00
|
||||
01 10 60 00 c8 56 80 20 00 00 00 00 00 00 00 00
|
||||
01 10 60 02 28 02 00 22 20 02 8a 00 00 00 00 00
|
||||
01 00 60 00 48 36 40 24 00 00 00 30 10 32 54 76
|
||||
01 00 60 00 08 0e 00 21 00 00 00 08 30 00 00 00
|
||||
01 00 80 00 08 0e 00 21 00 00 00 08 00 00 00 00
|
||||
01 00 60 00 48 3b e0 40 44 00 00 00 00 00 00 00
|
||||
01 00 00 00 2c 0a a0 20 60 80 00 00 00 00 00 00
|
||||
01 00 08 00 2c 0e 40 20 00 00 00 08 ff ff ff ff
|
||||
01 00 60 00 0c 0e 20 21 00 00 00 08 00 00 00 00
|
||||
01 00 60 00 48 3a 40 40 20 01 8d 00 00 00 00 00
|
||||
01 00 60 00 48 12 60 20 40 00 ae 00 00 00 00 00
|
||||
01 00 60 00 4c 12 80 21 00 01 ae 00 00 00 00 00
|
||||
01 00 80 80 e8 3e a0 21 00 00 00 38 00 00 80 3f
|
||||
01 00 80 00 48 3a 60 42 20 02 8d 00 00 00 00 00
|
||||
01 00 80 00 4c 12 80 20 a0 01 ae 00 00 00 00 00
|
||||
01 00 60 02 20 06 00 20 00 00 00 00 00 00 00 00
|
||||
01 00 80 02 20 06 00 20 00 00 00 00 00 00 00 00
|
||||
01 00 40 00 0c 00 60 20 00 18 69 00 00 00 00 00
|
||||
01 00 0b 00 2c 0e 20 20 00 00 00 08 ff ff ff ff
|
||||
01 00 60 00 e8 22 20 21 40 00 00 00 00 00 00 00
|
||||
01 00 80 00 e8 22 c0 20 40 00 00 00 00 00 00 00
|
||||
01 00 80 00 48 3b 40 41 80 00 8d 00 00 00 00 00
|
||||
01 00 60 01 00 02 00 20 40 00 8d 00 00 00 00 00
|
||||
01 00 60 80 e8 02 a0 2f 20 01 8d 00 00 00 00 00
|
||||
01 00 80 01 08 02 20 20 1c 00 00 00 00 00 00 00
|
||||
01 00 60 01 28 3a 40 22 20 02 8d 00 00 00 00 00
|
||||
01 00 80 00 e8 1a 60 24 e0 01 ae 00 00 00 00 00
|
||||
01 10 60 00 28 4b e0 22 40 03 69 00 00 00 00 00
|
||||
01 00 60 00 28 06 40 20 00 00 00 00 00 00 00 00
|
||||
01 00 80 00 28 06 40 20 00 00 00 00 00 00 00 00
|
||||
01 00 09 00 2c 0e e0 20 00 00 00 08 ff ff ff ff
|
||||
01 00 60 00 8c 22 e0 2f 40 00 00 00 00 00 00 00
|
||||
01 00 60 01 20 3a 00 20 00 03 8d 00 00 00 00 00
|
||||
01 00 80 01 20 3a 00 20 80 09 8d 00 00 00 00 00
|
||||
01 00 80 00 28 2a e0 20 40 00 ae 00 00 00 00 00
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
mul(8) g22<1>F g4<8,8,1>F g2<0,1,0>F { align1 1Q };
|
||||
mul(16) g26<1>F g2<0,1,0>F g2<0,1,0>F { align1 1H };
|
||||
mul(8) g36<1>DF g8<0,1,0>DF g8<0,1,0>DF { align1 1Q };
|
||||
mul(8) g9<1>UD g86<8,8,1>UD 0x00000004UD { align1 1Q };
|
||||
mul(8) acc0<1>UD g17<8,8,1>UD 0xaaabUW { align1 1Q };
|
||||
mul(8) acc0<1>D g17<8,8,1>D 0x5556UW { align1 1Q };
|
||||
mul(8) g21<1>D g20<8,8,1>D 3D { align1 1Q };
|
||||
mul(8) acc0<1>UD g39<8,8,1>UD 0xaaabUW { align1 2Q };
|
||||
mul(16) g45<1>D g43<8,8,1>D 3D { align1 1H };
|
||||
mul(8) acc0<1>D g39<8,8,1>D 0x5556UW { align1 2Q };
|
||||
mul.z.f0.0(8) g10<1>F g5<0,1,0>F g9<8,8,1>F { align1 1Q };
|
||||
mul(8) g39<1>DF g3.3<0,1,0>DF g3.3<0,1,0>DF { align1 2Q };
|
||||
mul.z.f0.0(16) g6<1>F g2<0,1,0>F g4<8,8,1>F { align1 1H };
|
||||
mul.sat(8) g17<1>F g4<8,8,1>F g16<8,8,1>F { align1 1Q };
|
||||
mul.sat(16) g9<1>F g3<8,8,1>F g7<8,8,1>F { align1 1H };
|
||||
mul.l.f0.0(8) null<1>F g6<0,1,0>F g5.7<0,1,0>F { align1 1Q };
|
||||
mul.sat(8) g8<1>DF g34<4,4,1>DF g5<4,4,1>DF { align1 1Q };
|
||||
mul(8) g4<1>UQ g8<4,4,1>UD g12<4,4,1>UD { align1 1Q };
|
||||
mul(8) g20<1>UQ g5<4,4,1>UD g13<4,4,1>UD { align1 2Q };
|
||||
mul(8) g5<1>Q g9<4,4,1>D g13<4,4,1>D { align1 1Q };
|
||||
mul.sat(8) g10<1>DF g10<4,4,1>DF g16<4,4,1>DF { align1 2Q };
|
||||
mul.l.f0.0(8) g20<1>F g2<8,8,1>F 0x42700000F /* 60F */ { align1 1Q };
|
||||
mul.l.f0.0(16) g32<1>F g2<8,8,1>F 0x42700000F /* 60F */ { align1 1H };
|
||||
mul(1) g6<1>UD g12<0,1,0>UD 0x00000101UD { align1 WE_all 1N };
|
||||
mul(8) g21<1>Q g6<4,4,1>D g14<4,4,1>D { align1 2Q };
|
||||
mul.l.f0.0(16) null<1>F g2.2<0,1,0>F g2.1<0,1,0>F { align1 1H };
|
||||
mul(8) g6<1>UW g6<8,8,1>UW 0x0808UW { align1 1Q };
|
||||
mul(16) g15<1>UW g14<16,16,1>UW 0x0808UW { align1 1H };
|
||||
mul.nz.f0.0(8) g6<1>F g12<8,8,1>F 0x3f808000F /* 1.00391F */ { align1 1Q };
|
||||
mul.nz.f0.0(16) g9<1>F g7<8,8,1>F 0x3f808000F /* 1.00391F */ { align1 1H };
|
||||
mul(1) g4<1>UD g4<0,1,0>UD 0x00000101UD { align1 WE_all 3N };
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
41 00 60 00 e8 3a c0 22 80 00 8d 3a 40 00 00 00
|
||||
41 00 80 00 e8 3a 40 23 40 00 00 3a 40 00 00 00
|
||||
41 00 60 00 c8 32 80 24 00 01 00 32 00 01 00 00
|
||||
41 00 60 00 08 02 20 21 c0 0a 8d 06 04 00 00 00
|
||||
41 00 60 00 00 02 00 24 20 02 8d 16 ab aa ab aa
|
||||
41 00 60 00 20 0a 00 24 20 02 8d 16 56 55 56 55
|
||||
41 00 60 00 28 0a a0 22 80 02 8d 0e 03 00 00 00
|
||||
41 10 60 00 00 02 00 24 e0 04 8d 16 ab aa ab aa
|
||||
41 00 80 00 28 0a a0 25 60 05 8d 0e 03 00 00 00
|
||||
41 10 60 00 20 0a 00 24 e0 04 8d 16 56 55 56 55
|
||||
41 00 60 01 e8 3a 40 21 a0 00 00 3a 20 01 8d 00
|
||||
41 10 60 00 c8 32 e0 24 78 00 00 32 78 00 00 00
|
||||
41 00 80 01 e8 3a c0 20 40 00 00 3a 80 00 8d 00
|
||||
41 00 60 80 e8 3a 20 22 80 00 8d 3a 00 02 8d 00
|
||||
41 00 80 80 e8 3a 20 21 60 00 8d 3a e0 00 8d 00
|
||||
41 00 60 05 e0 3a 00 20 c0 00 00 3a bc 00 00 00
|
||||
41 00 60 80 c8 32 00 21 40 04 69 32 a0 00 69 00
|
||||
41 00 60 00 08 03 80 20 00 01 69 02 80 01 69 00
|
||||
41 10 60 00 08 03 80 22 a0 00 69 02 a0 01 69 00
|
||||
41 00 60 00 28 0b a0 20 20 01 69 0a a0 01 69 00
|
||||
41 10 60 80 c8 32 40 21 40 01 69 32 00 02 69 00
|
||||
41 00 60 05 e8 3a 80 22 40 00 8d 3e 00 00 70 42
|
||||
41 00 80 05 e8 3a 00 24 40 00 8d 3e 00 00 70 42
|
||||
41 00 00 00 0c 02 c0 20 80 01 00 06 01 01 00 00
|
||||
41 10 60 00 28 0b a0 22 c0 00 69 0a c0 01 69 00
|
||||
41 00 80 05 e0 3a 00 20 48 00 00 3a 44 00 00 00
|
||||
41 00 60 00 48 12 c0 20 c0 00 8d 16 08 08 08 08
|
||||
41 00 80 00 48 12 e0 21 c0 01 b1 16 08 08 08 08
|
||||
41 00 60 02 e8 3a c0 20 80 01 8d 3e 00 80 80 3f
|
||||
41 00 80 02 e8 3a 20 21 e0 00 8d 3e 00 80 80 3f
|
||||
41 10 00 00 0c 02 80 20 80 00 00 06 01 01 00 00
|
||||
|
|
@ -1 +0,0 @@
|
|||
nop ;
|
||||
|
|
@ -1 +0,0 @@
|
|||
7e 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
not(16) g3<1>D g1.2<0,1,0>D { align1 1H };
|
||||
not(8) g4<1>D g8<8,8,1>D { align1 1Q };
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
04 00 80 00 28 0a 60 20 28 00 00 00 00 00 00 00
|
||||
04 00 60 00 28 0a 80 20 00 01 8d 00 00 00 00 00
|
||||
|
|
@ -1,23 +0,0 @@
|
|||
or(8) g53<1>UD g49<8,8,1>UD g21<8,8,1>UD { align1 1Q };
|
||||
or.nz.f0.0(8) null<1>UD g21<8,8,1>UD g2<8,8,1>UD { align1 1Q };
|
||||
or.nz.f0.0(8) g5<1>UD g62<8,8,1>UD g67<8,8,1>UD { align1 1Q };
|
||||
or(8) g5<1>UD g106.1<8,4,2>UD 0x7ff00000UD { align1 2Q };
|
||||
or.nz.f0.0(16) null<1>UD g35<8,8,1>UD g32<8,8,1>UD { align1 1H };
|
||||
or(16) g36<1>UD g34<8,8,1>UD g20<8,8,1>UD { align1 1H };
|
||||
or.nz.f0.0(16) g53<1>UD g51<8,8,1>UD g49<8,8,1>UD { align1 1H };
|
||||
or(1) g8<1>UD g8<0,1,0>UD g4<0,1,0>UD { align1 WE_all 1N };
|
||||
or(1) a0<1>UD g8<0,1,0>UD 0x060ba000UD { align1 WE_all 1N };
|
||||
(+f0.0) or(8) g3<1>UD g3<8,8,1>UD 0x3f800000UD { align1 1Q };
|
||||
(+f0.0) or(16) g3<1>UD g3<8,8,1>UD 0x3f800000UD { align1 1H };
|
||||
or(1) a0<1>UD a0<0,1,0>UD 0x02280300UD { align1 WE_all 1N };
|
||||
or(1) a0<1>UD g4<0,1,0>UD 0x04036000UD { align1 WE_all 3N };
|
||||
(+f0.0) or(8) g17.1<2>UD g17.1<8,4,2>UD 0x3ff00000UD { align1 2Q };
|
||||
or(8) g4<1>UW g4<8,8,1>UW g6<8,8,1>UW { align1 1Q };
|
||||
or(16) g16<1>UW g14<16,16,1>UW g15<16,16,1>UW { align1 1H };
|
||||
or(8) g22<1>UD ~g2.2<0,1,0>D g21<8,8,1>UD { align1 1Q };
|
||||
or(16) g37<1>UD ~g2.2<0,1,0>D g35<8,8,1>UD { align1 1H };
|
||||
or(8) g9<1>D ~g8<8,8,1>D ~g7<8,8,1>D { align1 1Q };
|
||||
or(16) g13<1>D ~g11<8,8,1>D ~g9<8,8,1>D { align1 1H };
|
||||
or(1) g14<1>UD g14<0,1,0>UD g19<0,1,0>UD { align1 WE_all 3N };
|
||||
or.z.f0.0(8) null<1>UD g5<8,8,1>UD g6<8,8,1>UD { align1 1Q };
|
||||
or.z.f0.0(16) null<1>UD g17<8,8,1>UD g19<8,8,1>UD { align1 1H };
|
||||
|
|
@ -1,23 +0,0 @@
|
|||
06 00 60 00 08 02 a0 26 20 06 8d 02 a0 02 8d 00
|
||||
06 00 60 02 00 02 00 20 a0 02 8d 02 40 00 8d 00
|
||||
06 00 60 02 08 02 a0 20 c0 07 8d 02 60 08 8d 00
|
||||
06 10 60 00 08 02 a0 20 44 0d 8a 06 00 00 f0 7f
|
||||
06 00 80 02 00 02 00 20 60 04 8d 02 00 04 8d 00
|
||||
06 00 80 00 08 02 80 24 40 04 8d 02 80 02 8d 00
|
||||
06 00 80 02 08 02 a0 26 60 06 8d 02 20 06 8d 00
|
||||
06 00 00 00 0c 02 00 21 00 01 00 02 80 00 00 00
|
||||
06 00 00 00 04 02 00 22 00 01 00 06 00 a0 0b 06
|
||||
06 00 61 00 08 02 60 20 60 00 8d 06 00 00 80 3f
|
||||
06 00 81 00 08 02 60 20 60 00 8d 06 00 00 80 3f
|
||||
06 00 00 00 04 00 00 22 00 02 00 06 00 03 28 02
|
||||
06 10 00 00 04 02 00 22 80 00 00 06 00 60 03 04
|
||||
06 10 61 00 08 02 24 42 24 02 8a 06 00 00 f0 3f
|
||||
06 00 60 00 48 12 80 20 80 00 8d 12 c0 00 8d 00
|
||||
06 00 80 00 48 12 00 22 c0 01 b1 12 e0 01 b1 00
|
||||
06 00 60 00 08 0a c0 22 48 40 00 02 a0 02 8d 00
|
||||
06 00 80 00 08 0a a0 24 48 40 00 02 60 04 8d 00
|
||||
06 00 60 00 28 0a 20 21 00 41 8d 0a e0 40 8d 00
|
||||
06 00 80 00 28 0a a0 21 60 41 8d 0a 20 41 8d 00
|
||||
06 10 00 00 0c 02 c0 21 c0 01 00 02 60 02 00 00
|
||||
06 00 60 01 00 02 00 20 a0 00 8d 02 c0 00 8d 00
|
||||
06 00 80 01 00 02 00 20 20 02 8d 02 60 02 8d 00
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
pln(8) g124<1>F g4<0,1,0>F g2<8,8,1>F { align1 1Q };
|
||||
pln(16) g120<1>F g6<0,1,0>F g2<8,8,1>F { align1 1H };
|
||||
pln.sat(8) g9<1>F g5<0,1,0>F g2<8,8,1>F { align1 1Q };
|
||||
pln.sat(16) g12<1>F g7<0,1,0>F g2<8,8,1>F { align1 1H };
|
||||
pln.g.f0.0(8) g7<1>F g4<0,1,0>F g2<8,8,1>F { align1 1Q };
|
||||
pln.g.f0.0(16) g11<1>F g6<0,1,0>F g2<8,8,1>F { align1 1H };
|
||||
pln.l.f0.0(8) g8<1>F g4<0,1,0>F g2<8,8,1>F { align1 1Q };
|
||||
pln.l.f0.0(16) g11<1>F g6<0,1,0>F g2<8,8,1>F { align1 1H };
|
||||
pln.nz.f0.0(8) g18<1>F g5<0,1,0>F g2<8,8,1>F { align1 1Q };
|
||||
pln.nz.f0.0(16) g14<1>F g7<0,1,0>F g2<8,8,1>F { align1 1H };
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
5a 00 60 00 e8 3a 80 2f 80 00 00 3a 40 00 8d 00
|
||||
5a 00 80 00 e8 3a 00 2f c0 00 00 3a 40 00 8d 00
|
||||
5a 00 60 80 e8 3a 20 21 a0 00 00 3a 40 00 8d 00
|
||||
5a 00 80 80 e8 3a 80 21 e0 00 00 3a 40 00 8d 00
|
||||
5a 00 60 03 e8 3a e0 20 80 00 00 3a 40 00 8d 00
|
||||
5a 00 80 03 e8 3a 60 21 c0 00 00 3a 40 00 8d 00
|
||||
5a 00 60 05 e8 3a 00 21 80 00 00 3a 40 00 8d 00
|
||||
5a 00 80 05 e8 3a 60 21 c0 00 00 3a 40 00 8d 00
|
||||
5a 00 60 02 e8 3a 40 22 a0 00 00 3a 40 00 8d 00
|
||||
5a 00 80 02 e8 3a c0 21 e0 00 00 3a 40 00 8d 00
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue