intel/elk: Remove a bunch of files that don't apply for Gfx8-

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27563>
This commit is contained in:
Caio Oliveira 2024-01-19 11:54:46 -08:00 committed by Marge Bot
parent 06b553f02c
commit dcf29202d4
124 changed files with 0 additions and 17536 deletions

View file

@ -3490,8 +3490,6 @@ fs_visitor::emit_repclear_shader()
calculate_cfg();
this->first_non_payload_grf = payload().num_regs;
lower_scoreboard();
}
/**
@ -6823,8 +6821,6 @@ fs_visitor::allocate_registers(bool allow_spilling)
*/
assert(prog_data->total_scratch < max_scratch_size);
}
lower_scoreboard();
}
bool

File diff suppressed because it is too large Load diff

View file

@ -1,790 +0,0 @@
/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_kernel.h"
#include "brw_nir.h"
#include "intel_nir.h"
#include "intel_nir.h"
#include "nir_clc_helpers.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/spirv/nir_spirv.h"
#include "dev/intel_debug.h"
#include "util/u_atomic.h"
#include "util/u_dynarray.h"
/*
 * Return the libclc helper shader, building it on first use.
 *
 * The result is cached on the compiler object.  Concurrent callers race to
 * install their build with an atomic compare-and-swap; the loser frees its
 * redundant copy and uses the winner's.
 */
static const nir_shader *
load_clc_shader(struct brw_compiler *compiler, struct disk_cache *disk_cache,
                const nir_shader_compiler_options *nir_options,
                const struct spirv_to_nir_options *spirv_options)
{
   if (compiler->clc_shader != NULL)
      return compiler->clc_shader;

   nir_shader *fresh = nir_load_libclc_shader(64, disk_cache,
                                              spirv_options, nir_options,
                                              disk_cache != NULL);
   if (fresh == NULL)
      return NULL;

   const nir_shader *prev =
      p_atomic_cmpxchg(&compiler->clc_shader, NULL, fresh);
   if (prev != NULL) {
      /* Another thread installed its build first; drop ours. */
      ralloc_free(fresh);
      return prev;
   }

   /* We won the race: tie the shader's lifetime to the compiler. */
   ralloc_steal(compiler, fresh);
   return fresh;
}
/* Create a fresh nir_function_impl for @func and return a builder
 * positioned at the top of it.
 */
static nir_builder
builder_init_new_impl(nir_function *func)
{
   return nir_builder_at(nir_before_impl(nir_function_impl_create(func)));
}
/*
 * Emit a body for a libclc-declared atomic builtin.
 *
 * By the calling convention used here, the function's first parameter is a
 * pointer to the return slot; the remaining parameters map one-to-one onto
 * the sources of the NIR deref atomic (pointer first, then data operands).
 */
static void
implement_atomic_builtin(nir_function *func, nir_atomic_op atomic_op,
                         enum glsl_base_type data_base_type,
                         nir_variable_mode mode)
{
   nir_builder b = builder_init_new_impl(func);
   const struct glsl_type *data_type = glsl_scalar_type(data_base_type);

   unsigned p = 0;

   /* Param 0 is the return-value pointer; cast it so we can store to it. */
   nir_deref_instr *ret = NULL;
   ret = nir_build_deref_cast(&b, nir_load_param(&b, p++),
                              nir_var_function_temp, data_type, 0);

   nir_intrinsic_op op = nir_intrinsic_deref_atomic;
   nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b.shader, op);
   nir_intrinsic_set_atomic_op(atomic, atomic_op);

   /* Feed the remaining function params into the atomic's sources in order. */
   for (unsigned i = 0; i < nir_intrinsic_infos[op].num_srcs; i++) {
      nir_def *src = nir_load_param(&b, p++);
      if (i == 0) {
         /* The first source is our deref */
         assert(nir_intrinsic_infos[op].src_components[i] == -1);
         src = &nir_build_deref_cast(&b, src, mode, data_type, 0)->def;
      }
      atomic->src[i] = nir_src_for_ssa(src);
   }

   nir_def_init_for_type(&atomic->instr, &atomic->def, data_type);
   nir_builder_instr_insert(&b, &atomic->instr);

   /* Write the atomic's result through the return pointer. */
   nir_store_deref(&b, ret, &atomic->def, ~0);
}
/* Emit a body for intel_sub_group_ballot: param 0 is the return-slot
 * pointer, param 1 is the ballot condition.
 */
static void
implement_sub_group_ballot_builtin(nir_function *func)
{
   nir_builder b = builder_init_new_impl(func);

   nir_def *ret_ptr = nir_load_param(&b, 0);
   nir_def *cond = nir_load_param(&b, 1);

   nir_deref_instr *ret_deref =
      nir_build_deref_cast(&b, ret_ptr, nir_var_function_temp,
                           glsl_uint_type(), 0);

   nir_intrinsic_instr *ballot =
      nir_intrinsic_instr_create(b.shader, nir_intrinsic_ballot);
   ballot->num_components = 1;
   ballot->src[0] = nir_src_for_ssa(cond);
   nir_def_init(&ballot->instr, &ballot->def, 1, 32);
   nir_builder_instr_insert(&b, &ballot->instr);

   nir_store_deref(&b, ret_deref, &ballot->def, ~0);
}
/* Provide implementations for the Intel-specific builtins that libclc only
 * declares.  Returns true if any function body was emitted.
 */
static bool
implement_intel_builtins(nir_shader *nir)
{
   /* Itanium-mangled libclc names of the float atomic min/max builtins. */
   static const struct {
      const char *name;
      nir_atomic_op op;
      nir_variable_mode mode;
   } atomic_builtins[] = {
      /* float atom_min(__global float volatile *p, float val) */
      { "_Z10atomic_minPU3AS1Vff", nir_atomic_op_fmin, nir_var_mem_global },
      /* float atom_max(__global float volatile *p, float val) */
      { "_Z10atomic_maxPU3AS1Vff", nir_atomic_op_fmax, nir_var_mem_global },
      /* float atomic_min(__shared float volatile *, float) */
      { "_Z10atomic_minPU3AS3Vff", nir_atomic_op_fmin, nir_var_mem_shared },
      /* float atomic_max(__shared float volatile *, float) */
      { "_Z10atomic_maxPU3AS3Vff", nir_atomic_op_fmax, nir_var_mem_shared },
   };

   bool progress = false;

   nir_foreach_function(func, nir) {
      if (strcmp(func->name, "intel_sub_group_ballot") == 0) {
         implement_sub_group_ballot_builtin(func);
         progress = true;
         continue;
      }

      for (unsigned i = 0;
           i < sizeof(atomic_builtins) / sizeof(atomic_builtins[0]); i++) {
         if (strcmp(func->name, atomic_builtins[i].name) == 0) {
            implement_atomic_builtin(func, atomic_builtins[i].op,
                                     GLSL_TYPE_FLOAT,
                                     atomic_builtins[i].mode);
            progress = true;
            break;
         }
      }
   }

   nir_shader_preserve_all_metadata(nir);

   return progress;
}
/*
 * Lower kernel-specific intrinsics to uniform loads from the kernel
 * argument buffer.
 *
 * Buffer layout: struct brw_kernel_sysvals at offset 0, followed by the
 * kernel arguments.  Returns true if any intrinsic was rewritten.
 */
static bool
lower_kernel_intrinsics(nir_shader *nir)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   bool progress = false;

   /* Arguments live after the sysvals block at the start of the buffer. */
   unsigned kernel_sysvals_start = 0;
   unsigned kernel_arg_start = sizeof(struct brw_kernel_sysvals);
   nir->num_uniforms += kernel_arg_start;

   nir_builder b = nir_builder_create(impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_kernel_input: {
            b.cursor = nir_instr_remove(&intrin->instr);

            nir_intrinsic_instr *load =
               nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
            load->num_components = intrin->num_components;
            /* Kernel-input offsets are 64-bit; load_uniform wants 32-bit. */
            load->src[0] = nir_src_for_ssa(nir_u2u32(&b, intrin->src[0].ssa));
            nir_intrinsic_set_base(load, kernel_arg_start);
            nir_intrinsic_set_range(load, nir->num_uniforms);
            nir_def_init(&load->instr, &load->def,
                         intrin->def.num_components,
                         intrin->def.bit_size);
            nir_builder_instr_insert(&b, &load->instr);
            nir_def_rewrite_uses(&intrin->def, &load->def);
            progress = true;
            break;
         }

         case nir_intrinsic_load_constant_base_ptr: {
            b.cursor = nir_instr_remove(&intrin->instr);
            /* Rebuild the 64-bit constant-data address from the two 32-bit
             * relocation constants patched in at upload time.
             */
            nir_def *const_data_base_addr = nir_pack_64_2x32_split(&b,
               nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW),
               nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
            nir_def_rewrite_uses(&intrin->def, const_data_base_addr);
            progress = true;
            break;
         }

         case nir_intrinsic_load_num_workgroups: {
            b.cursor = nir_instr_remove(&intrin->instr);

            /* Read the workgroup count out of the sysvals block. */
            nir_intrinsic_instr *load =
               nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
            load->num_components = 3;
            load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
            nir_intrinsic_set_base(load, kernel_sysvals_start +
               offsetof(struct brw_kernel_sysvals, num_work_groups));
            nir_intrinsic_set_range(load, 3 * 4);
            nir_def_init(&load->instr, &load->def, 3, 32);
            nir_builder_instr_insert(&b, &load->instr);
            nir_def_rewrite_uses(&intrin->def, &load->def);
            progress = true;
            break;
         }

         default:
            break;
         }
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}
/**
 * Compile an OpenCL kernel from SPIR-V to Intel ISA.
 *
 * Fills @kernel with the compiled code, prog_data, and the argument layout.
 * Returns false on compile failure; when @error_str is non-NULL the compiler
 * error string (allocated from @mem_ctx) is stored through it.
 */
bool
brw_kernel_from_spirv(struct brw_compiler *compiler,
                      struct disk_cache *disk_cache,
                      struct brw_kernel *kernel,
                      void *log_data, void *mem_ctx,
                      const uint32_t *spirv, size_t spirv_size,
                      const char *entrypoint_name,
                      char **error_str)
{
   const struct intel_device_info *devinfo = compiler->devinfo;
   const nir_shader_compiler_options *nir_options =
      compiler->nir_options[MESA_SHADER_KERNEL];

   /* SPIR-V capabilities are gated on the hardware generation. */
   struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_OPENCL,
      .caps = {
         .address = true,
         .float16 = devinfo->ver >= 8,
         .float64 = devinfo->ver >= 8,
         .groups = true,
         .image_write_without_format = true,
         .int8 = devinfo->ver >= 8,
         .int16 = devinfo->ver >= 8,
         .int64 = devinfo->ver >= 8,
         .int64_atomics = devinfo->ver >= 9,
         .kernel = true,
         .linkage = true, /* We receive linked kernel from clc */
         .float_controls = devinfo->ver >= 8,
         .generic_pointers = true,
         .storage_8bit = devinfo->ver >= 8,
         .storage_16bit = devinfo->ver >= 8,
         .subgroup_arithmetic = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_dispatch = true,
         .subgroup_quad = true,
         .subgroup_shuffle = true,
         .subgroup_vote = true,

         .intel_subgroup_shuffle = true,
         .intel_subgroup_buffer_block_io = true,
      },
      .shared_addr_format = nir_address_format_62bit_generic,
      .global_addr_format = nir_address_format_62bit_generic,
      .temp_addr_format = nir_address_format_62bit_generic,
      .constant_addr_format = nir_address_format_64bit_global,
   };

   /* libclc provides the software implementations of OpenCL builtins. */
   spirv_options.clc_shader = load_clc_shader(compiler, disk_cache,
                                              nir_options, &spirv_options);
   if (spirv_options.clc_shader == NULL) {
      fprintf(stderr, "ERROR: libclc shader missing."
                      " Consider installing the libclc package\n");
      abort();
   }

   assert(spirv_size % 4 == 0);
   nir_shader *nir =
      spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
                   entrypoint_name, &spirv_options, nir_options);
   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");
   ralloc_steal(mem_ctx, nir);
   nir->info.name = ralloc_strdup(nir, entrypoint_name);

   if (INTEL_DEBUG(DEBUG_CS)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function_impl(impl, nir) {
         nir_index_ssa_defs(impl);
      }

      fprintf(stderr, "NIR (from SPIR-V) for kernel\n");
      nir_print_shader(nir, stderr);
   }

   NIR_PASS_V(nir, implement_intel_builtins);
   NIR_PASS_V(nir, nir_link_shader_functions, spirv_options.clc_shader);

   /* We have to lower away local constant initializers right before we
    * inline functions.  That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_deref);

   /* Pick off the single entrypoint that we want */
   nir_remove_non_entrypoints(nir);

   /* Now that we've deleted all but the main function, we can go ahead and
    * lower the rest of the constant initializers.  We do this here so that
    * nir_remove_dead_variables and split_per_member_structs below see the
    * corresponding stores.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

   /* LLVM loves to take advantage of the fact that vec3s in OpenCL are 16B
    * aligned and so it can just read/write them as vec4s.  This results in a
    * LOT of vec4->vec3 casts on loads and stores.  One solution to this
    * problem is to get rid of all vec3 variables.
    */
   NIR_PASS_V(nir, nir_lower_vec3_to_vec4,
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global|
              nir_var_mem_constant);

   /* We assign explicit types early so that the optimizer can take advantage
    * of that information and hopefully get rid of some of our memcpys.
    */
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_uniform |
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global,
              glsl_get_cl_type_size_align);

   struct brw_nir_compiler_opts opts = {};
   brw_preprocess_nir(compiler, nir, &opts);

   /* Kernel arguments are uniform variables keyed by data.location. */
   int max_arg_idx = -1;
   nir_foreach_uniform_variable(var, nir) {
      assert(var->data.location < 256);
      max_arg_idx = MAX2(max_arg_idx, var->data.location);
   }

   kernel->args_size = nir->num_uniforms;
   kernel->arg_count = max_arg_idx + 1;

   /* No bindings */
   struct brw_kernel_arg_desc *args =
      rzalloc_array(mem_ctx, struct brw_kernel_arg_desc, kernel->arg_count);
   kernel->args = args;

   nir_foreach_uniform_variable(var, nir) {
      struct brw_kernel_arg_desc arg_desc = {
         .offset = var->data.driver_location,
         .size = glsl_get_explicit_size(var->type, false),
      };
      assert(arg_desc.offset + arg_desc.size <= nir->num_uniforms);

      assert(var->data.location >= 0);
      args[var->data.location] = arg_desc;
   }

   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_all, NULL);

   /* Lower again, this time after dead-variables to get more compact variable
    * layouts.
    */
   nir->global_mem_size = 0;
   nir->scratch_size = 0;
   nir->info.shared_size = 0;
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant,
              glsl_get_cl_type_size_align);

   /* Collect constant-variable initializers into the shader's constant
    * data block, which is uploaded alongside the program.
    */
   if (nir->constant_data_size > 0) {
      assert(nir->constant_data == NULL);
      nir->constant_data = rzalloc_size(nir, nir->constant_data_size);
      nir_gather_explicit_io_initializers(nir, nir->constant_data,
                                          nir->constant_data_size,
                                          nir_var_mem_constant);
   }

   if (INTEL_DEBUG(DEBUG_CS)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function_impl(impl, nir) {
         nir_index_ssa_defs(impl);
      }

      fprintf(stderr, "NIR (before I/O lowering) for kernel\n");
      nir_print_shader(nir, stderr);
   }

   NIR_PASS_V(nir, nir_lower_memcpy);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,
              nir_address_format_64bit_global);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform,
              nir_address_format_32bit_offset_as_64bit);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global,
              nir_address_format_62bit_generic);

   NIR_PASS_V(nir, nir_lower_convert_alu_types, NULL);

   NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, devinfo, NULL);
   NIR_PASS_V(nir, lower_kernel_intrinsics);

   struct brw_cs_prog_key key = { };

   memset(&kernel->prog_data, 0, sizeof(kernel->prog_data));
   kernel->prog_data.base.nr_params = DIV_ROUND_UP(nir->num_uniforms, 4);

   struct brw_compile_cs_params params = {
      .base = {
         .nir = nir,
         .stats = kernel->stats,
         .log_data = log_data,
         .mem_ctx = mem_ctx,
      },
      .key = &key,
      .prog_data = &kernel->prog_data,
   };

   kernel->code = brw_compile_cs(compiler, &params);

   if (error_str)
      *error_str = params.base.error_str;

   return kernel->code != NULL;
}
/*
 * Find the constant-offset scratch store that fully covers the bytes
 * [read_offset, read_offset + read_size) reloaded as @value, and return the
 * SSA value that was stored.  Aborts if no covering store exists.
 */
static nir_def *
rebuild_value_from_store(struct util_dynarray *stores,
                         nir_def *value, unsigned read_offset)
{
   const unsigned read_size = value->num_components * value->bit_size / 8;
   const unsigned read_end = read_offset + read_size;

   util_dynarray_foreach(stores, nir_intrinsic_instr *, iter) {
      nir_intrinsic_instr *store = *iter;

      const unsigned write_offset = nir_src_as_uint(store->src[1]);
      const unsigned write_size = nir_src_num_components(store->src[0]) *
                                  nir_src_bit_size(store->src[0]) / 8;

      /* Skip stores that do not cover the whole read range. */
      if (write_offset > read_offset ||
          (write_offset + write_size) < read_end)
         continue;

      assert(nir_block_dominates(store->instr.block,
                                 value->parent_instr->block));
      assert(write_size == read_size);
      return store->src[0].ssa;
   }

   unreachable("Matching scratch store not found");
}
/**
* Remove temporary variables stored to scratch to be then reloaded
* immediately. Remap the load to the store SSA value.
*
* This workaround is only meant to be applied to shaders in src/intel/shaders
* where we know there should be no issue. More complex cases might not work
* with this approach.
*/
/*
 * Two phases: (1) collect every scratch store with a constant offset,
 * (2) replace each constant-offset scratch load with the SSA value of the
 * store that covers it, then delete all the collected stores.
 */
static bool
nir_remove_llvm17_scratch(nir_shader *nir)
{
   struct util_dynarray scratch_stores;
   void *mem_ctx = ralloc_context(NULL);

   util_dynarray_init(&scratch_stores, mem_ctx);

   /* Phase 1: gather constant-offset scratch stores. */
   nir_foreach_function_impl(func, nir) {
      nir_foreach_block(block, func) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_store_scratch)
               continue;

            nir_const_value *offset = nir_src_as_const_value(intrin->src[1]);
            if (offset != NULL) {
               util_dynarray_append(&scratch_stores, nir_intrinsic_instr *, intrin);
            }
         }
      }
   }

   bool progress = false;

   /* Phase 2: rewrite constant-offset scratch loads to the stored values. */
   if (util_dynarray_num_elements(&scratch_stores, nir_intrinsic_instr *) > 0) {
      nir_foreach_function_impl(func, nir) {
         nir_foreach_block(block, func) {
            nir_foreach_instr_safe(instr, block) {
               if (instr->type != nir_instr_type_intrinsic)
                  continue;

               nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
               if (intrin->intrinsic != nir_intrinsic_load_scratch)
                  continue;

               nir_const_value *offset = nir_src_as_const_value(intrin->src[0]);
               if (offset == NULL)
                  continue;

               nir_def_rewrite_uses(&intrin->def,
                                    rebuild_value_from_store(
                                       &scratch_stores, &intrin->def,
                                       nir_src_as_uint(intrin->src[0])));
               nir_instr_remove(instr);
               progress = true;
            }
         }
      }
   }

   /* The stores are now dead by construction; remove them. */
   util_dynarray_foreach(&scratch_stores, nir_intrinsic_instr *, _store) {
      nir_intrinsic_instr *store = *_store;
      nir_instr_remove(&store->instr);
   }

   /* Quick sanity check */
   assert(util_dynarray_num_elements(&scratch_stores, nir_intrinsic_instr *) == 0 ||
          progress);

   ralloc_free(mem_ctx);

   return progress;
}
/* Run the basic scalar optimization loop until it stops making progress.
 * Helper for cleanup_llvm17_scratch (the loop is needed both before and
 * after the scratch removal).
 */
static void
cleanup_llvm17_scratch_opt_loop(nir_shader *nir)
{
   bool progress;
   do {
      progress = false;
      NIR_PASS(progress, nir, nir_copy_prop);
      NIR_PASS(progress, nir, nir_opt_dce);
      NIR_PASS(progress, nir, nir_opt_constant_folding);
      NIR_PASS(progress, nir, nir_opt_cse);
      NIR_PASS(progress, nir, nir_opt_algebraic);
   } while (progress);
}

/*
 * Workaround for LLVM 17 scratch usage: optimize so that scratch offsets
 * fold to constants, strip the redundant scratch round-trips, then clean up
 * the dead code that removal exposes.
 */
static void
cleanup_llvm17_scratch(nir_shader *nir)
{
   cleanup_llvm17_scratch_opt_loop(nir);

   nir_remove_llvm17_scratch(nir);

   cleanup_llvm17_scratch_opt_loop(nir);
}
/**
 * Translate a SPIR-V library (no single entrypoint) to preprocessed NIR.
 *
 * Used for the internal shaders in src/intel/shaders.  @llvm17_wa enables
 * the scratch-removal workaround for LLVM 17 output.  The returned shader
 * is owned by @mem_ctx.
 */
nir_shader *
brw_nir_from_spirv(void *mem_ctx, const uint32_t *spirv, size_t spirv_size,
                   bool llvm17_wa)
{
   /* Unlike brw_kernel_from_spirv, all caps are enabled unconditionally
    * and .create_library keeps every function around.
    */
   struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_OPENCL,
      .caps = {
         .address = true,
         .groups = true,
         .image_write_without_format = true,
         .int8 = true,
         .int16 = true,
         .int64 = true,
         .int64_atomics = true,
         .kernel = true,
         .linkage = true, /* We receive linked kernel from clc */
         .float_controls = true,
         .generic_pointers = true,
         .storage_8bit = true,
         .storage_16bit = true,
         .subgroup_arithmetic = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_dispatch = true,
         .subgroup_quad = true,
         .subgroup_shuffle = true,
         .subgroup_vote = true,

         .intel_subgroup_shuffle = true,
         .intel_subgroup_buffer_block_io = true,
      },
      .shared_addr_format = nir_address_format_62bit_generic,
      .global_addr_format = nir_address_format_62bit_generic,
      .temp_addr_format = nir_address_format_62bit_generic,
      .constant_addr_format = nir_address_format_64bit_global,
      .create_library = true,
   };

   assert(spirv_size % 4 == 0);
   nir_shader *nir =
      spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
                   "library", &spirv_options, &brw_scalar_nir_options);
   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");
   ralloc_steal(mem_ctx, nir);
   nir->info.name = ralloc_strdup(nir, "library");

   if (INTEL_DEBUG(DEBUG_CS)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function_impl(impl, nir) {
         nir_index_ssa_defs(impl);
      }

      fprintf(stderr, "NIR (from SPIR-V) for kernel\n");
      nir_print_shader(nir, stderr);
   }

   NIR_PASS_V(nir, implement_intel_builtins);
   /* NOTE(review): spirv_options.clc_shader is never set in this function,
    * so this links against NULL — presumably intentional for library
    * compiles (the input is already linked by clc); verify.
    */
   NIR_PASS_V(nir, nir_link_shader_functions, spirv_options.clc_shader);

   /* We have to lower away local constant initializers right before we
    * inline functions.  That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~(nir_var_shader_temp |
                                                      nir_var_function_temp));
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo |
              nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL);

   /* First optimization round, before inlining. */
   {
      bool progress;
      do
      {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_undef);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_algebraic);
      } while (progress);
   }

   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);

   assert(nir->scratch_size == 0);

   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_cl_type_size_align);

   /* Main optimization round, after inlining. */
   {
      bool progress;
      do
      {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_undef);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_split_var_copies);
         NIR_PASS(progress, nir, nir_lower_var_copies);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_algebraic);
         NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
         NIR_PASS(progress, nir, nir_opt_dead_cf);
         NIR_PASS(progress, nir, nir_opt_remove_phis);
         NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
         NIR_PASS(progress, nir, nir_lower_vec3_to_vec4, nir_var_mem_generic | nir_var_uniform);
         NIR_PASS(progress, nir, nir_opt_memcpy);
      } while (progress);
   }

   NIR_PASS_V(nir, nir_scale_fdiv);

   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo |
              nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_shared | nir_var_function_temp, NULL);

   nir->scratch_size = 0;
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_mem_shared | nir_var_function_temp | nir_var_shader_temp |
              nir_var_mem_global | nir_var_mem_constant,
              glsl_get_cl_type_size_align);

   // Lower memcpy - needs to wait until types are sized
   {
      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, nir, nir_opt_memcpy);
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_split_var_copies);
         NIR_PASS(progress, nir, nir_lower_var_copies);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
      } while (progress);
   }
   NIR_PASS_V(nir, nir_lower_memcpy);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_shared | nir_var_function_temp | nir_var_shader_temp | nir_var_uniform,
              nir_address_format_32bit_offset_as_64bit);

   NIR_PASS_V(nir, nir_lower_system_values);

   /* Hopefully we can drop this once lower_vars_to_ssa has improved to not
    * lower everything to scratch.
    */
   if (llvm17_wa)
      cleanup_llvm17_scratch(nir);

   /* Lower again, this time after dead-variables to get more compact variable
    * layouts.
    */
   nir->global_mem_size = 0;
   nir->scratch_size = 0;
   nir->info.shared_size = 0;
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant,
              glsl_get_cl_type_size_align);

   /* Collect constant-variable initializers into the shader's constant
    * data block.
    */
   if (nir->constant_data_size > 0) {
      assert(nir->constant_data == NULL);
      nir->constant_data = rzalloc_size(nir, nir->constant_data_size);
      nir_gather_explicit_io_initializers(nir, nir->constant_data,
                                          nir->constant_data_size,
                                          nir_var_mem_constant);
   }

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,
              nir_address_format_64bit_global);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform,
              nir_address_format_32bit_offset_as_64bit);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global,
              nir_address_format_62bit_generic);

   if (INTEL_DEBUG(DEBUG_CS)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function_impl(impl, nir) {
         nir_index_ssa_defs(impl);
      }

      fprintf(stderr, "NIR (before I/O lowering) for kernel\n");
      nir_print_shader(nir, stderr);
   }

   return nir;
}

View file

@ -1,78 +0,0 @@
/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef BRW_KERNEL_H
#define BRW_KERNEL_H
#include "brw_compiler.h"
struct disk_cache;
#ifdef __cplusplus
extern "C" {
#endif
/** Software interface for system values in kernels
 *
 * These are intended to go at the start of the kernel argument buffer.
 */
struct brw_kernel_sysvals {
   /* Grid size in workgroups; read by load_num_workgroups lowering. */
   uint32_t num_work_groups[3];
   uint32_t pad[5];
};

/** Location of one kernel argument within the argument buffer. */
struct brw_kernel_arg_desc {
   uint16_t offset;   /* Byte offset of the argument in the buffer. */
   uint16_t size;     /* Size of the argument in bytes. */
};

/** A compiled kernel plus the metadata needed to dispatch it. */
struct brw_kernel {
   struct brw_cs_prog_data prog_data;
   struct brw_compile_stats stats[3];
   uint16_t args_size;   /* Total size of the argument buffer in bytes. */
   uint16_t arg_count;   /* Number of entries in args. */
   const struct brw_kernel_arg_desc *args;
   const void *code;     /* Compiled ISA. */
};
bool
brw_kernel_from_spirv(struct brw_compiler *compiler,
struct disk_cache *disk_cache,
struct brw_kernel *kernel,
void *log_data, void *mem_ctx,
const uint32_t *spirv, size_t spirv_size,
const char *entrypoint_name,
char **error_str);
nir_shader *
brw_nir_from_spirv(void *mem_ctx, const uint32_t *spirv, size_t spirv_size,
bool llvm17_wa);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* BRW_KERNEL_H */

File diff suppressed because it is too large Load diff

View file

@ -23,7 +23,6 @@
#include "intel_nir.h"
#include "brw_nir.h"
#include "brw_nir_rt.h"
#include "brw_shader.h"
#include "dev/intel_debug.h"
#include "compiler/glsl_types.h"
@ -1770,15 +1769,6 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
OPT(nir_opt_dce);
/* The mesh stages require this pass to be called at the last minute,
* but if anything is done by it, it will also constant fold, and that
* undoes the work done by nir_trivialize_registers, so call it right
* before that one instead.
*/
if (nir->info.stage == MESA_SHADER_MESH ||
nir->info.stage == MESA_SHADER_TASK)
brw_nir_adjust_payload(nir);
nir_trivialize_registers(nir);
/* This is the last pass we run before we start emitting stuff. It

View file

@ -1,818 +0,0 @@
/*
* Copyright 2023 Intel Corporation
* SPDX-License-Identifier: MIT
*/
/**
* \file brw_nir_lower_cooperative_matrix.c
* Lower cooperative matrix to subgroup operations.
*
* All supported matrix types are assumed to have either 8 rows or 8
* columns. The other dimension of the matrix is typically 8 times the number
* of data elements that can be stored in a 32-bit dword. Matrix data is
* indexed by a combination of an array element and a subgroup invocation ID.
*
* Two layouts for matrix data are used. In the first layout,
* subgroupShuffle(slice[N], ...) accesses row N of the matrix. This will be
* called row-major hereafter. In the other layout,
* subgroupShuffle(slice[...], M) accesses column M of the matrix. This will
* be called column-major hereafter. In cases where a single 32-bit value is
* stored in each entry, these layouts are identical.
*
* The subtle difference arises when multiple values are packed into a single
* 32-bit dword. If two 16-bit values are packed in a single 32-bit value in
* column-major, subgroupShuffle(slice[0], 1) holds matrix entries m[1][1] and
* m[2][1] (in m[row][column] notation). In row-major, that same shuffle holds
* m[0][2] and m[0][3].
*
* There is an alternate way to think about the matrix layouts. Every matrix
* size supported by the Intel driver is either Sx8 (e.g., 16x8 for float16 B
* matrix) or Sx8T (e.g., 8x32 for int8 A matrix). The A matrix and B matrix
* layouts are such that a single 8 dword register holds an entire row of the
* matrix.
*
* Consider a matrix stored starting in register g32. In an A matrix, the
* packed dwords of g32 contain only the data for a single row of the
* matrix. g32 is row 0, g33 is row 1, etc. In a B matrix, the packed dwords
* of g(32+N).X contain only the data for a single column of the
* matrix. g[32:40].0 is column 0, g[32:40].1 is column 1, etc.
*
* This leads to some shenanigans in \c lower_cmat_load_store.
*
* In the common case, A, C, and result matrices are stored row major while B
* matrices are stored column major. This arrangement facilitates efficient
* dot product operations using DPAS or DP4A instructions.
*
* Future optimizations are possible when row and column major are
* flipped. That is, efficient dot products are also possible when A, C, and
* result matrices are column major while B is row major.
*/
#include "brw_nir.h"
/* Per-shader state shared by the cooperative-matrix lowering helpers. */
struct lower_cmat_state {
   nir_shader *shader;

   /* Maps slice variable -> original cooperative-matrix glsl_type
    * (see get_coop_type_for_slice).
    */
   struct hash_table *slice_coop_types;

   /* Presumably maps matrix variables to their slice variables — the uses
    * are later in this file; verify against them.
    */
   struct hash_table *vars_to_slice;

   unsigned subgroup_size;
};
static void
print_coop_types(struct lower_cmat_state *state)
{
fprintf(stderr, "--- Slices to Cooperative Matrix type table\n");
hash_table_foreach(state->slice_coop_types, e) {
nir_variable *var = (void *)e->key;
const struct glsl_type *t = e->data;
fprintf(stderr, "%p: %s -> %s\n", var, var->name, glsl_get_type_name(t));
}
fprintf(stderr, "\n\n");
}
/* Look up the cooperative-matrix type recorded for the variable behind a
 * slice deref.  The entry must already exist in the table.
 */
static const struct glsl_type *
get_coop_type_for_slice(struct lower_cmat_state *state, nir_deref_instr *deref)
{
   struct hash_entry *entry =
      _mesa_hash_table_search(state->slice_coop_types,
                              nir_deref_instr_get_variable(deref));

   assert(entry != NULL);
   return entry->data;
}
static bool
lower_cmat_filter(const nir_instr *instr, const void *_state)
{
if (instr->type == nir_instr_type_deref) {
nir_deref_instr *deref = nir_instr_as_deref(instr);
return glsl_type_is_cmat(deref->type);
}
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
case nir_intrinsic_cmat_construct:
case nir_intrinsic_cmat_load:
case nir_intrinsic_cmat_store:
case nir_intrinsic_cmat_length:
case nir_intrinsic_cmat_muladd:
case nir_intrinsic_cmat_unary_op:
case nir_intrinsic_cmat_binary_op:
case nir_intrinsic_cmat_scalar_op:
case nir_intrinsic_cmat_bitcast:
case nir_intrinsic_cmat_insert:
case nir_intrinsic_cmat_extract:
case nir_intrinsic_cmat_copy:
return true;
default:
return false;
}
}
/**
 * Get number of matrix elements packed in each component of the slice.
 */
static unsigned
get_packing_factor(const struct glsl_cmat_description desc,
                   const struct glsl_type *slice_type)
{
   assert(!glsl_type_is_cmat(slice_type));

   const unsigned slice_bits =
      glsl_get_bit_size(glsl_without_array(slice_type));
   const unsigned element_bits =
      glsl_base_type_get_bit_size(desc.element_type);

   /* Slice components must be a whole multiple of the element size. */
   assert(slice_bits >= element_bits);
   assert(slice_bits % element_bits == 0);

   return slice_bits / element_bits;
}
/* Compute the GLSL vector type of the per-invocation "slice" that replaces a
 * cooperative matrix with description @desc at state->subgroup_size.
 */
static const struct glsl_type *
get_slice_type_from_desc(const struct lower_cmat_state *state,
                         const struct glsl_cmat_description desc)
{
   enum glsl_base_type base_type;

   /* Number of matrix elements stored by each subgroup invocation. If the
    * data is packed, the slice size will be less than this.
    */
   const unsigned elements_per_invocation =
      (desc.rows * desc.cols) / state->subgroup_size;

   assert(elements_per_invocation > 0);

   /* Each slice channel is 32 bits wide. */
   const unsigned element_bits = 32;
   const unsigned bits = glsl_base_type_get_bit_size(desc.element_type);

   unsigned packing_factor = MIN2(elements_per_invocation,
                                  element_bits / bits);

   /* Adjust the packing factor so that each row of the matrix fills an
    * entire GRF.
    *
    * The in-register layout of B matrices is different, so those are handled
    * more like column major (for row major matrices). See the file comment
    * for more details.
    */
   const unsigned actual_cols = desc.use != GLSL_CMAT_USE_B ? desc.cols : desc.rows;
   while ((actual_cols / packing_factor) < 8) {
      assert(packing_factor > 1);
      packing_factor /= 2;
   }

   switch (desc.element_type) {
   case GLSL_TYPE_FLOAT:
      base_type = GLSL_TYPE_FLOAT;
      break;
   /* Packed elements (including float16) are carried in unsigned words. */
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_FLOAT16:
   case GLSL_TYPE_UINT8:
   case GLSL_TYPE_UINT16:
      base_type = glsl_get_base_type(glsl_uintN_t_type(packing_factor * bits));
      break;
   case GLSL_TYPE_INT:
   case GLSL_TYPE_INT8:
   case GLSL_TYPE_INT16:
      base_type = glsl_get_base_type(glsl_intN_t_type(packing_factor * bits));
      break;
   default:
      unreachable("Invalid cooperative matrix element type.");
   }

   unsigned len = elements_per_invocation / packing_factor;

   /* Supported matrix sizes are designed to fill either 4 or 8 SIMD8
    * registers. That means:
    *
    *                  4 registers    8 registers
    *    SIMD32        len = 1        len = 2
    *    SIMD16        len = 2        len = 4
    *    SIMD8         len = 4        len = 8
    *
    * If configurations are added that result in other values of len, at the
    * very least this assertion will need to be updated. The only value of len
    * that makes sense to add would be 16, and that would be a lot of
    * registers.
    */
   assert(len == 1 || len == 2 || len == 4 || len == 8);

   const struct glsl_type *slice_type = glsl_vector_type(base_type, len);

   /* Sanity check: the chosen channel width reproduces the packing factor. */
   assert(packing_factor == get_packing_factor(desc, slice_type));

   return slice_type;
}
/* Map a (possibly arrayed) cooperative matrix type to its slice type,
 * preserving any array dimensions.
 */
static const struct glsl_type *
get_slice_type(const struct lower_cmat_state *state,
               const struct glsl_type *type)
{
   if (!glsl_type_is_array(type)) {
      assert(glsl_type_is_cmat(type));
      return get_slice_type_from_desc(state, *glsl_get_cmat_description(type));
   }

   const struct glsl_type *elem_slice =
      get_slice_type(state, glsl_get_array_element(type));

   return glsl_array_type(elem_slice, glsl_array_size(type), 0);
}
/* Create a function-temporary slice variable for @mat_type, record which
 * matrix type it represents, and return a fresh deref to it.
 */
static nir_deref_instr *
create_local_slice(struct lower_cmat_state *state, nir_builder *b,
                   const struct glsl_type *mat_type, const char *name)
{
   nir_variable *slice_var =
      nir_local_variable_create(b->impl, get_slice_type(state, mat_type), name);

   _mesa_hash_table_insert(state->slice_coop_types, slice_var,
                           (void *)mat_type);

   return nir_build_deref_var(b, slice_var);
}
/* Lower cmat_load/cmat_store into per-channel memory accesses.
 *
 * Two paths: when the requested memory layout matches the in-register
 * layout for this matrix use (first branch), whole packed slice channels
 * are transferred directly using a stride scaled down by the packing
 * factor; otherwise (second branch) each of the packing_factor elements
 * inside a channel is loaded/stored individually and re-packed.
 */
static void
lower_cmat_load_store(nir_builder *b, nir_intrinsic_instr *intrin,
                      struct lower_cmat_state *state)
{
   const bool load = intrin->intrinsic == nir_intrinsic_cmat_load;
   /* Operand order differs between the intrinsics: load is
    * (slice, ptr, stride); store is (ptr, slice, stride).
    */
   const unsigned mat_src = load ? 0 : 1;
   const unsigned ptr_src = load ? 1 : 0;

   nir_deref_instr *slice = nir_src_as_deref(intrin->src[mat_src]);
   const struct glsl_type *mat_type = get_coop_type_for_slice(state, slice);
   const struct glsl_cmat_description *desc = glsl_get_cmat_description(mat_type);

   nir_def *results[NIR_MAX_VEC_COMPONENTS];
   const unsigned num_components = glsl_get_vector_elements(slice->type);
   const unsigned packing_factor = get_packing_factor(*desc, slice->type);

   nir_deref_instr *pointer = nir_src_as_deref(intrin->src[ptr_src]);

   /* True when the memory layout matches the register layout: row-major for
    * non-B matrices, column-major for B matrices.
    */
   if ((nir_intrinsic_matrix_layout(intrin) == GLSL_MATRIX_LAYOUT_ROW_MAJOR) ==
       (desc->use != GLSL_CMAT_USE_B)) {
      /* Stride is rescaled to units of whole packed slice channels. */
      nir_def *stride = nir_udiv_imm(b, intrin->src[2].ssa, packing_factor);
      const struct glsl_type *element_type =
         glsl_scalar_type(glsl_get_base_type(slice->type));
      pointer = nir_build_deref_cast(b, &pointer->def, pointer->modes,
                                     element_type,
                                     glsl_get_bit_size(element_type) / 8);

      nir_def *invocation = nir_load_subgroup_invocation(b);
      nir_def *base_offset;
      nir_def *step;

      /* Each group of 8 invocations covers one row (non-B) or one column
       * (B); base_offset picks this invocation's starting element and step
       * advances to the element for the next slice channel.
       */
      if (desc->use != GLSL_CMAT_USE_B) {
         base_offset = nir_iadd(b,
                                nir_imul(b,
                                         nir_udiv_imm(b, invocation, 8),
                                         stride),
                                nir_umod_imm(b, invocation, 8));
         step = nir_imul_imm(b, stride, state->subgroup_size / 8);
      } else {
         base_offset = nir_iadd(b,
                                nir_imul(b,
                                         nir_umod_imm(b, invocation, 8),
                                         stride),
                                nir_udiv_imm(b, invocation, 8));
         step = nir_imm_int(b, state->subgroup_size / 8);
      }

      for (unsigned i = 0; i < num_components; i++) {
         nir_def *offset = nir_imul_imm(b, step, i);
         nir_deref_instr *memory_deref =
            nir_build_deref_ptr_as_array(b, pointer,
                                         nir_i2iN(b,
                                                  nir_iadd(b,
                                                           base_offset,
                                                           offset),
                                                  pointer->def.bit_size));

         if (load) {
            results[i] = nir_load_deref(b, memory_deref);
         } else {
            nir_def *src = nir_channel(b, nir_load_deref(b, slice), i);
            nir_store_deref(b, memory_deref, src, 0x1);
         }
      }
   } else {
      /* Layouts differ: move each matrix element individually. */
      nir_def *stride = intrin->src[2].ssa;
      const struct glsl_type *element_type = glsl_scalar_type(desc->element_type);
      const unsigned element_bits = glsl_base_type_get_bit_size(desc->element_type);
      const unsigned element_stride = element_bits / 8;
      pointer = nir_build_deref_cast(b, &pointer->def, pointer->modes, element_type,
                                     element_stride);

      nir_def *invocation_div_8 = nir_udiv_imm(b, nir_load_subgroup_invocation(b), 8);
      nir_def *invocation_mod_8 = nir_umod_imm(b, nir_load_subgroup_invocation(b), 8);

      nir_def *packed_stride = nir_imul_imm(b, stride, packing_factor);

      for (unsigned i = 0; i < num_components; i++) {
         const unsigned i_offset = i * (state->subgroup_size / 8);

         /* Holds the packing_factor sub-elements gathered for channel i
          * on the load path.
          */
         nir_def *v[4];

         for (unsigned j = 0; j < packing_factor; j++) {
            nir_def *j_offset = nir_imul_imm(b, stride, j);
            nir_def *offset;

            if (desc->use != GLSL_CMAT_USE_B) {
               offset = nir_iadd(b,
                                 nir_iadd(b,
                                          nir_imul(b,
                                                   invocation_mod_8,
                                                   packed_stride),
                                          invocation_div_8),
                                 nir_iadd_imm(b, j_offset, i_offset));
            } else {
               offset = nir_iadd(b,
                                 nir_iadd(b,
                                          nir_imul(b,
                                                   invocation_div_8,
                                                   packed_stride),
                                          invocation_mod_8),
                                 nir_iadd(b,
                                          nir_imul_imm(b,
                                                       packed_stride,
                                                       i_offset),
                                          j_offset));
            }

            nir_deref_instr *memory_deref =
               nir_build_deref_ptr_as_array(b, pointer,
                                            nir_i2iN(b,
                                                     offset,
                                                     pointer->def.bit_size));

            if (load) {
               v[j] = nir_load_deref(b, memory_deref);
            } else {
               nir_def *src = nir_channel(b, nir_load_deref(b, slice), i);
               /* NOTE(review): this inner `v` shadows the `v[4]` array
                * above; harmless here since the store path never reads the
                * array, but worth renaming.
                */
               nir_def *v =
                  nir_channel(b, nir_unpack_bits(b, src, element_bits), j);
               nir_store_deref(b, memory_deref, v, 0x1);
            }
         }

         if (load) {
            results[i] = nir_pack_bits(b, nir_vec(b, v, packing_factor),
                                       packing_factor * element_bits);
         }
      }
   }

   if (load)
      nir_store_deref(b, slice, nir_vec(b, results, num_components),
                      nir_component_mask(num_components));
}
/* Lower cmat_unary_op.  This also covers element-type conversions, so the
 * source and destination slices may have different packing factors.
 */
static void
lower_cmat_unary_op(nir_builder *b, nir_intrinsic_instr *intrin,
                    struct lower_cmat_state *state)
{
   nir_deref_instr *dst_slice = nir_src_as_deref(intrin->src[0]);
   nir_deref_instr *src_slice = nir_src_as_deref(intrin->src[1]);

   nir_def *results[NIR_MAX_VEC_COMPONENTS];
   const unsigned num_components = glsl_get_vector_elements(dst_slice->type);

   const struct glsl_type *dst_mat_type =
      get_coop_type_for_slice(state, dst_slice);
   const struct glsl_type *src_mat_type =
      get_coop_type_for_slice(state, src_slice);

   const struct glsl_cmat_description dst_desc =
      *glsl_get_cmat_description(dst_mat_type);
   const struct glsl_cmat_description src_desc =
      *glsl_get_cmat_description(src_mat_type);

   const unsigned dst_bits = glsl_base_type_bit_size(dst_desc.element_type);
   const unsigned src_bits = glsl_base_type_bit_size(src_desc.element_type);

   /* The type of the returned slice may be different from the type of the
    * input slice.
    */
   const unsigned dst_packing_factor =
      get_packing_factor(dst_desc, dst_slice->type);
   const unsigned src_packing_factor =
      get_packing_factor(src_desc, src_slice->type);

   const nir_op op = nir_intrinsic_alu_op(intrin);

   /* There are three possible cases:
    *
    * 1. dst_packing_factor == src_packing_factor. This is the common case,
    *    and handling it is straightforward.
    *
    * 2. dst_packing_factor > src_packing_factor. This occurs when converting a
    *    float32_t matrix slice to a packed float16_t slice. Loop over the size
    *    of the destination slice, but read multiple entries from the source
    *    slice on each iteration.
    *
    * 3. dst_packing_factor < src_packing_factor. This occurs when converting a
    *    packed int8_t matrix slice to an int32_t slice. Loop over the size of
    *    the source slice, but write multiple entries to the destination slice
    *    on each iteration.
    *
    * Handle all cases by iterating over the total (non-packed) number of
    * elements in the slice. When dst_packing_factor values have been
    * calculated, store them.
    */
   assert((dst_packing_factor * glsl_get_vector_elements(dst_slice->type)) ==
          (src_packing_factor * glsl_get_vector_elements(src_slice->type)));

   /* Stores at most dst_packing_factor partial results. */
   nir_def *v[4];
   assert(dst_packing_factor <= 4);

   /* Iterate over every unpacked element; i indexes the logical element,
    * from which both the packed channel index and the sub-element index are
    * derived for each side.
    */
   for (unsigned i = 0; i < num_components * dst_packing_factor; i++) {
      const unsigned dst_chan_index = i % dst_packing_factor;
      const unsigned src_chan_index = i % src_packing_factor;
      const unsigned dst_index = i / dst_packing_factor;
      const unsigned src_index = i / src_packing_factor;

      nir_def *src =
         nir_channel(b,
                     nir_unpack_bits(b,
                                     nir_channel(b,
                                                 nir_load_deref(b, src_slice),
                                                 src_index),
                                     src_bits),
                     src_chan_index);

      v[dst_chan_index] = nir_build_alu1(b, op, src);

      /* A full destination channel's worth of results is ready: pack and
       * store it.
       */
      if (dst_chan_index == (dst_packing_factor - 1)) {
         results[dst_index] =
            nir_pack_bits(b, nir_vec(b, v, dst_packing_factor),
                          dst_packing_factor * dst_bits);
      }
   }

   nir_store_deref(b, dst_slice, nir_vec(b, results, num_components),
                   nir_component_mask(num_components));
}
/* Lower cmat_binary_op: apply an element-wise ALU op to two matrices of
 * identical type, unpacking and repacking each slice channel.
 */
static void
lower_cmat_binary_op(nir_builder *b, nir_intrinsic_instr *intrin,
                     struct lower_cmat_state *state)
{
   nir_deref_instr *dst_slice = nir_src_as_deref(intrin->src[0]);
   nir_deref_instr *src_a_slice = nir_src_as_deref(intrin->src[1]);
   nir_deref_instr *src_b_slice = nir_src_as_deref(intrin->src[2]);

   nir_def *src_a = nir_load_deref(b, src_a_slice);
   nir_def *src_b = nir_load_deref(b, src_b_slice);

   const struct glsl_type *dst_mat_type =
      get_coop_type_for_slice(state, dst_slice);
   ASSERTED const struct glsl_type *src_a_mat_type =
      get_coop_type_for_slice(state, src_a_slice);
   ASSERTED const struct glsl_type *src_b_mat_type =
      get_coop_type_for_slice(state, src_b_slice);

   /* All three operands must be the same cooperative matrix type. */
   assert(dst_mat_type == src_a_mat_type);
   assert(dst_mat_type == src_b_mat_type);

   const struct glsl_cmat_description desc =
      *glsl_get_cmat_description(dst_mat_type);
   const unsigned bits = glsl_base_type_bit_size(desc.element_type);
   const unsigned packing_factor = get_packing_factor(desc, dst_slice->type);
   const unsigned num_components = glsl_get_vector_elements(dst_slice->type);

   nir_def *out[NIR_MAX_VEC_COMPONENTS];
   for (unsigned c = 0; c < num_components; c++) {
      nir_def *combined =
         nir_build_alu2(b, nir_intrinsic_alu_op(intrin),
                        nir_unpack_bits(b, nir_channel(b, src_a, c), bits),
                        nir_unpack_bits(b, nir_channel(b, src_b, c), bits));

      out[c] = nir_pack_bits(b, combined, packing_factor * bits);
   }

   nir_store_deref(b, dst_slice, nir_vec(b, out, num_components),
                   nir_component_mask(num_components));
}
/* Lower cmat_scalar_op: combine every matrix element with a single scalar
 * via a two-source ALU op, writing the result to the destination slice.
 */
static void
lower_cmat_scalar_op(nir_builder *b, nir_intrinsic_instr *intrin,
                     struct lower_cmat_state *state)
{
   nir_deref_instr *dst_slice = nir_src_as_deref(intrin->src[0]);
   nir_deref_instr *src_slice = nir_src_as_deref(intrin->src[1]);
   nir_def *scalar = intrin->src[2].ssa;

   nir_def *src = nir_load_deref(b, src_slice);

   const struct glsl_type *dst_mat_type =
      get_coop_type_for_slice(state, dst_slice);
   ASSERTED const struct glsl_type *src_mat_type =
      get_coop_type_for_slice(state, src_slice);

   /* Source and destination must be the same cooperative matrix type. */
   assert(dst_mat_type == src_mat_type);

   const struct glsl_cmat_description desc =
      *glsl_get_cmat_description(dst_mat_type);
   const unsigned bits = glsl_base_type_bit_size(desc.element_type);
   const unsigned packing_factor = get_packing_factor(desc, dst_slice->type);
   const unsigned num_components = glsl_get_vector_elements(dst_slice->type);

   nir_def *out[NIR_MAX_VEC_COMPONENTS];
   for (unsigned c = 0; c < num_components; c++) {
      nir_def *unpacked = nir_unpack_bits(b, nir_channel(b, src, c), bits);
      nir_def *combined = nir_build_alu2(b, nir_intrinsic_alu_op(intrin),
                                         unpacked, scalar);

      out[c] = nir_pack_bits(b, combined, packing_factor * bits);
   }

   nir_store_deref(b, dst_slice, nir_vec(b, out, num_components),
                   nir_component_mask(num_components));
}
/* Rebuild a deref chain rooted at a cooperative matrix variable so that it
 * points at the corresponding slice variable instead.
 */
static nir_deref_instr *
lower_cmat_deref(nir_builder *b, nir_deref_instr *deref,
                 struct lower_cmat_state *state)
{
   nir_deref_instr *parent = nir_deref_instr_parent(deref);

   if (!parent) {
      /* Chain root: swap the cmat variable for its slice variable. */
      assert(deref->deref_type == nir_deref_type_var);
      assert(deref->var);
      assert(glsl_type_is_cmat(glsl_without_array(deref->var->type)));

      struct hash_entry *entry =
         _mesa_hash_table_search(state->vars_to_slice, deref->var);
      assert(entry);

      return nir_build_deref_var(b, (nir_variable *)entry->data);
   }

   /* Only array indexing is expected between the root and the matrix. */
   assert(deref->deref_type == nir_deref_type_array);

   return nir_build_deref_array(b, lower_cmat_deref(b, parent, state),
                                deref->arr.index.ssa);
}
/* Lower a single cooperative-matrix instruction.  Derefs are rewritten to
 * reference the slice variables; each cmat_* intrinsic is expanded into
 * loads, stores, ALU ops, or a dpas on the slice vectors.  Returns a
 * replacement def where the original instruction produced a value,
 * otherwise NIR_LOWER_INSTR_PROGRESS_REPLACE.
 *
 * Fixes vs. previous revision: removed the unreachable
 * "return NIR_LOWER_INSTR_PROGRESS_REPLACE;" at the end of the
 * cmat_extract case (both branches of the preceding if/else already
 * return), and silenced a signed/unsigned comparison in cmat_insert.
 */
static nir_def *
lower_cmat_instr(nir_builder *b, nir_instr *instr, void *_state)
{
   struct lower_cmat_state *state = _state;

   if (instr->type == nir_instr_type_deref) {
      nir_deref_instr *deref = lower_cmat_deref(b, nir_instr_as_deref(instr), state);
      return &deref->def;
   }

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   switch (intrin->intrinsic) {
   case nir_intrinsic_cmat_load:
   case nir_intrinsic_cmat_store:
      lower_cmat_load_store(b, intrin, state);
      return NIR_LOWER_INSTR_PROGRESS_REPLACE;

   case nir_intrinsic_cmat_construct: {
      /* Fill every element of the matrix with a single scalar value. */
      nir_deref_instr *slice = nir_src_as_deref(intrin->src[0]);
      nir_def *src = intrin->src[1].ssa;

      const struct glsl_type *mat_type = get_coop_type_for_slice(state, slice);
      const struct glsl_cmat_description desc =
         *glsl_get_cmat_description(mat_type);
      const unsigned packing_factor = get_packing_factor(desc, slice->type);

      if (packing_factor > 1) {
         /* Replicate the scalar into every packed sub-element of a channel. */
         src = nir_pack_bits(b, nir_replicate(b, src, packing_factor),
                             packing_factor * glsl_base_type_get_bit_size(desc.element_type));
      }

      const unsigned num_components = glsl_get_vector_elements(slice->type);

      nir_store_deref(b, slice, nir_replicate(b, src, num_components),
                      nir_component_mask(num_components));
      return NIR_LOWER_INSTR_PROGRESS_REPLACE;
   }

   case nir_intrinsic_cmat_unary_op:
      lower_cmat_unary_op(b, intrin, state);
      return NIR_LOWER_INSTR_PROGRESS_REPLACE;

   case nir_intrinsic_cmat_binary_op:
      lower_cmat_binary_op(b, intrin, state);
      return NIR_LOWER_INSTR_PROGRESS_REPLACE;

   case nir_intrinsic_cmat_scalar_op:
      lower_cmat_scalar_op(b, intrin, state);
      return NIR_LOWER_INSTR_PROGRESS_REPLACE;

   case nir_intrinsic_cmat_length: {
      /* Total (unpacked) element count owned by one invocation. */
      const struct glsl_cmat_description desc = nir_intrinsic_cmat_desc(intrin);
      const struct glsl_type *mat_type = glsl_cmat_type(&desc);
      const struct glsl_type *slice_type = get_slice_type(state, mat_type);
      return nir_imm_intN_t(b, (get_packing_factor(desc, slice_type) *
                                glsl_get_vector_elements(slice_type)), 32);
   }

   case nir_intrinsic_cmat_muladd: {
      nir_deref_instr *dst_slice = nir_src_as_deref(intrin->src[0]);
      nir_deref_instr *A_slice = nir_src_as_deref(intrin->src[1]);
      nir_deref_instr *B_slice = nir_src_as_deref(intrin->src[2]);
      nir_deref_instr *accum_slice = nir_src_as_deref(intrin->src[3]);

      const struct glsl_type *dst_mat_type = get_coop_type_for_slice(state, dst_slice);
      const struct glsl_cmat_description dst_desc = *glsl_get_cmat_description(dst_mat_type);

      const struct glsl_type *src_mat_type = get_coop_type_for_slice(state, A_slice);
      const struct glsl_cmat_description src_desc = *glsl_get_cmat_description(src_mat_type);

      const unsigned packing_factor = get_packing_factor(dst_desc, dst_slice->type);
      const unsigned num_components = glsl_get_vector_elements(dst_slice->type);

      /* Emit the matrix multiply-accumulate directly as a dpas_intel
       * intrinsic; systolic depth and repeat count are fixed at 8 here.
       */
      nir_def *result =
         nir_dpas_intel(b,
                        packing_factor * glsl_base_type_get_bit_size(dst_desc.element_type),
                        nir_load_deref(b, A_slice),
                        nir_load_deref(b, B_slice),
                        nir_load_deref(b, accum_slice),
                        .dest_type = nir_get_nir_type_for_glsl_base_type(dst_desc.element_type),
                        .src_type = nir_get_nir_type_for_glsl_base_type(src_desc.element_type),
                        .saturate = nir_intrinsic_saturate(intrin),
                        .cmat_signed_mask = nir_intrinsic_cmat_signed_mask(intrin),
                        .systolic_depth = 8,
                        .repeat_count = 8);

      nir_store_deref(b, dst_slice, result,
                      nir_component_mask(num_components));
      return NIR_LOWER_INSTR_PROGRESS_REPLACE;
   }

   case nir_intrinsic_cmat_bitcast: {
      /* Slices of bitcast-compatible matrices have identical layouts, so a
       * plain vector copy suffices.
       */
      nir_deref_instr *dst_slice = nir_src_as_deref(intrin->src[0]);
      nir_deref_instr *src_slice = nir_src_as_deref(intrin->src[1]);
      const unsigned num_components = glsl_get_vector_elements(dst_slice->type);

      assert(glsl_get_vector_elements(src_slice->type) == num_components);

      nir_store_deref(b, dst_slice, nir_load_deref(b, src_slice),
                      nir_component_mask(num_components));
      return NIR_LOWER_INSTR_PROGRESS_REPLACE;
   }

   case nir_intrinsic_cmat_copy:
      nir_copy_deref(b,
                     nir_src_as_deref(intrin->src[0]),
                     nir_src_as_deref(intrin->src[1]));
      return NIR_LOWER_INSTR_PROGRESS_REPLACE;

   case nir_intrinsic_cmat_insert: {
      nir_deref_instr *dst_slice = nir_src_as_deref(intrin->src[0]);
      nir_def *scalar = intrin->src[1].ssa;
      nir_deref_instr *src_slice = nir_src_as_deref(intrin->src[2]);
      const nir_src dst_index = intrin->src[3];

      const struct glsl_type *dst_mat_type = get_coop_type_for_slice(state, dst_slice);
      ASSERTED const struct glsl_type *src_mat_type = get_coop_type_for_slice(state, src_slice);
      assert(dst_mat_type == src_mat_type);

      const struct glsl_cmat_description desc =
         *glsl_get_cmat_description(dst_mat_type);
      const unsigned bits = glsl_base_type_bit_size(desc.element_type);
      const unsigned packing_factor = get_packing_factor(desc, dst_slice->type);
      const unsigned num_components = glsl_get_vector_elements(dst_slice->type);

      /* Which slice channel and which packed sub-element the scalar lands in. */
      nir_def *slice_index = nir_udiv_imm(b, dst_index.ssa, packing_factor);
      nir_def *vector_index = nir_umod_imm(b, dst_index.ssa, packing_factor);

      nir_def *results[NIR_MAX_VEC_COMPONENTS];

      /* -1 means the target channel is not known at compile time, so every
       * channel needs a bcsel.
       */
      const int slice_constant_index = nir_src_is_const(dst_index)
         ? nir_src_as_uint(dst_index) / packing_factor
         : -1;

      for (unsigned i = 0; i < num_components; i++) {
         nir_def *val = nir_channel(b, nir_load_deref(b, src_slice), i);
         nir_def *insert;

         if (slice_constant_index < 0 || slice_constant_index == (int)i) {
            if (packing_factor == 1) {
               insert = scalar;
            } else {
               nir_def *unpacked = nir_unpack_bits(b, val, bits);
               nir_def *v = nir_vector_insert(b, unpacked, scalar, vector_index);

               insert = nir_pack_bits(b, v, bits * packing_factor);
            }
         } else {
            insert = val;
         }

         results[i] = slice_constant_index < 0
            ? nir_bcsel(b, nir_ieq_imm(b, slice_index, i), insert, val)
            : insert;
      }

      nir_store_deref(b, dst_slice, nir_vec(b, results, num_components),
                      nir_component_mask(num_components));
      return NIR_LOWER_INSTR_PROGRESS_REPLACE;
   }

   case nir_intrinsic_cmat_extract: {
      nir_deref_instr *slice = nir_src_as_deref(intrin->src[0]);
      const struct glsl_type *mat_type = get_coop_type_for_slice(state, slice);
      nir_def *index = intrin->src[1].ssa;

      const struct glsl_cmat_description desc =
         *glsl_get_cmat_description(mat_type);
      const unsigned bits = glsl_base_type_bit_size(desc.element_type);
      const unsigned packing_factor = get_packing_factor(desc, slice->type);

      /* Select the packed channel, then the sub-element within it. */
      nir_def *src =
         nir_vector_extract(b, nir_load_deref(b, slice),
                            nir_udiv_imm(b, index, packing_factor));

      if (packing_factor == 1) {
         return src;
      } else {
         return nir_vector_extract(b,
                                   nir_unpack_bits(b, src, bits),
                                   nir_umod_imm(b, index, packing_factor));
      }
   }

   default:
      unreachable("invalid cooperative matrix intrinsic");
   }
}
/* Create the slice variable that replaces cooperative matrix variable @var
 * (as a function temp when @impl is non-NULL, otherwise as a shader temp)
 * and register it in both lookup tables.
 */
static void
create_slice_var(struct lower_cmat_state *state, nir_variable *var,
                 nir_function_impl *impl)
{
   /* TODO: without array */
   const struct glsl_type *mat_type = glsl_without_array(var->type);

   assert(glsl_type_is_cmat(mat_type));
   assert(impl ? var->data.mode == nir_var_function_temp
               : var->data.mode == nir_var_shader_temp);

   const struct glsl_type *slice_type = get_slice_type(state, var->type);
   const char *slice_name =
      ralloc_asprintf(state->shader, "%s_slice", var->name);
   nir_variable *slice_var =
      impl ? nir_local_variable_create(impl, slice_type, slice_name)
           : nir_variable_create(state->shader, var->data.mode, slice_type,
                                 slice_name);

   _mesa_hash_table_insert(state->vars_to_slice, var, slice_var);
   _mesa_hash_table_insert(state->slice_coop_types, slice_var,
                           (void *)mat_type);
}
/* Entry point: replace every cooperative matrix variable in @shader with a
 * plain-vector "slice" variable sized for @subgroup_size, then lower all
 * cmat_* intrinsics and derefs that touch them.
 */
bool
brw_nir_lower_cmat(nir_shader *shader, unsigned subgroup_size)
{
   void *mem_ctx = ralloc_context(NULL);

   struct lower_cmat_state state = {
      .shader = shader,
      .slice_coop_types = _mesa_pointer_hash_table_create(mem_ctx),
      .vars_to_slice = _mesa_pointer_hash_table_create(mem_ctx),
      .subgroup_size = subgroup_size,
   };

   /* Create a slice variable for each matrix variable and remember the
    * mapping so it can be reached during lowering.
    *
    * TODO: Cooperative matrix inside struct?
    */
   nir_foreach_variable_in_shader(var, shader) {
      if (glsl_type_is_cmat(glsl_without_array(var->type)))
         create_slice_var(&state, var, NULL);
   }

   nir_foreach_function(func, shader) {
      nir_foreach_function_temp_variable(var, func->impl) {
         if (glsl_type_is_cmat(glsl_without_array(var->type)))
            create_slice_var(&state, var, func->impl);
      }
   }

   const bool progress =
      nir_shader_lower_instructions(shader, lower_cmat_filter,
                                    lower_cmat_instr, &state);

   ralloc_free(mem_ctx);

   return progress;
}

View file

@ -1,273 +0,0 @@
/*
* Copyright (c) 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_nir_rt.h"
#include "brw_nir_rt_builder.h"
/* Turn a stand-alone any-hit shader into a callable nir_function_impl that
 * the intersection shader can inline: give it (commit_ptr, hit_t, hit_kind)
 * parameters, rewrite the any-hit intrinsics in terms of those parameters,
 * and convert halts to returns.
 */
static nir_function_impl *
lower_any_hit_for_intersection(nir_shader *any_hit)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(any_hit);

   /* Any-hit shaders need three parameters */
   assert(impl->function->num_params == 0);
   nir_parameter params[] = {
      {
         /* A pointer to a boolean value for whether or not the hit was
          * accepted.
          */
         .num_components = 1,
         .bit_size = 32,
      },
      {
         /* The hit T value */
         .num_components = 1,
         .bit_size = 32,
      },
      {
         /* The hit kind */
         .num_components = 1,
         .bit_size = 32,
      },
   };
   impl->function->num_params = ARRAY_SIZE(params);
   impl->function->params =
      ralloc_array(any_hit, nir_parameter, ARRAY_SIZE(params));
   memcpy(impl->function->params, params, sizeof(params));

   nir_builder build = nir_builder_at(nir_before_impl(impl));
   nir_builder *b = &build;

   nir_def *commit_ptr = nir_load_param(b, 0);
   nir_def *hit_t = nir_load_param(b, 1);
   nir_def *hit_kind = nir_load_param(b, 2);

   /* Writable view of the caller's commit boolean. */
   nir_deref_instr *commit =
      nir_build_deref_cast(b, commit_ptr, nir_var_function_temp,
                           glsl_bool_type(), 0);

   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_ignore_ray_intersection:
               b->cursor = nir_instr_remove(&intrin->instr);
               /* We put the newly emitted code inside a dummy if because it's
                * going to contain a jump instruction and we don't want to
                * deal with that mess here.  It'll get dealt with by our
                * control-flow optimization passes.
                */
               nir_store_deref(b, commit, nir_imm_false(b), 0x1);
               nir_push_if(b, nir_imm_true(b));
               nir_jump(b, nir_jump_return);
               nir_pop_if(b, NULL);
               break;

            case nir_intrinsic_terminate_ray:
               /* The "normal" handling of terminateRay works fine in
                * intersection shaders.
                */
               break;

            case nir_intrinsic_load_ray_t_max:
               /* The candidate T is the parameter, not the committed T. */
               nir_def_rewrite_uses(&intrin->def,
                                    hit_t);
               nir_instr_remove(&intrin->instr);
               break;

            case nir_intrinsic_load_ray_hit_kind:
               nir_def_rewrite_uses(&intrin->def,
                                    hit_kind);
               nir_instr_remove(&intrin->instr);
               break;

            default:
               break;
            }
            break;
         }

         case nir_instr_type_jump: {
            /* Stomp any halts to returns since they only return from the
             * any-hit shader and not necessarily from the intersection
             * shader.  This is safe to do because we've already asserted
             * that we only have the one function.
             */
            nir_jump_instr *jump = nir_instr_as_jump(instr);
            if (jump->type == nir_jump_halt)
               jump->type = nir_jump_return;
            break;
         }

         default:
            break;
         }
      }
   }

   nir_validate_shader(any_hit, "after initial any-hit lowering");

   nir_lower_returns_impl(impl);

   nir_validate_shader(any_hit, "after lowering returns");

   return impl;
}
/* Lower an intersection shader for Intel RT hardware: inline the (lowered)
 * any-hit shader at each report_ray_intersection, track acceptance in a
 * local "commit" flag, and emit accept/ignore plus hit-record updates at
 * the end of the shader.
 */
void
brw_nir_lower_intersection_shader(nir_shader *intersection,
                                  const nir_shader *any_hit,
                                  const struct intel_device_info *devinfo)
{
   void *dead_ctx = ralloc_context(intersection);

   nir_function_impl *any_hit_impl = NULL;
   struct hash_table *any_hit_var_remap = NULL;
   if (any_hit) {
      /* Clone so the caller's any-hit shader is left untouched. */
      nir_shader *any_hit_tmp = nir_shader_clone(dead_ctx, any_hit);
      NIR_PASS_V(any_hit_tmp, nir_opt_dce);
      any_hit_impl = lower_any_hit_for_intersection(any_hit_tmp);
      any_hit_var_remap = _mesa_pointer_hash_table_create(dead_ctx);
   }

   nir_function_impl *impl = nir_shader_get_entrypoint(intersection);

   nir_builder build = nir_builder_at(nir_before_impl(impl));
   nir_builder *b = &build;

   nir_def *t_addr = brw_nir_rt_mem_hit_addr(b, false /* committed */);

   /* Set when any report_ray_intersection in range is accepted. */
   nir_variable *commit =
      nir_local_variable_create(impl, glsl_bool_type(), "ray_commit");
   nir_store_var(b, commit, nir_imm_false(b), 0x1);

   /* Append the final accept/ignore decision just before the single exit. */
   assert(impl->end_block->predecessors->entries == 1);
   set_foreach(impl->end_block->predecessors, block_entry) {
      struct nir_block *block = (void *)block_entry->key;
      b->cursor = nir_after_block_before_jump(block);
      nir_push_if(b, nir_load_var(b, commit));
      {
         /* Set the "valid" bit in mem_hit */
         nir_def *ray_addr = brw_nir_rt_mem_hit_addr(b, false /* committed */);
         nir_def *flags_dw_addr = nir_iadd_imm(b, ray_addr, 12);
         nir_store_global(b, flags_dw_addr, 4,
                          nir_ior(b, nir_load_global(b, flags_dw_addr, 4, 1, 32),
                                  nir_imm_int(b, 1 << 16)), 0x1 /* write_mask */);

         nir_accept_ray_intersection(b);
      }
      nir_push_else(b, NULL);
      {
         nir_ignore_ray_intersection(b);
      }
      nir_pop_if(b, NULL);
      break;
   }

   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_report_ray_intersection: {
               b->cursor = nir_instr_remove(&intrin->instr);
               nir_def *hit_t = intrin->src[0].ssa;
               nir_def *hit_kind = intrin->src[1].ssa;
               nir_def *min_t = nir_load_ray_t_min(b);

               struct brw_nir_rt_mem_ray_defs ray_def;
               brw_nir_rt_load_mem_ray(b, &ray_def, BRW_RT_BVH_LEVEL_WORLD);

               struct brw_nir_rt_mem_hit_defs hit_in = {};
               brw_nir_rt_load_mem_hit(b, &hit_in, false);

               nir_def *max_t = ray_def.t_far;

               /* bool commit_tmp = false; */
               nir_variable *commit_tmp =
                  nir_local_variable_create(impl, glsl_bool_type(),
                                            "commit_tmp");
               nir_store_var(b, commit_tmp, nir_imm_false(b), 0x1);

               /* Only consider hits inside the [t_min, t_max] range. */
               nir_push_if(b, nir_iand(b, nir_fge(b, hit_t, min_t),
                                       nir_fge(b, max_t, hit_t)));
               {
                  /* Any-hit defaults to commit */
                  nir_store_var(b, commit_tmp, nir_imm_true(b), 0x1);

                  /* Run the any-hit shader only for non-opaque geometry. */
                  if (any_hit_impl != NULL) {
                     nir_push_if(b, nir_inot(b, nir_load_leaf_opaque_intel(b)));
                     {
                        nir_def *params[] = {
                           &nir_build_deref_var(b, commit_tmp)->def,
                           hit_t,
                           hit_kind,
                        };
                        nir_inline_function_impl(b, any_hit_impl, params,
                                                 any_hit_var_remap);
                     }
                     nir_pop_if(b, NULL);
                  }

                  nir_push_if(b, nir_load_var(b, commit_tmp));
                  {
                     /* Accepted: record the hit and shrink the ray's t_far. */
                     nir_store_var(b, commit, nir_imm_true(b), 0x1);
                     nir_def *ray_addr =
                        brw_nir_rt_mem_ray_addr(b, brw_nir_rt_stack_addr(b), BRW_RT_BVH_LEVEL_WORLD);
                     nir_store_global(b, nir_iadd_imm(b, ray_addr, 16 + 12), 4, hit_t, 0x1);
                     nir_store_global(b, t_addr, 4,
                                      nir_vec2(b, nir_fmin(b, hit_t, hit_in.t), hit_kind),
                                      0x3);
                  }
                  nir_pop_if(b, NULL);
               }
               nir_pop_if(b, NULL);

               /* reportIntersection() returns whether the hit was accepted. */
               nir_def *accepted = nir_load_var(b, commit_tmp);
               nir_def_rewrite_uses(&intrin->def,
                                    accepted);
               break;
            }

            default:
               break;
            }
            break;
         }

         default:
            break;
         }
      }
   }

   nir_metadata_preserve(impl, nir_metadata_none);

   /* We did some inlining; have to re-index SSA defs */
   nir_index_ssa_defs(impl);

   ralloc_free(dead_ctx);
}

View file

@ -1,567 +0,0 @@
/*
* Copyright (c) 2021 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_nir_rt.h"
#include "brw_nir_rt_builder.h"
#include "nir_deref.h"
#include "util/macros.h"
/* Per-shader state for the ray-query lowering pass. */
struct lowering_state {
   const struct intel_device_info *devinfo;

   /* Entry point being lowered; new temporaries are created here. */
   nir_function_impl *impl;

   /* Maps each opaque ray-query nir_variable to its struct brw_ray_query. */
   struct hash_table *queries;
   /* Total ray-query slots registered; arrays count one slot per element. */
   uint32_t n_queries;

   struct brw_nir_rt_globals_defs globals;
   nir_def *rq_globals;
};
/* Bookkeeping for one ray-query variable. */
struct brw_ray_query {
   /* The opaque RayQuery variable as it came from SPIR-V. */
   nir_variable *opaque_var;
   /* Function-temp uint16 (or array thereof) holding the query's packed
    * ctrl/level state (see update_trace_ctrl_level).
    */
   nir_variable *internal_var;

   /* Slot index used to address this query's shadow-memory stack. */
   uint32_t id;
};
/* Byte size of one query's saved state word. */
#define SIZEOF_QUERY_STATE (sizeof(uint32_t))

/* With a single query the internal state can live entirely in its
 * function-temp variable; multiple queries must spill to shadow memory
 * (see get_ray_query_shadow_addr).
 */
static bool
need_spill_fill(struct lowering_state *state)
{
   return state->n_queries > 1;
}
/**
 * This pass converts opaque RayQuery structures from SPIRV into a vec3 where
 * the first 2 elements store a global address for the query and the third
 * element is an incremented counter on the number of executed
 * nir_intrinsic_rq_proceed.
 */
static void
register_opaque_var(nir_variable *opaque_var, struct lowering_state *state)
{
   /* Each opaque variable may be registered only once. */
   assert(_mesa_hash_table_search(state->queries, opaque_var) == NULL);

   struct brw_ray_query *rq = rzalloc(state->queries, struct brw_ray_query);
   rq->opaque_var = opaque_var;
   rq->id = state->n_queries;

   /* Arrays of queries consume one slot per element. */
   const unsigned aoa_size = glsl_get_aoa_size(opaque_var->type);
   state->n_queries += MAX2(1, aoa_size);

   _mesa_hash_table_insert(state->queries, opaque_var, rq);
}
/* Create the function-temp variable that holds a ray query's internal
 * uint16 state, mirroring any array dimensions of the opaque variable.
 */
static void
create_internal_var(struct brw_ray_query *rq, struct lowering_state *state)
{
   const struct glsl_type *t = rq->opaque_var->type;
   const struct glsl_type *internal_type = glsl_uint16_t_type();

   for (; glsl_type_is_array(t); t = glsl_get_array_element(t)) {
      assert(!glsl_type_is_unsized_array(t));
      internal_type = glsl_array_type(internal_type, glsl_array_size(t), 0);
   }

   rq->internal_var = nir_local_variable_create(state->impl,
                                                internal_type,
                                                NULL);
}
/* Compute the global-memory shadow address for a ray-query deref, and hand
 * back (via @out_state_deref) a deref to the matching internal state
 * variable.  Returns NULL when no spill/fill is needed (single query).
 */
static nir_def *
get_ray_query_shadow_addr(nir_builder *b,
                          nir_deref_instr *deref,
                          struct lowering_state *state,
                          nir_deref_instr **out_state_deref)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);
   assert(path.path[0]->deref_type == nir_deref_type_var);

   nir_variable *opaque_var = nir_deref_instr_get_variable(path.path[0]);
   struct hash_entry *entry = _mesa_hash_table_search(state->queries, opaque_var);
   assert(entry);

   struct brw_ray_query *rq = entry->data;

   /* Base address in the shadow memory of the variable associated with this
    * ray query variable.
    */
   nir_def *base_addr =
      nir_iadd_imm(b, state->globals.resume_sbt_addr,
                   brw_rt_ray_queries_shadow_stack_size(state->devinfo) * rq->id);

   bool spill_fill = need_spill_fill(state);
   *out_state_deref = nir_build_deref_var(b, rq->internal_var);

   if (!spill_fill)
      return NULL;

   /* Just emit code and let constant-folding go to town */
   nir_deref_instr **p = &path.path[1];
   for (; *p; p++) {
      if ((*p)->deref_type == nir_deref_type_array) {
         nir_def *index = (*p)->arr.index.ssa;

         /* Walk the same array step on the internal state variable. */
         *out_state_deref = nir_build_deref_array(b, *out_state_deref, index);

         /* Advance the shadow address by index * size-of-one-element. */
         uint64_t size = MAX2(1, glsl_get_aoa_size((*p)->type)) *
            brw_rt_ray_queries_shadow_stack_size(state->devinfo);
         nir_def *mul = nir_amul_imm(b, nir_i2i64(b, index), size);

         base_addr = nir_iadd(b, base_addr, mul);
      } else {
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   /* Add the lane offset to the shadow memory address */
   nir_def *lane_offset =
      nir_imul_imm(
         b,
         nir_iadd(
            b,
            nir_imul(
               b,
               brw_load_btd_dss_id(b),
               brw_nir_rt_load_num_simd_lanes_per_dss(b, state->devinfo)),
            brw_nir_rt_sync_stack_id(b)),
         BRW_RT_SIZEOF_SHADOW_RAY_QUERY);

   return nir_iadd(b, base_addr, nir_i2i64(b, lane_offset));
}
/* Read and/or update the packed ctrl/level word tracked for a ray query.
 *
 * The state variable packs the trace-ray control value in bits [15:2] and
 * the BVH level in bits [1:0].  Old values are returned through the
 * optional out pointers; passing NULL for new_ctrl or new_level leaves the
 * corresponding field unchanged.  Nothing is stored when both are NULL.
 */
static void
update_trace_ctrl_level(nir_builder *b,
                        nir_deref_instr *state_deref,
                        nir_def **out_old_ctrl,
                        nir_def **out_old_level,
                        nir_def *new_ctrl,
                        nir_def *new_level)
{
   nir_def *packed = nir_load_deref(b, state_deref);
   nir_def *cur_ctrl = nir_ishr_imm(b, packed, 2);
   nir_def *cur_level = nir_iand_imm(b, packed, 0x3);

   if (out_old_ctrl != NULL)
      *out_old_ctrl = cur_ctrl;
   if (out_old_level != NULL)
      *out_old_level = cur_level;

   if (new_ctrl == NULL && new_level == NULL)
      return;

   nir_def *ctrl16 = new_ctrl ? nir_i2i16(b, new_ctrl) : cur_ctrl;
   nir_def *level16 = new_level ? nir_i2i16(b, new_level) : cur_level;
   nir_def *repacked = nir_ior(b, nir_ishl_imm(b, ctrl16, 2), level16);
   nir_store_deref(b, state_deref, repacked, 0x1);
}
/* Copy a query's state from its shadow-memory slot into the HW ray-query
 * stack before handing the query to the hardware.  'ctrl' is currently
 * unused; it is kept for symmetry with the call site.
 */
static void
fill_query(nir_builder *b,
           nir_def *hw_stack_addr,
           nir_def *shadow_stack_addr,
           nir_def *ctrl)
{
   brw_nir_memcpy_global(b, hw_stack_addr, 64, shadow_stack_addr, 64,
                         BRW_RT_SIZEOF_RAY_QUERY);
}
/* Copy a query's state back from the HW ray-query stack into its
 * shadow-memory slot after the hardware has processed it.
 */
static void
spill_query(nir_builder *b,
            nir_def *hw_stack_addr,
            nir_def *shadow_stack_addr)
{
   brw_nir_memcpy_global(b, shadow_stack_addr, 64, hw_stack_addr, 64,
                         BRW_RT_SIZEOF_RAY_QUERY);
}
/* Lower one ray-query intrinsic.
 *
 * The intrinsic is removed and replaced with explicit global-memory
 * operations on the query's stack (the shadow-memory slot when spill/fill
 * is needed, the HW sync stack otherwise) plus updates of the internal
 * ctrl/level variable.  rq_load results are rewritten to values fetched
 * from the MemRay/MemHit structures or the BVH leaves they point at.
 */
static void
lower_ray_query_intrinsic(nir_builder *b,
                          nir_intrinsic_instr *intrin,
                          struct lowering_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   b->cursor = nir_instr_remove(&intrin->instr);
   nir_deref_instr *ctrl_level_deref;
   nir_def *shadow_stack_addr =
      get_ray_query_shadow_addr(b, deref, state, &ctrl_level_deref);
   nir_def *hw_stack_addr =
      brw_nir_rt_sync_stack_addr(b, state->globals.base_mem_addr, state->devinfo);
   /* shadow_stack_addr is NULL when the single query can live in the HW
    * slot; operate directly on the HW stack in that case.
    */
   nir_def *stack_addr = shadow_stack_addr ? shadow_stack_addr : hw_stack_addr;
   switch (intrin->intrinsic) {
   case nir_intrinsic_rq_initialize: {
      nir_def *as_addr = intrin->src[1].ssa;
      nir_def *ray_flags = intrin->src[2].ssa;
      /* From the SPIR-V spec:
       *
       *    "Only the 8 least-significant bits of Cull Mask are used by
       *    this instruction - other bits are ignored.
       *
       *    Only the 16 least-significant bits of Miss Index are used by
       *    this instruction - other bits are ignored."
       */
      nir_def *cull_mask = nir_iand_imm(b, intrin->src[3].ssa, 0xff);
      nir_def *ray_orig = intrin->src[4].ssa;
      nir_def *ray_t_min = intrin->src[5].ssa;
      nir_def *ray_dir = intrin->src[6].ssa;
      nir_def *ray_t_max = intrin->src[7].ssa;
      nir_def *root_node_ptr =
         brw_nir_rt_acceleration_structure_to_root_node(b, as_addr);
      struct brw_nir_rt_mem_ray_defs ray_defs = {
         .root_node_ptr = root_node_ptr,
         .ray_flags = nir_u2u16(b, ray_flags),
         .ray_mask = cull_mask,
         .orig = ray_orig,
         .t_near = ray_t_min,
         .dir = ray_dir,
         .t_far = ray_t_max,
      };
      nir_def *ray_addr =
         brw_nir_rt_mem_ray_addr(b, stack_addr, BRW_RT_BVH_LEVEL_WORLD);
      brw_nir_rt_query_mark_init(b, stack_addr);
      brw_nir_rt_store_mem_ray_query_at_addr(b, ray_addr, &ray_defs);
      update_trace_ctrl_level(b, ctrl_level_deref,
                              NULL, NULL,
                              nir_imm_int(b, GEN_RT_TRACE_RAY_INITAL),
                              nir_imm_int(b, BRW_RT_BVH_LEVEL_WORLD));
      break;
   }
   case nir_intrinsic_rq_proceed: {
      nir_def *not_done =
         nir_inot(b, brw_nir_rt_query_done(b, stack_addr));
      nir_def *not_done_then, *not_done_else;
      nir_push_if(b, not_done);
      {
         nir_def *ctrl, *level;
         update_trace_ctrl_level(b, ctrl_level_deref,
                                 &ctrl, &level,
                                 NULL,
                                 NULL);
         /* Mark the query as done because we are handing it over to the HW
          * for processing.  If the HW makes any progress, it will write back
          * some data and as a side effect, clear the "done" bit.  If no
          * progress is made, HW does not write anything back and we can use
          * this bit to detect that.
          */
         brw_nir_rt_query_mark_done(b, stack_addr);
         if (shadow_stack_addr)
            fill_query(b, hw_stack_addr, shadow_stack_addr, ctrl);
         nir_trace_ray_intel(b, state->rq_globals, level, ctrl, .synchronous = true);
         struct brw_nir_rt_mem_hit_defs hit_in = {};
         brw_nir_rt_load_mem_hit_from_addr(b, &hit_in, hw_stack_addr, false);
         if (shadow_stack_addr)
            spill_query(b, hw_stack_addr, shadow_stack_addr);
         update_trace_ctrl_level(b, ctrl_level_deref,
                                 NULL, NULL,
                                 nir_imm_int(b, GEN_RT_TRACE_RAY_CONTINUE),
                                 hit_in.bvh_level);
         not_done_then = nir_inot(b, hit_in.done);
      }
      nir_push_else(b, NULL);
      {
         not_done_else = nir_imm_false(b);
      }
      nir_pop_if(b, NULL);
      not_done = nir_if_phi(b, not_done_then, not_done_else);
      nir_def_rewrite_uses(&intrin->def, not_done);
      break;
   }
   case nir_intrinsic_rq_confirm_intersection: {
      /* Promote the candidate hit to the committed hit. */
      brw_nir_memcpy_global(b,
                            brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, true), 16,
                            brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, false), 16,
                            BRW_RT_SIZEOF_HIT_INFO);
      update_trace_ctrl_level(b, ctrl_level_deref,
                              NULL, NULL,
                              nir_imm_int(b, GEN_RT_TRACE_RAY_COMMIT),
                              nir_imm_int(b, BRW_RT_BVH_LEVEL_OBJECT));
      break;
   }
   case nir_intrinsic_rq_generate_intersection: {
      brw_nir_rt_generate_hit_addr(b, stack_addr, intrin->src[1].ssa);
      update_trace_ctrl_level(b, ctrl_level_deref,
                              NULL, NULL,
                              nir_imm_int(b, GEN_RT_TRACE_RAY_COMMIT),
                              nir_imm_int(b, BRW_RT_BVH_LEVEL_OBJECT));
      break;
   }
   case nir_intrinsic_rq_terminate: {
      brw_nir_rt_query_mark_done(b, stack_addr);
      break;
   }
   case nir_intrinsic_rq_load: {
      const bool committed = nir_intrinsic_committed(intrin);
      struct brw_nir_rt_mem_ray_defs world_ray_in = {};
      struct brw_nir_rt_mem_ray_defs object_ray_in = {};
      struct brw_nir_rt_mem_hit_defs hit_in = {};
      brw_nir_rt_load_mem_ray_from_addr(b, &world_ray_in, stack_addr,
                                        BRW_RT_BVH_LEVEL_WORLD);
      brw_nir_rt_load_mem_ray_from_addr(b, &object_ray_in, stack_addr,
                                        BRW_RT_BVH_LEVEL_OBJECT);
      brw_nir_rt_load_mem_hit_from_addr(b, &hit_in, stack_addr, committed);
      nir_def *sysval = NULL;
      switch (nir_intrinsic_ray_query_value(intrin)) {
      case nir_ray_query_value_intersection_type:
         if (committed) {
            /* Values we want to generate :
             *
             * RayQueryCommittedIntersectionNoneEXT = 0U        <= hit_in.valid == false
             * RayQueryCommittedIntersectionTriangleEXT = 1U    <= hit_in.leaf_type == BRW_RT_BVH_NODE_TYPE_QUAD (4)
             * RayQueryCommittedIntersectionGeneratedEXT = 2U   <= hit_in.leaf_type == BRW_RT_BVH_NODE_TYPE_PROCEDURAL (3)
             */
            sysval =
               nir_bcsel(b, nir_ieq_imm(b, hit_in.leaf_type, 4),
                         nir_imm_int(b, 1), nir_imm_int(b, 2));
            sysval =
               nir_bcsel(b, hit_in.valid,
                         sysval, nir_imm_int(b, 0));
         } else {
            /* 0 -> triangle, 1 -> AABB */
            sysval =
               nir_b2i32(b,
                         nir_ieq_imm(b, hit_in.leaf_type,
                                     BRW_RT_BVH_NODE_TYPE_PROCEDURAL));
         }
         break;
      case nir_ray_query_value_intersection_t:
         sysval = hit_in.t;
         break;
      case nir_ray_query_value_intersection_instance_custom_index: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.instance_id;
         break;
      }
      case nir_ray_query_value_intersection_instance_id: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.instance_index;
         break;
      }
      case nir_ray_query_value_intersection_instance_sbt_index: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.contribution_to_hit_group_index;
         break;
      }
      case nir_ray_query_value_intersection_geometry_index: {
         /* Geometry index lives in the low 29 bits of the second dword of
          * the primitive leaf.
          */
         nir_def *geometry_index_dw =
            nir_load_global(b, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4), 4,
                            1, 32);
         sysval = nir_iand_imm(b, geometry_index_dw, BITFIELD_MASK(29));
         break;
      }
      case nir_ray_query_value_intersection_primitive_index:
         sysval = brw_nir_rt_load_primitive_id_from_hit(b, NULL /* is_procedural */, &hit_in);
         break;
      case nir_ray_query_value_intersection_barycentrics:
         sysval = hit_in.tri_bary;
         break;
      case nir_ray_query_value_intersection_front_face:
         sysval = hit_in.front_face;
         break;
      case nir_ray_query_value_intersection_object_ray_direction:
         sysval = world_ray_in.dir;
         break;
      case nir_ray_query_value_intersection_object_ray_origin:
         sysval = world_ray_in.orig;
         break;
      case nir_ray_query_value_intersection_object_to_world: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.object_to_world[nir_intrinsic_column(intrin)];
         break;
      }
      case nir_ray_query_value_intersection_world_to_object: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.world_to_object[nir_intrinsic_column(intrin)];
         break;
      }
      case nir_ray_query_value_intersection_candidate_aabb_opaque:
         sysval = hit_in.front_face;
         break;
      case nir_ray_query_value_tmin:
         sysval = world_ray_in.t_near;
         break;
      case nir_ray_query_value_flags:
         sysval = nir_u2u32(b, world_ray_in.ray_flags);
         break;
      case nir_ray_query_value_world_ray_direction:
         sysval = world_ray_in.dir;
         break;
      case nir_ray_query_value_world_ray_origin:
         sysval = world_ray_in.orig;
         break;
      case nir_ray_query_value_intersection_triangle_vertex_positions: {
         struct brw_nir_rt_bvh_primitive_leaf_positions_defs pos;
         brw_nir_rt_load_bvh_primitive_leaf_positions(b, &pos, hit_in.prim_leaf_ptr);
         sysval = pos.positions[nir_intrinsic_column(intrin)];
         break;
      }
      default:
         unreachable("Invalid ray query");
      }
      assert(sysval);
      nir_def_rewrite_uses(&intrin->def, sysval);
      break;
   }
   default:
      unreachable("Invalid intrinsic");
   }
}
/* Lower every ray-query intrinsic found in the given function impl. */
static void
lower_ray_query_impl(nir_function_impl *impl, struct lowering_state *state)
{
   nir_builder builder = nir_builder_at(nir_before_impl(impl));
   nir_builder *b = &builder;

   /* Load the ray-query globals once at the top of the function. */
   state->rq_globals = nir_load_ray_query_global_intel(b);
   brw_nir_rt_load_globals_addr(b, &state->globals, state->rq_globals);

   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_rq_initialize:
         case nir_intrinsic_rq_terminate:
         case nir_intrinsic_rq_proceed:
         case nir_intrinsic_rq_generate_intersection:
         case nir_intrinsic_rq_confirm_intersection:
         case nir_intrinsic_rq_load:
            lower_ray_query_intrinsic(b, intrin, state);
            break;
         default:
            break;
         }
      }
   }

   nir_metadata_preserve(impl, nir_metadata_none);
}
/* Pass entry point: replace every opaque RayQuery variable and intrinsic
 * in the shader.  Returns true when any query was lowered.
 */
bool
brw_nir_lower_ray_queries(nir_shader *shader,
                          const struct intel_device_info *devinfo)
{
   assert(exec_list_length(&shader->functions) == 1);

   struct lowering_state state = {
      .devinfo = devinfo,
      .impl = nir_shader_get_entrypoint(shader),
      .queries = _mesa_pointer_hash_table_create(NULL),
   };

   /* Map all query variable to internal type variables */
   nir_foreach_function_temp_variable(var, state.impl)
      register_opaque_var(var, &state);
   hash_table_foreach(state.queries, entry)
      create_internal_var(entry->data, &state);

   if (state.n_queries == 0) {
      ralloc_free(state.queries);
      return false;
   }

   lower_ray_query_impl(state.impl, &state);

   nir_remove_dead_derefs(shader);
   nir_remove_dead_variables(shader,
                             nir_var_shader_temp | nir_var_function_temp,
                             NULL);
   nir_metadata_preserve(state.impl, nir_metadata_none);

   ralloc_free(state.queries);
   return true;
}

View file

@ -1,386 +0,0 @@
/*
* Copyright (c) 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_nir_rt.h"
#include "brw_nir_rt_builder.h"
/* Build a boolean telling whether the current hit's leaf is procedural
 * geometry.  For any-hit and intersection shaders the answer is known at
 * compile time; otherwise it is read from the hit's leaf type.
 */
static nir_def *
build_leaf_is_procedural(nir_builder *b, struct brw_nir_rt_mem_hit_defs *hit)
{
   const gl_shader_stage stage = b->shader->info.stage;

   if (stage == MESA_SHADER_ANY_HIT) {
      /* Any-hit shaders are always compiled into intersection shaders for
       * procedural geometry.  If we got here in an any-hit shader, it's
       * for triangles.
       */
      return nir_imm_false(b);
   }

   if (stage == MESA_SHADER_INTERSECTION)
      return nir_imm_true(b);

   return nir_ieq_imm(b, hit->leaf_type,
                      BRW_RT_BVH_NODE_TYPE_PROCEDURAL);
}
/* Lower all ray-tracing system values and intrinsics in one function impl.
 *
 * Preloads the RT globals, the SW hotzone and (depending on the stage) the
 * MemRay/MemHit structures at the top of the function, then rewrites each
 * intrinsic either to one of those preloaded values or to explicit global
 * memory loads from the BVH leaves.
 */
static void
lower_rt_intrinsics_impl(nir_function_impl *impl,
                         const struct intel_device_info *devinfo)
{
   bool progress = false;
   nir_builder build = nir_builder_at(nir_before_impl(impl));
   nir_builder *b = &build;
   struct brw_nir_rt_globals_defs globals;
   brw_nir_rt_load_globals(b, &globals);
   /* Hotzone: 4 dwords — SW stack offset followed by the launch id. */
   nir_def *hotzone_addr = brw_nir_rt_sw_hotzone_addr(b, devinfo);
   nir_def *hotzone = nir_load_global(b, hotzone_addr, 16, 4, 32);
   gl_shader_stage stage = b->shader->info.stage;
   struct brw_nir_rt_mem_ray_defs world_ray_in = {};
   struct brw_nir_rt_mem_ray_defs object_ray_in = {};
   struct brw_nir_rt_mem_hit_defs hit_in = {};
   /* Only load the ray/hit structures the stage can actually observe. */
   switch (stage) {
   case MESA_SHADER_ANY_HIT:
   case MESA_SHADER_CLOSEST_HIT:
   case MESA_SHADER_INTERSECTION:
      brw_nir_rt_load_mem_hit(b, &hit_in,
                              stage == MESA_SHADER_CLOSEST_HIT);
      brw_nir_rt_load_mem_ray(b, &object_ray_in,
                              BRW_RT_BVH_LEVEL_OBJECT);
      FALLTHROUGH;
   case MESA_SHADER_MISS:
      brw_nir_rt_load_mem_ray(b, &world_ray_in,
                              BRW_RT_BVH_LEVEL_WORLD);
      break;
   default:
      break;
   }
   nir_def *thread_stack_base_addr = brw_nir_rt_sw_stack_addr(b, devinfo);
   nir_def *stack_base_offset = nir_channel(b, hotzone, 0);
   nir_def *stack_base_addr =
      nir_iadd(b, thread_stack_base_addr, nir_u2u64(b, stack_base_offset));
   ASSERTED bool seen_scratch_base_ptr_load = false;
   ASSERTED bool found_resume = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         b->cursor = nir_after_instr(&intrin->instr);
         nir_def *sysval = NULL;
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_scratch_base_ptr:
            assert(nir_intrinsic_base(intrin) == 1);
            seen_scratch_base_ptr_load = true;
            sysval = stack_base_addr;
            break;
         case nir_intrinsic_btd_stack_push_intel: {
            int32_t stack_size = nir_intrinsic_stack_size(intrin);
            if (stack_size > 0) {
               nir_def *child_stack_offset =
                  nir_iadd_imm(b, stack_base_offset, stack_size);
               nir_store_global(b, hotzone_addr, 16, child_stack_offset, 0x1);
            }
            nir_instr_remove(instr);
            break;
         }
         case nir_intrinsic_rt_resume:
            /* This is the first "interesting" instruction */
            assert(block == nir_start_block(impl));
            assert(!seen_scratch_base_ptr_load);
            found_resume = true;
            int32_t stack_size = nir_intrinsic_stack_size(intrin);
            if (stack_size > 0) {
               /* Pop the frame: rewind the hotzone's stack offset. */
               stack_base_offset =
                  nir_iadd_imm(b, stack_base_offset, -stack_size);
               nir_store_global(b, hotzone_addr, 16, stack_base_offset, 0x1);
               stack_base_addr = nir_iadd(b, thread_stack_base_addr,
                                          nir_u2u64(b, stack_base_offset));
            }
            nir_instr_remove(instr);
            break;
         case nir_intrinsic_load_uniform: {
            /* We don't want to lower this in the launch trampoline. */
            if (stage == MESA_SHADER_COMPUTE)
               break;
            sysval = brw_nir_load_global_const(b, intrin,
                                               nir_load_btd_global_arg_addr_intel(b),
                                               BRW_RT_PUSH_CONST_OFFSET);
            break;
         }
         case nir_intrinsic_load_ray_launch_id:
            /* Launch id occupies channels 1..3 of the hotzone. */
            sysval = nir_channels(b, hotzone, 0xe);
            break;
         case nir_intrinsic_load_ray_launch_size:
            sysval = globals.launch_size;
            break;
         case nir_intrinsic_load_ray_world_origin:
            sysval = world_ray_in.orig;
            break;
         case nir_intrinsic_load_ray_world_direction:
            sysval = world_ray_in.dir;
            break;
         case nir_intrinsic_load_ray_object_origin:
            sysval = object_ray_in.orig;
            break;
         case nir_intrinsic_load_ray_object_direction:
            sysval = object_ray_in.dir;
            break;
         case nir_intrinsic_load_ray_t_min:
            /* It shouldn't matter which we pull this from */
            sysval = world_ray_in.t_near;
            break;
         case nir_intrinsic_load_ray_t_max:
            if (stage == MESA_SHADER_MISS)
               sysval = world_ray_in.t_far;
            else
               sysval = hit_in.t;
            break;
         case nir_intrinsic_load_primitive_id:
            sysval = brw_nir_rt_load_primitive_id_from_hit(b,
                                                           build_leaf_is_procedural(b, &hit_in),
                                                           &hit_in);
            break;
         case nir_intrinsic_load_instance_id: {
            struct brw_nir_rt_bvh_instance_leaf_defs leaf;
            brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
            sysval = leaf.instance_index;
            break;
         }
         case nir_intrinsic_load_ray_object_to_world: {
            struct brw_nir_rt_bvh_instance_leaf_defs leaf;
            brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
            sysval = leaf.object_to_world[nir_intrinsic_column(intrin)];
            break;
         }
         case nir_intrinsic_load_ray_world_to_object: {
            struct brw_nir_rt_bvh_instance_leaf_defs leaf;
            brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
            sysval = leaf.world_to_object[nir_intrinsic_column(intrin)];
            break;
         }
         case nir_intrinsic_load_ray_hit_kind: {
            nir_def *tri_hit_kind =
               nir_bcsel(b, hit_in.front_face,
                         nir_imm_int(b, BRW_RT_HIT_KIND_FRONT_FACE),
                         nir_imm_int(b, BRW_RT_HIT_KIND_BACK_FACE));
            sysval = nir_bcsel(b, build_leaf_is_procedural(b, &hit_in),
                               hit_in.aabb_hit_kind, tri_hit_kind);
            break;
         }
         case nir_intrinsic_load_ray_flags:
            /* We need to fetch the original ray flags we stored in the
             * leaf pointer, because the actual ray flags we get here
             * will include any flags passed on the pipeline at creation
             * time, and the spec for IncomingRayFlagsKHR says:
             *   Setting pipeline flags on the raytracing pipeline must not
             *   cause any corresponding flags to be set in variables with
             *   this decoration.
             */
            sysval = nir_u2u32(b, world_ray_in.inst_leaf_ptr);
            break;
         case nir_intrinsic_load_cull_mask:
            sysval = nir_u2u32(b, world_ray_in.ray_mask);
            break;
         case nir_intrinsic_load_ray_geometry_index: {
            nir_def *geometry_index_dw =
               nir_load_global(b, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4), 4,
                               1, 32);
            sysval = nir_iand_imm(b, geometry_index_dw, BITFIELD_MASK(29));
            break;
         }
         case nir_intrinsic_load_ray_instance_custom_index: {
            struct brw_nir_rt_bvh_instance_leaf_defs leaf;
            brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
            sysval = leaf.instance_id;
            break;
         }
         case nir_intrinsic_load_shader_record_ptr:
            /* We can't handle this intrinsic in resume shaders because the
             * handle we get there won't be from the original SBT. The shader
             * call lowering/splitting pass should have ensured that this
             * value was spilled from the initial shader and unspilled in any
             * resume shaders that need it.
             */
            assert(!found_resume);
            sysval = nir_load_btd_local_arg_addr_intel(b);
            break;
         case nir_intrinsic_load_ray_base_mem_addr_intel:
            sysval = globals.base_mem_addr;
            break;
         case nir_intrinsic_load_ray_hw_stack_size_intel:
            sysval = nir_imul_imm(b, globals.hw_stack_size, 64);
            break;
         case nir_intrinsic_load_ray_sw_stack_size_intel:
            sysval = nir_imul_imm(b, globals.sw_stack_size, 64);
            break;
         case nir_intrinsic_load_ray_num_dss_rt_stacks_intel:
            sysval = globals.num_dss_rt_stacks;
            break;
         case nir_intrinsic_load_ray_hit_sbt_addr_intel:
            sysval = globals.hit_sbt_addr;
            break;
         case nir_intrinsic_load_ray_hit_sbt_stride_intel:
            sysval = globals.hit_sbt_stride;
            break;
         case nir_intrinsic_load_ray_miss_sbt_addr_intel:
            sysval = globals.miss_sbt_addr;
            break;
         case nir_intrinsic_load_ray_miss_sbt_stride_intel:
            sysval = globals.miss_sbt_stride;
            break;
         case nir_intrinsic_load_callable_sbt_addr_intel:
            sysval = globals.call_sbt_addr;
            break;
         case nir_intrinsic_load_callable_sbt_stride_intel:
            sysval = globals.call_sbt_stride;
            break;
         case nir_intrinsic_load_btd_resume_sbt_addr_intel:
            /* Resolved at shader-upload time through relocations. */
            sysval = nir_pack_64_2x32_split(b,
               nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW),
               nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH));
            break;
         case nir_intrinsic_load_leaf_procedural_intel:
            sysval = build_leaf_is_procedural(b, &hit_in);
            break;
         case nir_intrinsic_load_ray_triangle_vertex_positions: {
            struct brw_nir_rt_bvh_primitive_leaf_positions_defs pos;
            brw_nir_rt_load_bvh_primitive_leaf_positions(b, &pos, hit_in.prim_leaf_ptr);
            sysval = pos.positions[nir_intrinsic_column(intrin)];
            break;
         }
         case nir_intrinsic_load_leaf_opaque_intel: {
            if (stage == MESA_SHADER_INTERSECTION) {
               /* In intersection shaders, the opaque bit is passed to us in
                * the front_face bit.
                */
               sysval = hit_in.front_face;
            } else {
               nir_def *flags_dw =
                  nir_load_global(b, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4), 4,
                                  1, 32);
               sysval = nir_i2b(b, nir_iand_imm(b, flags_dw, 1u << 30));
            }
            break;
         }
         default:
            continue;
         }
         progress = true;
         if (sysval) {
            nir_def_rewrite_uses(&intrin->def,
                                 sysval);
            nir_instr_remove(&intrin->instr);
         }
      }
   }
   nir_metadata_preserve(impl,
                         progress ?
                         nir_metadata_none :
                         (nir_metadata_block_index |
                          nir_metadata_dominance));
}
/** Lower ray-tracing system values and intrinsics
*
* In most 3D shader stages, intrinsics are a fairly thin wrapper around
* hardware functionality and system values represent magic bits that come
* into the shader from FF hardware. Ray-tracing, however, looks a bit more
* like the OpenGL 1.0 world where the underlying hardware is simple and most
* of the API implementation is software.
*
* In particular, most things that are treated as system values (or built-ins
* in SPIR-V) don't get magically dropped into registers for us. Instead, we
* have to fetch them from the relevant data structures shared with the
* ray-tracing hardware. Most come from either the RT_DISPATCH_GLOBALS or
* from one of the MemHit data structures. Some, such as primitive_id require
* us to fetch the leaf address from the MemHit struct and then manually read
* the data out of the BVH. Instead of trying to emit all this code deep in
* the back-end where we can't effectively optimize it, we lower it all to
* global memory access in NIR.
*
* Once this pass is complete, the only real system values left are the two
* argument pointer system values for BTD dispatch: btd_local_arg_addr and
* btd_global_arg_addr.
*/
void
brw_nir_lower_rt_intrinsics(nir_shader *nir,
                            const struct intel_device_info *devinfo)
{
   /* Apply the lowering to every function implementation in the shader. */
   nir_foreach_function_impl(impl, nir)
      lower_rt_intrinsics_impl(impl, devinfo);
}

View file

@ -1,329 +0,0 @@
/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_nir_rt.h"
#include "brw_nir_rt_builder.h"
#include "nir_phi_builder.h"
/* Assertion helper: true iff the shader contains no
 * load_scratch_base_ptr intrinsic yet.
 */
UNUSED static bool
no_load_scratch_base_ptr_intrinsic(nir_shader *shader)
{
   nir_foreach_function_impl(impl, shader) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type == nir_instr_type_intrinsic &&
                nir_instr_as_intrinsic(instr)->intrinsic ==
                nir_intrinsic_load_scratch_base_ptr)
               return false;
         }
      }
   }
   return true;
}
/** Insert the appropriate return instruction at the end of the shader */
void
brw_nir_lower_shader_returns(nir_shader *shader)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   /* Reserve scratch space at the start of the shader's per-thread scratch
    * space for the return BINDLESS_SHADER_RECORD address and data payload.
    * When a shader is called, the calling shader will write the return BSR
    * address in this region of the callee's scratch space.
    *
    * We could also put it at the end of the caller's scratch space.  However,
    * doing this way means that a shader never accesses its caller's scratch
    * space unless given an explicit pointer (such as for ray payloads).  It
    * also makes computing the address easier given that we want to apply an
    * alignment to the scratch offset to ensure we can make alignment
    * assumptions in the called shader.
    *
    * This isn't needed for ray-gen shaders because they end the thread and
    * never return to the calling trampoline shader.
    */
   assert(no_load_scratch_base_ptr_intrinsic(shader));
   if (shader->info.stage != MESA_SHADER_RAYGEN)
      shader->scratch_size += BRW_BTD_STACK_CALLEE_DATA_SIZE;
   nir_builder b = nir_builder_create(impl);
   /* Emit the stage-specific return sequence at every exit point of the
    * entrypoint.
    */
   set_foreach(impl->end_block->predecessors, block_entry) {
      struct nir_block *block = (void *)block_entry->key;
      b.cursor = nir_after_block_before_jump(block);
      switch (shader->info.stage) {
      case MESA_SHADER_RAYGEN:
         /* A raygen shader is always the root of the shader call tree.  When
          * it ends, we retire the bindless stack ID and no further shaders
          * will be executed.
          */
         assert(impl->end_block->predecessors->entries == 1);
         brw_nir_btd_retire(&b);
         break;
      case MESA_SHADER_ANY_HIT:
         /* The default action of an any-hit shader is to accept the ray
          * intersection.  Any-hit shaders may have more than one exit.  Only
          * the final "normal" exit will actually need to accept the
          * intersection as any others should come from nir_jump_halt
          * instructions inserted after ignore_ray_intersection or
          * terminate_ray or the like.  However, inserting an accept after
          * the ignore or terminate is safe because it'll get deleted later.
          */
         nir_accept_ray_intersection(&b);
         break;
      case MESA_SHADER_CALLABLE:
      case MESA_SHADER_MISS:
      case MESA_SHADER_CLOSEST_HIT:
         /* Callable, miss, and closest-hit shaders don't take any special
          * action at the end.  They simply return back to the previous shader
          * in the call stack.
          */
         assert(impl->end_block->predecessors->entries == 1);
         brw_nir_btd_return(&b);
         break;
      case MESA_SHADER_INTERSECTION:
         /* This will be handled by brw_nir_lower_intersection_shader */
         break;
      default:
         unreachable("Invalid callable shader stage");
      }
   }
   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
}
/* Write the resume BSR record address and the payload pointer at the base
 * of the callee's stack frame, then push the BTD stack by the call's
 * frame size.
 */
static void
store_resume_addr(nir_builder *b, nir_intrinsic_instr *call)
{
   uint32_t call_idx = nir_intrinsic_call_idx(call);
   uint32_t offset = nir_intrinsic_stack_size(call);
   /* First thing on the called shader's stack is the resume address
    * followed by a pointer to the payload.
    */
   nir_def *resume_record_addr =
      nir_iadd_imm(b, nir_load_btd_resume_sbt_addr_intel(b),
                   call_idx * BRW_BTD_RESUME_SBT_STRIDE);
   /* By the time we get here, any remaining shader/function memory
    * pointers have been lowered to SSA values.
    */
   nir_def *payload_addr =
      nir_get_shader_call_payload_src(call)->ssa;
   brw_nir_rt_store_scratch(b, offset, BRW_BTD_STACK_ALIGN,
                            nir_vec2(b, resume_record_addr, payload_addr),
                            0xf /* write_mask */);
   nir_btd_stack_push_intel(b, offset);
}
/* Lower nir_intrinsic_rt_trace_ray to a HW MemRay store followed by an
 * asynchronous trace_ray_intel.  Stores the resume record first so the
 * called shader chain can return here.
 */
static bool
lower_shader_trace_ray_instr(struct nir_builder *b, nir_instr *instr, void *data)
{
   struct brw_bs_prog_key *key = data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   /* Leave nir_intrinsic_rt_resume to be lowered by
    * brw_nir_lower_rt_intrinsics()
    */
   nir_intrinsic_instr *call = nir_instr_as_intrinsic(instr);
   if (call->intrinsic != nir_intrinsic_rt_trace_ray)
      return false;
   b->cursor = nir_instr_remove(instr);
   store_resume_addr(b, call);
   nir_def *as_addr = call->src[0].ssa;
   nir_def *ray_flags = call->src[1].ssa;
   /* From the SPIR-V spec:
    *
    *    "Only the 8 least-significant bits of Cull Mask are used by this
    *    instruction - other bits are ignored.
    *
    *    Only the 4 least-significant bits of SBT Offset and SBT Stride are
    *    used by this instruction - other bits are ignored.
    *
    *    Only the 16 least-significant bits of Miss Index are used by this
    *    instruction - other bits are ignored."
    */
   nir_def *cull_mask = nir_iand_imm(b, call->src[2].ssa, 0xff);
   nir_def *sbt_offset = nir_iand_imm(b, call->src[3].ssa, 0xf);
   nir_def *sbt_stride = nir_iand_imm(b, call->src[4].ssa, 0xf);
   nir_def *miss_index = nir_iand_imm(b, call->src[5].ssa, 0xffff);
   nir_def *ray_orig = call->src[6].ssa;
   nir_def *ray_t_min = call->src[7].ssa;
   nir_def *ray_dir = call->src[8].ssa;
   nir_def *ray_t_max = call->src[9].ssa;
   nir_def *root_node_ptr =
      brw_nir_rt_acceleration_structure_to_root_node(b, as_addr);
   /* The hardware packet requires an address to the first element of the
    * hit SBT.
    *
    * In order to calculate this, we must multiply the "SBT Offset"
    * provided to OpTraceRay by the SBT stride provided for the hit SBT in
    * the call to vkCmdTraceRay() and add that to the base address of the
    * hit SBT.  This stride is not to be confused with the "SBT Stride"
    * provided to OpTraceRay which is in units of this stride.  It's a
    * rather terrible overload of the word "stride".  The hardware docs
    * calls the SPIR-V stride value the "shader index multiplier" which is
    * a much more sane name.
    */
   nir_def *hit_sbt_stride_B =
      nir_load_ray_hit_sbt_stride_intel(b);
   nir_def *hit_sbt_offset_B =
      nir_imul(b, sbt_offset, nir_u2u32(b, hit_sbt_stride_B));
   nir_def *hit_sbt_addr =
      nir_iadd(b, nir_load_ray_hit_sbt_addr_intel(b),
               nir_u2u64(b, hit_sbt_offset_B));
   /* The hardware packet takes an address to the miss BSR. */
   nir_def *miss_sbt_stride_B =
      nir_load_ray_miss_sbt_stride_intel(b);
   nir_def *miss_sbt_offset_B =
      nir_imul(b, miss_index, nir_u2u32(b, miss_sbt_stride_B));
   nir_def *miss_sbt_addr =
      nir_iadd(b, nir_load_ray_miss_sbt_addr_intel(b),
               nir_u2u64(b, miss_sbt_offset_B));
   struct brw_nir_rt_mem_ray_defs ray_defs = {
      .root_node_ptr = root_node_ptr,
      /* Combine the shader value given to traceRayEXT() with the pipeline
       * creation value VkPipelineCreateFlags.
       */
      .ray_flags = nir_ior_imm(b, nir_u2u16(b, ray_flags), key->pipeline_ray_flags),
      .ray_mask = cull_mask,
      .hit_group_sr_base_ptr = hit_sbt_addr,
      .hit_group_sr_stride = nir_u2u16(b, hit_sbt_stride_B),
      .miss_sr_ptr = miss_sbt_addr,
      .orig = ray_orig,
      .t_near = ray_t_min,
      .dir = ray_dir,
      .t_far = ray_t_max,
      .shader_index_multiplier = sbt_stride,
      /* The instance leaf pointer is unused in the top level BVH traversal
       * since we always start from the root node.  We can reuse that field to
       * store the ray_flags handed to traceRayEXT().  This will be reloaded
       * when the shader accesses gl_IncomingRayFlagsEXT (see
       * nir_intrinsic_load_ray_flags brw_nir_lower_rt_intrinsic.c)
       */
      .inst_leaf_ptr = nir_u2u64(b, ray_flags),
   };
   brw_nir_rt_store_mem_ray(b, &ray_defs, BRW_RT_BVH_LEVEL_WORLD);
   nir_trace_ray_intel(b,
                       nir_load_btd_global_arg_addr_intel(b),
                       nir_imm_int(b, BRW_RT_BVH_LEVEL_WORLD),
                       nir_imm_int(b, GEN_RT_TRACE_RAY_INITAL),
                       .synchronous = false);
   return true;
}
/* Lowers nir_intrinsic_rt_execute_callable to a BTD spawn of the callable
 * shader, whose BINDLESS_SHADER_RECORD address is computed as the callable
 * SBT base plus index * stride.
 */
static bool
lower_shader_call_instr(struct nir_builder *b, nir_intrinsic_instr *call,
                        void *data)
{
   if (call->intrinsic != nir_intrinsic_rt_execute_callable)
      return false;

   /* Replace the call instruction entirely; build at its old location. */
   b->cursor = nir_instr_remove(&call->instr);

   /* Record where the callable should return to on our stack. */
   store_resume_addr(b, call);

   /* src[0] is the SBT record index; scale it by the stride in bytes. */
   nir_def *sbt_offset32 =
      nir_imul(b, call->src[0].ssa,
               nir_u2u32(b, nir_load_callable_sbt_stride_intel(b)));
   nir_def *sbt_addr =
      nir_iadd(b, nir_load_callable_sbt_addr_intel(b),
               nir_u2u64(b, sbt_offset32));
   brw_nir_btd_spawn(b, sbt_addr);
   return true;
}
/* Lowers traceRayEXT() and executeCallable() calls in a bindless shader.
 *
 * Runs the trace-ray lowering first, then the callable lowering.  Both
 * passes always run; |= is used instead of || to avoid short-circuiting.
 */
bool
brw_nir_lower_shader_calls(nir_shader *shader, struct brw_bs_prog_key *key)
{
   bool progress = nir_shader_instructions_pass(shader,
                                                lower_shader_trace_ray_instr,
                                                nir_metadata_none,
                                                key);

   progress |= nir_shader_intrinsics_pass(shader, lower_shader_call_instr,
                                          nir_metadata_block_index |
                                          nir_metadata_dominance,
                                          NULL);

   return progress;
}
/** Creates a trivial return shader
 *
 * In most cases this shader doesn't actually do anything. It just needs to
 * return to the caller.
 *
 * By default, our HW has the ability to handle the fact that a shader is not
 * available and will execute the next following shader in the tracing call.
 * For instance, a RAYGEN shader traces a ray, the tracing generates a hit,
 * but there is no ANYHIT shader available. The HW should follow up by
 * executing the CLOSESTHIT shader.
 *
 * This default behavior can be changed through the RT_CTRL register
 * (privileged access) and when NULL shader checks are disabled, the HW will
 * instead call the call stack handler (this shader). This is what i915 is
 * doing as part of Wa_14013202645.
 *
 * In order to ensure the call to the CLOSESTHIT shader, this shader needs to
 * commit the ray and will not proceed with the BTD return. Similarly when the
 * same thing happens with the INTERSECTION shader, we should just carry on
 * the ray traversal with the continue operation.
 *
 */
nir_shader *
brw_nir_create_trivial_return_shader(const struct brw_compiler *compiler,
                                     void *mem_ctx)
{
   const nir_shader_compiler_options *nir_options =
      compiler->nir_options[MESA_SHADER_CALLABLE];

   /* An empty callable shader; all it will contain after lowering is the
    * bindless return sequence added by brw_nir_lower_shader_returns.
    */
   nir_builder _b = nir_builder_init_simple_shader(MESA_SHADER_CALLABLE,
                                                   nir_options,
                                                   "RT Trivial Return");
   nir_builder *b = &_b;

   /* Tie the shader's lifetime to the caller-provided memory context. */
   ralloc_steal(mem_ctx, b->shader);
   nir_shader *nir = b->shader;

   NIR_PASS_V(nir, brw_nir_lower_shader_returns);

   return nir;
}

View file

@ -1,536 +0,0 @@
/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "intel_nir.h"
#include "brw_nir_rt.h"
#include "brw_nir_rt_builder.h"
#include "intel_nir.h"
/* Resizes a deref's destination to the given number of components and bit
 * size, fixing up array indices along the way.  Returns true if anything
 * changed.
 */
static bool
resize_deref(nir_builder *b, nir_deref_instr *deref,
             unsigned num_components, unsigned bit_size)
{
   if (deref->def.num_components == num_components &&
       deref->def.bit_size == bit_size)
      return false;

   /* NIR requires array indices have to match the deref bit size */
   if (deref->def.bit_size != bit_size &&
       (deref->deref_type == nir_deref_type_array ||
        deref->deref_type == nir_deref_type_ptr_as_array)) {
      b->cursor = nir_before_instr(&deref->instr);
      nir_def *idx;
      if (nir_src_is_const(deref->arr.index)) {
         /* Re-materialize constant indices directly at the new bit size. */
         idx = nir_imm_intN_t(b, nir_src_as_int(deref->arr.index), bit_size);
      } else {
         idx = nir_i2iN(b, deref->arr.index.ssa, bit_size);
      }
      nir_src_rewrite(&deref->arr.index, idx);
   }

   deref->def.num_components = num_components;
   deref->def.bit_size = bit_size;

   return true;
}
/* Rewrites shader_call_data and ray_hit_attrib variable derefs into
 * function_temp derefs based at pointers loaded from the RT stack, and
 * resizes all remaining function_temp derefs to 1x64-bit so they can later
 * be lowered to 64-bit global memory access.
 */
static bool
lower_rt_io_derefs(nir_shader *shader)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   bool progress = false;

   unsigned num_shader_call_vars = 0;
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_call_data)
      num_shader_call_vars++;

   unsigned num_ray_hit_attrib_vars = 0;
   nir_foreach_variable_with_modes(var, shader, nir_var_ray_hit_attrib)
      num_ray_hit_attrib_vars++;

   /* At most one payload is allowed because it's an input. Technically, this
    * is also true for hit attribute variables. However, after we inline an
    * any-hit shader into an intersection shader, we can end up with multiple
    * hit attribute variables. They'll end up mapping to a cast from the same
    * base pointer so this is fine.
    */
   assert(num_shader_call_vars <= 1);

   nir_builder b = nir_builder_at(nir_before_impl(impl));

   nir_def *call_data_addr = NULL;
   if (num_shader_call_vars > 0) {
      /* The caller stored a pointer to its copy of the payload at a fixed
       * offset on our scratch stack.
       */
      assert(shader->scratch_size >= BRW_BTD_STACK_CALLEE_DATA_SIZE);
      call_data_addr =
         brw_nir_rt_load_scratch(&b, BRW_BTD_STACK_CALL_DATA_PTR_OFFSET, 8,
                                 1, 64);
      progress = true;
   }

   gl_shader_stage stage = shader->info.stage;
   nir_def *hit_attrib_addr = NULL;
   if (num_ray_hit_attrib_vars > 0) {
      assert(stage == MESA_SHADER_ANY_HIT ||
             stage == MESA_SHADER_CLOSEST_HIT ||
             stage == MESA_SHADER_INTERSECTION);
      nir_def *hit_addr =
         brw_nir_rt_mem_hit_addr(&b, stage == MESA_SHADER_CLOSEST_HIT);
      /* The vec2 barycentrics are in 2nd and 3rd dwords of MemHit */
      nir_def *bary_addr = nir_iadd_imm(&b, hit_addr, 4);
      /* Procedural (AABB) hits read the SW-defined hit-attrib area instead
       * of the barycentrics stored in MemHit.
       */
      hit_attrib_addr = nir_bcsel(&b, nir_load_leaf_procedural_intel(&b),
                                  brw_nir_rt_hit_attrib_data_addr(&b),
                                  bary_addr);
      progress = true;
   }

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_deref)
            continue;

         nir_deref_instr *deref = nir_instr_as_deref(instr);
         if (nir_deref_mode_is(deref, nir_var_shader_call_data)) {
            /* Retarget the whole deref chain at function_temp; var derefs
             * become casts from the payload pointer.
             */
            deref->modes = nir_var_function_temp;
            if (deref->deref_type == nir_deref_type_var) {
               b.cursor = nir_before_instr(&deref->instr);
               nir_deref_instr *cast =
                  nir_build_deref_cast(&b, call_data_addr,
                                       nir_var_function_temp,
                                       deref->var->type, 0);
               nir_def_rewrite_uses(&deref->def,
                                    &cast->def);
               nir_instr_remove(&deref->instr);
               progress = true;
            }
         } else if (nir_deref_mode_is(deref, nir_var_ray_hit_attrib)) {
            deref->modes = nir_var_function_temp;
            if (deref->deref_type == nir_deref_type_var) {
               b.cursor = nir_before_instr(&deref->instr);
               nir_deref_instr *cast =
                  nir_build_deref_cast(&b, hit_attrib_addr,
                                       nir_var_function_temp,
                                       deref->type, 0);
               nir_def_rewrite_uses(&deref->def,
                                    &cast->def);
               nir_instr_remove(&deref->instr);
               progress = true;
            }
         }

         /* We're going to lower all function_temp memory to scratch using
          * 64-bit addresses. We need to resize all our derefs first or else
          * nir_lower_explicit_io will have a fit.
          */
         if (nir_deref_mode_is(deref, nir_var_function_temp) &&
             resize_deref(&b, deref, 1, 64))
            progress = true;
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}
/** Lowers ray-tracing shader I/O and scratch access
 *
 * SPV_KHR_ray_tracing adds three new types of I/O, each of which need their
 * own bit of special care:
 *
 *  - Shader payload data:  This is represented by the IncomingCallableData
 *    and IncomingRayPayload storage classes which are both represented by
 *    nir_var_call_data in NIR.  There is at most one of these per-shader and
 *    they contain payload data passed down the stack from the parent shader
 *    when it calls executeCallable() or traceRay().  In our implementation,
 *    the actual storage lives in the calling shader's scratch space and we're
 *    passed a pointer to it.
 *
 *  - Hit attribute data:  This is represented by the HitAttribute storage
 *    class in SPIR-V and nir_var_ray_hit_attrib in NIR.  For triangle
 *    geometry, it's supposed to contain two floats which are the barycentric
 *    coordinates.  For AABBs/procedural geometry, it contains the hit data
 *    written out by the intersection shader.  In our implementation, it's a
 *    64-bit pointer which points either to the u/v area of the relevant
 *    MemHit data structure or the space right after the HW ray stack entry.
 *
 *  - Shader record buffer data:  This allows read-only access to the data
 *    stored in the SBT right after the bindless shader handles.  It's
 *    effectively a UBO with a magic address.  Coming out of spirv_to_nir,
 *    we get a nir_intrinsic_load_shader_record_ptr which is cast to a
 *    nir_var_mem_global deref and all access happens through that.  The
 *    shader_record_ptr system value is handled in brw_nir_lower_rt_intrinsics
 *    and we assume nir_lower_explicit_io is called elsewhere thanks to
 *    VK_KHR_buffer_device_address so there's really nothing to do here.
 *
 * We also handle lowering any remaining function_temp variables to scratch at
 * this point.  This gets rid of any remaining arrays and also takes care of
 * the sending side of ray payloads where we pass pointers to a function_temp
 * variable down the call stack.
 */
static void
lower_rt_io_and_scratch(nir_shader *nir)
{
   /* First, we need to ensure all the I/O variables have explicit types.
    * Because these are shader-internal and don't come in from outside, they
    * don't have an explicit memory layout and we have to assign them one.
    */
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_function_temp |
              nir_var_shader_call_data |
              nir_var_ray_hit_attrib,
              glsl_get_natural_size_align_bytes);

   /* Now patch any derefs to I/O vars */
   NIR_PASS_V(nir, lower_rt_io_derefs);

   /* Finally, lower any remaining function_temp, mem_constant, or
    * ray_hit_attrib access to 64-bit global memory access.
    */
   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_function_temp |
              nir_var_mem_constant |
              nir_var_ray_hit_attrib,
              nir_address_format_64bit_global);
}
/* Emits the code for terminateRayEXT: either return straight to the caller
 * (when the closest-hit shader is skipped) or commit the hit and spawn the
 * closest-hit shader of the current hit group.
 */
static void
build_terminate_ray(nir_builder *b)
{
   nir_def *skip_closest_hit = nir_test_mask(b, nir_load_ray_flags(b),
      BRW_RT_RAY_FLAG_SKIP_CLOSEST_HIT_SHADER);
   nir_push_if(b, skip_closest_hit);
   {
      /* The shader that calls traceRay() is unable to access any ray hit
       * information except for that which is explicitly written into the ray
       * payload by shaders invoked during the trace.  If there's no closest-
       * hit shader, then accepting the hit has no observable effect; it's
       * just extra memory traffic for no reason.
       */
      brw_nir_btd_return(b);
      nir_jump(b, nir_jump_halt);
   }
   nir_push_else(b, NULL);
   {
      /* The closest hit shader is in the same shader group as the any-hit
       * shader that we're currently in.  We can get the address for its SBT
       * handle by looking at the shader record pointer and subtracting the
       * size of a SBT handle.  The BINDLESS_SHADER_RECORD for a closest hit
       * shader is the first one in the SBT handle.
       */
      nir_def *closest_hit =
         nir_iadd_imm(b, nir_load_shader_record_ptr(b),
                         -BRW_RT_SBT_HANDLE_SIZE);

      brw_nir_rt_commit_hit(b);
      brw_nir_btd_spawn(b, closest_hit);
      nir_jump(b, nir_jump_halt);
   }
   nir_pop_if(b, NULL);
}
/** Lowers away ray walk intrinsics
 *
 * This lowers terminate_ray, ignore_ray_intersection, and the NIR-specific
 * accept_ray_intersection intrinsics to the appropriate Intel-specific
 * intrinsics.
 */
static bool
lower_ray_walk_intrinsics(nir_shader *shader,
                          const struct intel_device_info *devinfo)
{
   assert(shader->info.stage == MESA_SHADER_ANY_HIT ||
          shader->info.stage == MESA_SHADER_INTERSECTION);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   nir_builder b = nir_builder_create(impl);

   bool progress = false;
   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         switch (intrin->intrinsic) {
         case nir_intrinsic_ignore_ray_intersection: {
            b.cursor = nir_instr_remove(&intrin->instr);

            /* We put the newly emitted code inside a dummy if because it's
             * going to contain a jump instruction and we don't want to deal
             * with that mess here.  It'll get dealt with by our control-flow
             * optimization passes.
             */
            nir_push_if(&b, nir_imm_true(&b));
            /* Ignoring the hit means asking the HW to continue traversal. */
            nir_trace_ray_intel(&b,
                                nir_load_btd_global_arg_addr_intel(&b),
                                nir_imm_int(&b, BRW_RT_BVH_LEVEL_OBJECT),
                                nir_imm_int(&b, GEN_RT_TRACE_RAY_CONTINUE),
                                .synchronous = false);
            nir_jump(&b, nir_jump_halt);
            nir_pop_if(&b, NULL);
            progress = true;
            break;
         }

         case nir_intrinsic_accept_ray_intersection: {
            b.cursor = nir_instr_remove(&intrin->instr);

            /* With RayFlagsTerminateOnFirstHitKHR set, accepting the hit
             * also ends the traversal; otherwise commit and keep walking.
             */
            nir_def *terminate = nir_test_mask(&b, nir_load_ray_flags(&b),
               BRW_RT_RAY_FLAG_TERMINATE_ON_FIRST_HIT);
            nir_push_if(&b, terminate);
            {
               build_terminate_ray(&b);
            }
            nir_push_else(&b, NULL);
            {
               nir_trace_ray_intel(&b,
                                   nir_load_btd_global_arg_addr_intel(&b),
                                   nir_imm_int(&b, BRW_RT_BVH_LEVEL_OBJECT),
                                   nir_imm_int(&b, GEN_RT_TRACE_RAY_COMMIT),
                                   .synchronous = false);
               nir_jump(&b, nir_jump_halt);
            }
            nir_pop_if(&b, NULL);
            progress = true;
            break;
         }

         case nir_intrinsic_terminate_ray: {
            b.cursor = nir_instr_remove(&intrin->instr);
            build_terminate_ray(&b);
            progress = true;
            break;
         }

         default:
            break;
         }
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_none);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}
/* Lowers a ray-generation shader: bindless returns plus RT I/O/scratch. */
void
brw_nir_lower_raygen(nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_RAYGEN);
   NIR_PASS_V(nir, brw_nir_lower_shader_returns);
   lower_rt_io_and_scratch(nir);
}
/* Lowers an any-hit shader: bindless returns, ray walk intrinsics
 * (ignore/accept/terminate), then RT I/O/scratch.
 */
void
brw_nir_lower_any_hit(nir_shader *nir, const struct intel_device_info *devinfo)
{
   assert(nir->info.stage == MESA_SHADER_ANY_HIT);
   NIR_PASS_V(nir, brw_nir_lower_shader_returns);
   NIR_PASS_V(nir, lower_ray_walk_intrinsics, devinfo);
   lower_rt_io_and_scratch(nir);
}
/* Lowers a closest-hit shader: bindless returns plus RT I/O/scratch. */
void
brw_nir_lower_closest_hit(nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_CLOSEST_HIT);
   NIR_PASS_V(nir, brw_nir_lower_shader_returns);
   lower_rt_io_and_scratch(nir);
}
/* Lowers a miss shader: bindless returns plus RT I/O/scratch. */
void
brw_nir_lower_miss(nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_MISS);
   NIR_PASS_V(nir, brw_nir_lower_shader_returns);
   lower_rt_io_and_scratch(nir);
}
/* Lowers a callable shader: bindless returns plus RT I/O/scratch. */
void
brw_nir_lower_callable(nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_CALLABLE);
   NIR_PASS_V(nir, brw_nir_lower_shader_returns);
   lower_rt_io_and_scratch(nir);
}
/* Lowers an intersection shader, inlining the (possibly NULL) any-hit shader
 * from the same hit group into it before running the usual ray-walk and
 * I/O/scratch lowering.
 */
void
brw_nir_lower_combined_intersection_any_hit(nir_shader *intersection,
                                            const nir_shader *any_hit,
                                            const struct intel_device_info *devinfo)
{
   assert(intersection->info.stage == MESA_SHADER_INTERSECTION);
   assert(any_hit == NULL || any_hit->info.stage == MESA_SHADER_ANY_HIT);
   NIR_PASS_V(intersection, brw_nir_lower_shader_returns);
   NIR_PASS_V(intersection, brw_nir_lower_intersection_shader,
              any_hit, devinfo);
   NIR_PASS_V(intersection, lower_ray_walk_intrinsics, devinfo);
   lower_rt_io_and_scratch(intersection);
}
/* Loads a push-constant value at a fixed byte offset. */
static nir_def *
build_load_uniform(nir_builder *b, unsigned offset,
                   unsigned num_components, unsigned bit_size)
{
   return nir_load_uniform(b, num_components, bit_size, nir_imm_int(b, 0),
                           .base = offset,
                           .range = num_components * bit_size / 8);
}

/* Loads a field of struct brw_rt_raygen_trampoline_params from push
 * constants by name.
 */
#define load_trampoline_param(b, name, num_components, bit_size) \
   build_load_uniform((b), offsetof(struct brw_rt_raygen_trampoline_params, name), \
                      (num_components), (bit_size))
/* Builds the compute shader that trampolines into the ray-generation shader.
 *
 * It computes the per-invocation launch ID from the workgroup ID and the
 * log2 local group size passed in as push constants, initializes the SW
 * hotzone, and BTD-spawns the raygen BINDLESS_SHADER_RECORD (read through a
 * level of indirection for indirect dispatches).  Out-of-bounds invocations
 * just retire their stack IDs.
 */
nir_shader *
brw_nir_create_raygen_trampoline(const struct brw_compiler *compiler,
                                 void *mem_ctx)
{
   const struct intel_device_info *devinfo = compiler->devinfo;
   const nir_shader_compiler_options *nir_options =
      compiler->nir_options[MESA_SHADER_COMPUTE];

   STATIC_ASSERT(sizeof(struct brw_rt_raygen_trampoline_params) == 32);

   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                                  nir_options,
                                                  "RT Ray-Gen Trampoline");
   ralloc_steal(mem_ctx, b.shader);

   b.shader->info.workgroup_size_variable = true;

   /* The RT global data and raygen BINDLESS_SHADER_RECORD addresses are
    * passed in as push constants in the first register.  We deal with the
    * raygen BSR address here; the global data we'll deal with later.
    */
   b.shader->num_uniforms = 32;
   nir_def *raygen_param_bsr_addr =
      load_trampoline_param(&b, raygen_bsr_addr, 1, 64);
   nir_def *is_indirect =
      nir_i2b(&b, load_trampoline_param(&b, is_indirect, 1, 8));
   nir_def *local_shift =
      nir_u2u32(&b, load_trampoline_param(&b, local_group_size_log2, 3, 8));

   /* For indirect dispatch, the push constant holds a pointer to the BSR
    * address rather than the address itself.
    */
   nir_def *raygen_indirect_bsr_addr;
   nir_push_if(&b, is_indirect);
   {
      raygen_indirect_bsr_addr =
         nir_load_global_constant(&b, raygen_param_bsr_addr,
                                  8 /* align */,
                                  1 /* components */,
                                  64 /* bit_size */);
   }
   nir_pop_if(&b, NULL);

   nir_def *raygen_bsr_addr =
      nir_if_phi(&b, raygen_indirect_bsr_addr, raygen_param_bsr_addr);

   /* Decompose the SIMD channel index into local x/y/z using the log2 local
    * group sizes, then combine with the workgroup ID to get the launch ID.
    */
   nir_def *global_id = nir_load_workgroup_id_zero_base(&b);
   nir_def *simd_channel = nir_load_subgroup_invocation(&b);
   nir_def *local_x =
      nir_ubfe(&b, simd_channel, nir_imm_int(&b, 0),
                  nir_channel(&b, local_shift, 0));
   nir_def *local_y =
      nir_ubfe(&b, simd_channel, nir_channel(&b, local_shift, 0),
                  nir_channel(&b, local_shift, 1));
   nir_def *local_z =
      nir_ubfe(&b, simd_channel,
                  nir_iadd(&b, nir_channel(&b, local_shift, 0),
                              nir_channel(&b, local_shift, 1)),
                  nir_channel(&b, local_shift, 2));
   nir_def *launch_id =
      nir_iadd(&b, nir_ishl(&b, global_id, local_shift),
                  nir_vec3(&b, local_x, local_y, local_z));

   nir_def *launch_size = nir_load_ray_launch_size(&b);
   nir_push_if(&b, nir_ball(&b, nir_ult(&b, launch_id, launch_size)));
   {
      /* Initialize the SW hotzone: zero stack pointer plus the launch ID. */
      nir_store_global(&b, brw_nir_rt_sw_hotzone_addr(&b, devinfo), 16,
                       nir_vec4(&b, nir_imm_int(&b, 0), /* Stack ptr */
                                    nir_channel(&b, launch_id, 0),
                                    nir_channel(&b, launch_id, 1),
                                    nir_channel(&b, launch_id, 2)),
                       0xf /* write mask */);

      brw_nir_btd_spawn(&b, raygen_bsr_addr);
   }
   nir_push_else(&b, NULL);
   {
      /* Even though these invocations aren't being used for anything, the
       * hardware allocated stack IDs for them. They need to retire them.
       */
      brw_nir_btd_retire(&b);
   }
   nir_pop_if(&b, NULL);

   nir_shader *nir = b.shader;
   nir->info.name = ralloc_strdup(nir, "RT: TraceRay trampoline");
   nir_validate_shader(nir, "in brw_nir_create_raygen_trampoline");

   struct brw_nir_compiler_opts opts = {};
   brw_preprocess_nir(compiler, nir, &opts);

   NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo);

   b = nir_builder_create(nir_shader_get_entrypoint(b.shader));

   /* brw_nir_lower_rt_intrinsics will leave us with a btd_global_arg_addr
    * intrinsic which doesn't exist in compute shaders.  We also created one
    * above when we generated the BTD spawn intrinsic.  Now we go through and
    * replace them with a uniform load.
    */
   nir_foreach_block(block, b.impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic != nir_intrinsic_load_btd_global_arg_addr_intel)
            continue;

         b.cursor = nir_before_instr(&intrin->instr);
         nir_def *global_arg_addr =
            load_trampoline_param(&b, rt_disp_globals_addr, 1, 64);
         nir_def_rewrite_uses(&intrin->def,
                              global_arg_addr);
         nir_instr_remove(instr);
      }
   }

   NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, devinfo, NULL);

   const bool is_scalar = true;
   brw_nir_optimize(nir, is_scalar, devinfo);

   return nir;
}

View file

@ -1,76 +0,0 @@
/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef BRW_NIR_RT_H
#define BRW_NIR_RT_H

#include "brw_nir.h"
#include "brw_rt.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Per-stage ray-tracing shader lowering entrypoints. */
void brw_nir_lower_raygen(nir_shader *nir);
void brw_nir_lower_any_hit(nir_shader *nir,
                           const struct intel_device_info *devinfo);
void brw_nir_lower_closest_hit(nir_shader *nir);
void brw_nir_lower_miss(nir_shader *nir);
void brw_nir_lower_callable(nir_shader *nir);
void brw_nir_lower_combined_intersection_any_hit(nir_shader *intersection,
                                                 const nir_shader *any_hit,
                                                 const struct intel_device_info *devinfo);

/* We reserve the first 16B of the stack for callee data pointers */
#define BRW_BTD_STACK_RESUME_BSR_ADDR_OFFSET 0
#define BRW_BTD_STACK_CALL_DATA_PTR_OFFSET 8
#define BRW_BTD_STACK_CALLEE_DATA_SIZE 16

/* We require the stack to be 8B aligned at the start of a shader */
#define BRW_BTD_STACK_ALIGN 8

bool brw_nir_lower_ray_queries(nir_shader *shader,
                               const struct intel_device_info *devinfo);
void brw_nir_lower_shader_returns(nir_shader *shader);
bool brw_nir_lower_shader_calls(nir_shader *shader, struct brw_bs_prog_key *key);
void brw_nir_lower_rt_intrinsics(nir_shader *shader,
                                 const struct intel_device_info *devinfo);
void brw_nir_lower_intersection_shader(nir_shader *intersection,
                                       const nir_shader *any_hit,
                                       const struct intel_device_info *devinfo);

/* Internal helper shaders generated by the driver. */
nir_shader *
brw_nir_create_raygen_trampoline(const struct brw_compiler *compiler,
                                 void *mem_ctx);
nir_shader *
brw_nir_create_trivial_return_shader(const struct brw_compiler *compiler,
                                     void *mem_ctx);

#ifdef __cplusplus
}
#endif

#endif /* BRW_NIR_RT_H */

View file

@ -1,990 +0,0 @@
/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef BRW_NIR_RT_BUILDER_H
#define BRW_NIR_RT_BUILDER_H
/* This file provides helpers to access memory based data structures that the
* RT hardware reads/writes and their locations.
*
* See also "Memory Based Data Structures for Ray Tracing" (BSpec 47547) and
* "Ray Tracing Address Computation for Memory Resident Structures" (BSpec
* 47550).
*/
#include "brw_rt.h"
#include "nir_builder.h"
/* In fragment shaders, RT memory accesses must also run in helper
 * invocations so derivatives stay valid.
 */
#define is_access_for_builder(b) \
   ((b)->shader->info.stage == MESA_SHADER_FRAGMENT ? \
    ACCESS_INCLUDE_HELPERS : 0)

/* Loads from a 64-bit global address with the given alignment. */
static inline nir_def *
brw_nir_rt_load(nir_builder *b, nir_def *addr, unsigned align,
                unsigned components, unsigned bit_size)
{
   return nir_build_load_global(b, components, bit_size, addr,
                                .align_mul = align,
                                .access = is_access_for_builder(b));
}
/* Stores to a 64-bit global address; the write mask is clamped to the
 * value's component count.
 */
static inline void
brw_nir_rt_store(nir_builder *b, nir_def *addr, unsigned align,
                 nir_def *value, unsigned write_mask)
{
   nir_build_store_global(b, value, addr,
                          .align_mul = align,
                          .write_mask = (write_mask) &
                                        BITFIELD_MASK(value->num_components),
                          .access = is_access_for_builder(b));
}
/* Block-loads uniform (subgroup-constant) data from a global address,
 * predicated on pred.
 */
static inline nir_def *
brw_nir_rt_load_const(nir_builder *b, unsigned components,
                      nir_def *addr, nir_def *pred)
{
   return nir_load_global_const_block_intel(b, components, addr, pred);
}
/* Loads the dual-subslice ID of the current invocation. */
static inline nir_def *
brw_load_btd_dss_id(nir_builder *b)
{
   return nir_load_topology_id_intel(b, .base = BRW_TOPOLOGY_ID_DSS);
}
/* Total number of SIMD lanes on one DSS, as an immediate. */
static inline nir_def *
brw_nir_rt_load_num_simd_lanes_per_dss(nir_builder *b,
                                       const struct intel_device_info *devinfo)
{
   return nir_imm_int(b, devinfo->num_thread_per_eu *
                         devinfo->max_eus_per_subslice *
                         16 /* The RT computation is based off SIMD16 */);
}
/* Loads the combined EU/thread/SIMD-lane topology ID. */
static inline nir_def *
brw_load_eu_thread_simd(nir_builder *b)
{
   return nir_load_topology_id_intel(b, .base = BRW_TOPOLOGY_ID_EU_THREAD_SIMD);
}
/* Globally-unique async RT stack ID:
 * DSSID * numDSSRTStacks + per-DSS stack ID.
 */
static inline nir_def *
brw_nir_rt_async_stack_id(nir_builder *b)
{
   return nir_iadd(b, nir_umul_32x16(b, nir_load_ray_num_dss_rt_stacks_intel(b),
                                        brw_load_btd_dss_id(b)),
                      nir_load_btd_stack_id_intel(b));
}
/* Per-DSS sync (ray query) stack ID; one stack per EU thread SIMD lane. */
static inline nir_def *
brw_nir_rt_sync_stack_id(nir_builder *b)
{
   return brw_load_eu_thread_simd(b);
}
/* We have our own load/store scratch helpers because they emit a global
 * memory read or write based on the scratch_base_ptr system value rather
 * than a load/store_scratch intrinsic.
 */
static inline nir_def *
brw_nir_rt_load_scratch(nir_builder *b, uint32_t offset, unsigned align,
                        unsigned num_components, unsigned bit_size)
{
   nir_def *addr =
      nir_iadd_imm(b, nir_load_scratch_base_ptr(b, 1, 64, 1), offset);
   /* Alignment cannot exceed the guaranteed stack alignment. */
   return brw_nir_rt_load(b, addr, MIN2(align, BRW_BTD_STACK_ALIGN),
                          num_components, bit_size);
}
/* Stores to the BTD scratch stack at a fixed byte offset; see
 * brw_nir_rt_load_scratch for why this goes through global memory.
 */
static inline void
brw_nir_rt_store_scratch(nir_builder *b, uint32_t offset, unsigned align,
                         nir_def *value, nir_component_mask_t write_mask)
{
   nir_def *addr =
      nir_iadd_imm(b, nir_load_scratch_base_ptr(b, 1, 64, 1), offset);
   brw_nir_rt_store(b, addr, MIN2(align, BRW_BTD_STACK_ALIGN),
                    value, write_mask);
}
/* Spawns the bindless shader at the given BINDLESS_SHADER_RECORD address. */
static inline void
brw_nir_btd_spawn(nir_builder *b, nir_def *record_addr)
{
   nir_btd_spawn_intel(b, nir_load_btd_global_arg_addr_intel(b), record_addr);
}
/* Retires the current invocation's BTD stack ID. */
static inline void
brw_nir_btd_retire(nir_builder *b)
{
   nir_btd_retire_intel(b);
}
/** This is a pseudo-op which does a bindless return
 *
 * It loads the return address from the stack and calls btd_spawn to spawn the
 * resume shader.
 */
static inline void
brw_nir_btd_return(struct nir_builder *b)
{
   nir_def *resume_addr =
      brw_nir_rt_load_scratch(b, BRW_BTD_STACK_RESUME_BSR_ADDR_OFFSET,
                              8 /* align */, 1, 64);
   brw_nir_btd_spawn(b, resume_addr);
}
/* Debug helper: asserts that a NIR value has the expected vector width and
 * bit size.  Compiles away in release builds.
 */
static inline void
assert_def_size(nir_def *def, unsigned num_components, unsigned bit_size)
{
   assert(def->num_components == num_components);

   assert(def->bit_size == bit_size);
}
/* Total number of async RT stacks across the whole GPU:
 * numDSSRTStacks * (upper bound of DSS IDs).
 */
static inline nir_def *
brw_nir_num_rt_stacks(nir_builder *b,
                      const struct intel_device_info *devinfo)
{
   return nir_imul_imm(b, nir_load_ray_num_dss_rt_stacks_intel(b),
                          intel_device_info_dual_subslice_id_bound(devinfo));
}
/* Address of this stack's SW-defined "hotzone".  Hotzones live immediately
 * below rtMemBasePtr: the whole hotzone array is subtracted, then indexed by
 * the async stack ID.
 */
static inline nir_def *
brw_nir_rt_sw_hotzone_addr(nir_builder *b,
                           const struct intel_device_info *devinfo)
{
   nir_def *offset32 =
      nir_imul_imm(b, brw_nir_rt_async_stack_id(b),
                      BRW_RT_SIZEOF_HOTZONE);

   /* Negative offset: hotzones sit below the base address. */
   offset32 = nir_iadd(b, offset32, nir_ineg(b,
      nir_imul_imm(b, brw_nir_num_rt_stacks(b, devinfo),
                      BRW_RT_SIZEOF_HOTZONE)));

   return nir_iadd(b, nir_load_ray_base_mem_addr_intel(b),
                      nir_i2i64(b, offset32));
}
static inline nir_def *
brw_nir_rt_sync_stack_addr(nir_builder *b,
                           nir_def *base_mem_addr,
                           const struct intel_device_info *devinfo)
{
   /* For Ray queries (Synchronous Ray Tracing), the formula is similar but
    * goes down from rtMemBasePtr :
    *
    *    syncBase  = RTDispatchGlobals.rtMemBasePtr
    *              - (DSSID * NUM_SIMD_LANES_PER_DSS + SyncStackID + 1)
    *              * syncStackSize
    *
    * We assume that we can calculate a 32-bit offset first and then add it
    * to the 64-bit base address at the end.
    */
   nir_def *offset32 =
      nir_imul(b,
               nir_iadd(b,
                        nir_imul(b, brw_load_btd_dss_id(b),
                                    brw_nir_rt_load_num_simd_lanes_per_dss(b, devinfo)),
                        nir_iadd_imm(b, brw_nir_rt_sync_stack_id(b), 1)),
               nir_imm_int(b, BRW_RT_SIZEOF_RAY_QUERY));
   return nir_isub(b, base_mem_addr, nir_u2u64(b, offset32));
}
static inline nir_def *
brw_nir_rt_stack_addr(nir_builder *b)
{
   /* From the BSpec "Address Computation for Memory Based Data Structures:
    * Ray and TraversalStack (Async Ray Tracing)":
    *
    *    stackBase = RTDispatchGlobals.rtMemBasePtr
    *              + (DSSID * RTDispatchGlobals.numDSSRTStacks + stackID)
    *              * RTDispatchGlobals.stackSizePerRay // 64B aligned
    *
    * We assume that we can calculate a 32-bit offset first and then add it
    * to the 64-bit base address at the end.
    */
   nir_def *offset32 =
      nir_imul(b, brw_nir_rt_async_stack_id(b),
                  nir_load_ray_hw_stack_size_intel(b));
   return nir_iadd(b, nir_load_ray_base_mem_addr_intel(b),
                      nir_u2u64(b, offset32));
}
/* Address of the committed or potential MemHit structure on a given HW ray
 * stack.  The committed hit sits at the stack base; the potential hit
 * immediately follows it.
 */
static inline nir_def *
brw_nir_rt_mem_hit_addr_from_addr(nir_builder *b,
                                  nir_def *stack_addr,
                                  bool committed)
{
   uint32_t hit_offset = committed ? 0 : BRW_RT_SIZEOF_HIT_INFO;
   return nir_iadd_imm(b, stack_addr, hit_offset);
}
/* Like brw_nir_rt_mem_hit_addr_from_addr() but computes the current
 * invocation's HW ray stack base itself.
 */
static inline nir_def *
brw_nir_rt_mem_hit_addr(nir_builder *b, bool committed)
{
   nir_def *stack_addr = brw_nir_rt_stack_addr(b);
   return brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, committed);
}
/* Address of the SW-defined hit-attribute data area on the HW ray stack
 * (used for procedural hits).
 */
static inline nir_def *
brw_nir_rt_hit_attrib_data_addr(nir_builder *b)
{
   return nir_iadd_imm(b, brw_nir_rt_stack_addr(b),
                          BRW_RT_OFFSETOF_HIT_ATTRIB_DATA);
}
/* Address of the MemRay structure for the given BVH level on a ray stack.
 *
 * From the BSpec "Address Computation for Memory Based Data Structures:
 * Ray and TraversalStack (Async Ray Tracing)":
 *
 *    rayBase = stackBase + sizeof(HitInfo) * 2 // 64B aligned
 *    rayPtr  = rayBase + bvhLevel * sizeof(Ray); // 64B aligned
 *
 * In Vulkan, we always have exactly two levels of BVH: World and Object.
 */
static inline nir_def *
brw_nir_rt_mem_ray_addr(nir_builder *b,
                        nir_def *stack_addr,
                        enum brw_rt_bvh_level bvh_level)
{
   const uint32_t ray_base = BRW_RT_SIZEOF_HIT_INFO * 2;
   const uint32_t ray_offset = ray_base + bvh_level * BRW_RT_SIZEOF_RAY;

   return nir_iadd_imm(b, stack_addr, ray_offset);
}
/* Address of this invocation's SW-managed stack.  The SW stacks live right
 * after all the HW stacks, indexed by the async stack ID with a 64-bit
 * per-stack offset (SW stacks may be large).
 */
static inline nir_def *
brw_nir_rt_sw_stack_addr(nir_builder *b,
                         const struct intel_device_info *devinfo)
{
   nir_def *addr = nir_load_ray_base_mem_addr_intel(b);

   /* Skip over all the HW stacks. */
   nir_def *offset32 = nir_imul(b, brw_nir_num_rt_stacks(b, devinfo),
                                   nir_load_ray_hw_stack_size_intel(b));
   addr = nir_iadd(b, addr, nir_u2u64(b, offset32));

   nir_def *offset_in_stack =
      nir_imul(b, nir_u2u64(b, brw_nir_rt_async_stack_id(b)),
                  nir_u2u64(b, nir_load_ray_sw_stack_size_intel(b)));

   return nir_iadd(b, addr, offset_in_stack);
}
/* Extracts 16-bit word #2 (bits 47:32) of a 64-bit value: take the high
 * 32 bits, then the low 16 bits of that.
 */
static inline nir_def *
nir_unpack_64_4x16_split_z(nir_builder *b, nir_def *val)
{
   nir_def *hi32 = nir_unpack_64_2x32_split_y(b, val);
   return nir_unpack_32_2x16_split_x(b, hi32);
}
/* Unpacked fields of the RTDispatchGlobals structure; filled in by
 * brw_nir_rt_load_globals_addr().
 */
struct brw_nir_rt_globals_defs {
   nir_def *base_mem_addr;           /* rtMemBasePtr */
   nir_def *call_stack_handler_addr; /* BSR of the call stack handler */
   nir_def *hw_stack_size;           /* per-ray HW stack size in bytes */
   nir_def *num_dss_rt_stacks;
   nir_def *hit_sbt_addr;
   nir_def *hit_sbt_stride;
   nir_def *miss_sbt_addr;
   nir_def *miss_sbt_stride;
   nir_def *sw_stack_size;           /* per-ray SW stack size in bytes */
   nir_def *launch_size;             /* vec3 ray-dispatch dimensions */
   nir_def *call_sbt_addr;
   nir_def *call_sbt_stride;
   nir_def *resume_sbt_addr;
};
/* Loads and unpacks RTDispatchGlobals from the given address.  The first
 * 64B block holds the memory layout parameters and hit/miss SBTs; the
 * second 64B block holds the callable and resume SBTs.  SBT addresses are
 * 48-bit and packed next to their 16-bit strides.
 */
static inline void
brw_nir_rt_load_globals_addr(nir_builder *b,
                             struct brw_nir_rt_globals_defs *defs,
                             nir_def *addr)
{
   nir_def *data;
   data = brw_nir_rt_load_const(b, 16, addr, nir_imm_true(b));
   defs->base_mem_addr = nir_pack_64_2x32(b, nir_trim_vector(b, data, 2));

   defs->call_stack_handler_addr =
      nir_pack_64_2x32(b, nir_channels(b, data, 0x3 << 2));

   defs->hw_stack_size = nir_channel(b, data, 4);
   /* Only the low 16 bits hold numDSSRTStacks. */
   defs->num_dss_rt_stacks = nir_iand_imm(b, nir_channel(b, data, 5), 0xffff);
   /* 48-bit SBT address: low dword plus sign-extended low 16 bits of the
    * next dword; the stride lives in that dword's high 16 bits.
    */
   defs->hit_sbt_addr =
      nir_pack_64_2x32_split(b, nir_channel(b, data, 8),
                                nir_extract_i16(b, nir_channel(b, data, 9),
                                                   nir_imm_int(b, 0)));
   defs->hit_sbt_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data, 9));
   defs->miss_sbt_addr =
      nir_pack_64_2x32_split(b, nir_channel(b, data, 10),
                                nir_extract_i16(b, nir_channel(b, data, 11),
                                                   nir_imm_int(b, 0)));
   defs->miss_sbt_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data, 11));
   defs->sw_stack_size = nir_channel(b, data, 12);
   defs->launch_size = nir_channels(b, data, 0x7u << 13);

   /* Second 64B block: callable and resume SBTs. */
   data = brw_nir_rt_load_const(b, 8, nir_iadd_imm(b, addr, 64), nir_imm_true(b));
   defs->call_sbt_addr =
      nir_pack_64_2x32_split(b, nir_channel(b, data, 0),
                                nir_extract_i16(b, nir_channel(b, data, 1),
                                                   nir_imm_int(b, 0)));
   defs->call_sbt_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data, 1));

   defs->resume_sbt_addr =
      nir_pack_64_2x32(b, nir_channels(b, data, 0x3 << 2));
}
/* Convenience wrapper: read the RT dispatch globals from the BTD global
 * argument pointer of the current dispatch.
 */
static inline void
brw_nir_rt_load_globals(nir_builder *b,
                        struct brw_nir_rt_globals_defs *defs)
{
   nir_def *globals_addr = nir_load_btd_global_arg_addr_intel(b);
   brw_nir_rt_load_globals_addr(b, defs, globals_addr);
}
/* Convert a packed hit-record leaf pointer into a 64-bit byte address.
 *
 * Leaf pointers are 42 bits wide and stored in units of 64 bytes, leaving
 * 22 bits of unrelated data at the top.  Multiplying by 64 shifts the value
 * into byte units; sign-extending the low 16 bits of the high dword then
 * wipes out the garbage that occupied the top bits.
 */
static inline nir_def *
brw_nir_rt_unpack_leaf_ptr(nir_builder *b, nir_def *vec2)
{
   nir_def *addr64 = nir_imul_imm(b, nir_pack_64_2x32(b, vec2), 64);
   nir_def *lo = nir_unpack_64_2x32_split_x(b, addr64);
   nir_def *hi = nir_unpack_64_2x32_split_y(b, addr64);
   return nir_pack_64_2x32_split(b, lo,
                                 nir_extract_i16(b, hi, nir_imm_int(b, 0)));
}
/**
 * MemHit memory layout (BSpec 47547) :
 *
 *  name            bits    description
 * - t              32      hit distance of current hit (or initial traversal distance)
 * - u              32      barycentric hit coordinates
 * - v              32      barycentric hit coordinates
 * - primIndexDelta 16      prim index delta for compressed meshlets and quads
 * - valid          1       set if there is a hit
 * - leafType       3       type of node primLeafPtr is pointing to
 * - primLeafIndex  4       index of the hit primitive inside the leaf
 * - bvhLevel       3       the instancing level at which the hit occured
 * - frontFace      1       whether we hit the front-facing side of a triangle (also used to pass opaque flag when calling intersection shaders)
 * - pad0           4       unused bits
 * - primLeafPtr    42      pointer to BVH leaf node (multiple of 64 bytes)
 * - hitGroupRecPtr0 22     LSB of hit group record of the hit triangle (multiple of 16 bytes)
 * - instLeafPtr    42      pointer to BVH instance leaf node (in multiple of 64 bytes)
 * - hitGroupRecPtr1 22     MSB of hit group record of the hit triangle (multiple of 32 bytes)
 */
/* Unpacked SSA values for a MemHit structure (see layout above).  The
 * "done" flag lives in one of the pad0 bits (bit 28 of dword 3) and is a
 * SW-defined flag used only for ray queries.
 */
struct brw_nir_rt_mem_hit_defs {
   nir_def *t;
   nir_def *tri_bary; /**< Only valid for triangle geometry */
   nir_def *aabb_hit_kind; /**< Only valid for AABB geometry */
   nir_def *valid;
   nir_def *leaf_type;
   nir_def *prim_index_delta;
   nir_def *prim_leaf_index;
   nir_def *bvh_level;
   nir_def *front_face;
   nir_def *done; /**< Only for ray queries */
   nir_def *prim_leaf_ptr;
   nir_def *inst_leaf_ptr;
};
/* Load and unpack the committed (or, if !@committed, the potential) MemHit
 * of the RT stack at @stack_addr.  The 32B structure is read as two 16B
 * vec4 loads; dword 3 holds the packed bitfields described in the layout
 * comment above.
 */
static inline void
brw_nir_rt_load_mem_hit_from_addr(nir_builder *b,
                                  struct brw_nir_rt_mem_hit_defs *defs,
                                  nir_def *stack_addr,
                                  bool committed)
{
   nir_def *hit_addr =
      brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, committed);
   nir_def *data = brw_nir_rt_load(b, hit_addr, 16, 4, 32);
   defs->t = nir_channel(b, data, 0);
   defs->aabb_hit_kind = nir_channel(b, data, 1);
   /* u/v occupy dwords 1 and 2; they alias aabb_hit_kind for AABB geometry */
   defs->tri_bary = nir_channels(b, data, 0x6);
   nir_def *bitfield = nir_channel(b, data, 3);
   defs->prim_index_delta =
      nir_ubitfield_extract(b, bitfield, nir_imm_int(b, 0), nir_imm_int(b, 16));
   defs->valid = nir_i2b(b, nir_iand_imm(b, bitfield, 1u << 16));
   defs->leaf_type =
      nir_ubitfield_extract(b, bitfield, nir_imm_int(b, 17), nir_imm_int(b, 3));
   defs->prim_leaf_index =
      nir_ubitfield_extract(b, bitfield, nir_imm_int(b, 20), nir_imm_int(b, 4));
   defs->bvh_level =
      nir_ubitfield_extract(b, bitfield, nir_imm_int(b, 24), nir_imm_int(b, 3));
   defs->front_face = nir_i2b(b, nir_iand_imm(b, bitfield, 1 << 27));
   defs->done = nir_i2b(b, nir_iand_imm(b, bitfield, 1 << 28));
   /* Second half: the two packed 42-bit leaf pointers */
   data = brw_nir_rt_load(b, nir_iadd_imm(b, hit_addr, 16), 16, 4, 32);
   defs->prim_leaf_ptr =
      brw_nir_rt_unpack_leaf_ptr(b, nir_channels(b, data, 0x3 << 0));
   defs->inst_leaf_ptr =
      brw_nir_rt_unpack_leaf_ptr(b, nir_channels(b, data, 0x3 << 2));
}
/* Load the committed or potential MemHit from the current invocation's
 * HW ray-tracing stack.
 */
static inline void
brw_nir_rt_load_mem_hit(nir_builder *b,
                        struct brw_nir_rt_mem_hit_defs *defs,
                        bool committed)
{
   nir_def *stack = brw_nir_rt_stack_addr(b);
   brw_nir_rt_load_mem_hit_from_addr(b, defs, stack, committed);
}
/* Copy @size bytes of global memory from @src_addr to @dst_addr in 16B
 * (vec4 of 32-bit) chunks.  @size must be a multiple of 16.
 *
 * Fix: the clamped dst_align/src_align values were computed but never
 * used — each load/store passed a hard-coded alignment of 16, which
 * over-promises alignment whenever the caller's buffers are less than
 * 16B aligned.  Pass the clamped values through instead, matching
 * brw_nir_memclear_global() below which already honors dst_align.
 */
static inline void
brw_nir_memcpy_global(nir_builder *b,
                      nir_def *dst_addr, uint32_t dst_align,
                      nir_def *src_addr, uint32_t src_align,
                      uint32_t size)
{
   /* We're going to copy in 16B chunks */
   assert(size % 16 == 0);
   dst_align = MIN2(dst_align, 16);
   src_align = MIN2(src_align, 16);
   for (unsigned offset = 0; offset < size; offset += 16) {
      nir_def *data =
         brw_nir_rt_load(b, nir_iadd_imm(b, src_addr, offset), src_align,
                         4, 32);
      brw_nir_rt_store(b, nir_iadd_imm(b, dst_addr, offset), dst_align,
                       data, 0xf /* write_mask */);
   }
}
/* Zero @size bytes of global memory at @dst_addr, storing a zero vec4 per
 * 16B chunk.  @size must be a multiple of 16; @dst_align is clamped to the
 * chunk size.
 */
static inline void
brw_nir_memclear_global(nir_builder *b,
                        nir_def *dst_addr, uint32_t dst_align,
                        uint32_t size)
{
   assert(size % 16 == 0);
   dst_align = MIN2(dst_align, 16);
   nir_def *zero_vec = nir_imm_ivec4(b, 0, 0, 0, 0);
   unsigned offset = 0;
   while (offset < size) {
      brw_nir_rt_store(b, nir_iadd_imm(b, dst_addr, offset), dst_align,
                       zero_vec, 0xf /* write_mask */);
      offset += 16;
   }
}
/* Return the "done" flag of the potential (uncommitted) MemHit for the ray
 * query whose stack lives at @stack_addr.
 */
static inline nir_def *
brw_nir_rt_query_done(nir_builder *b, nir_def *stack_addr)
{
   struct brw_nir_rt_mem_hit_defs hit = {};
   brw_nir_rt_load_mem_hit_from_addr(b, &hit, stack_addr,
                                     false /* committed */);
   return hit.done;
}
/* Read-modify-write: OR bit @bit into the dword at @addr + @addr_offset. */
static inline void
brw_nir_rt_set_dword_bit_at(nir_builder *b,
                            nir_def *addr,
                            uint32_t addr_offset,
                            uint32_t bit)
{
   nir_def *dword_addr = nir_iadd_imm(b, addr, addr_offset);
   nir_def *old_val = brw_nir_rt_load(b, dword_addr, 4, 1, 32);
   nir_def *new_val = nir_ior_imm(b, old_val, 1u << bit);
   brw_nir_rt_store(b, dword_addr, 4, new_val, 0x1);
}
/* Mark a ray query as done by setting the SW-defined "done" flag — bit 28
 * of dword 3 (see the MemHit layout comment above) — in the potential
 * (uncommitted) hit structure at @stack_addr.
 */
static inline void
brw_nir_rt_query_mark_done(nir_builder *b, nir_def *stack_addr)
{
   brw_nir_rt_set_dword_bit_at(b,
                               brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr,
                                                                 false /* committed */),
                               4 * 3 /* dword offset */, 28 /* bit */);
}
/* This helper clears the 3rd dword of the MemHit structure where the valid
 * bit is located.  It does so for both hits: i == 0 clears the committed
 * hit, i == 1 clears the potential hit.
 */
static inline void
brw_nir_rt_query_mark_init(nir_builder *b, nir_def *stack_addr)
{
   nir_def *dword_addr;
   for (uint32_t i = 0; i < 2; i++) {
      dword_addr =
         nir_iadd_imm(b,
                      brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr,
                                                        i == 0 /* committed */),
                      4 * 3 /* dword offset */);
      brw_nir_rt_store(b, dword_addr, 4, nir_imm_int(b, 0), 0x1);
   }
}
/* This helper is pretty much a memcpy of uncommitted into committed hit
 * structure, just adding the valid bit (bit 16 of dword 3).  The valid bit
 * is also written back into the potential hit's first 16B chunk.
 */
static inline void
brw_nir_rt_commit_hit_addr(nir_builder *b, nir_def *stack_addr)
{
   nir_def *dst_addr =
      brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, true /* committed */);
   nir_def *src_addr =
      brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, false /* committed */);
   /* Copy in 16B chunks; only the first chunk (containing dword 3) needs
    * the valid bit patched in.
    */
   for (unsigned offset = 0; offset < BRW_RT_SIZEOF_HIT_INFO; offset += 16) {
      nir_def *data =
         brw_nir_rt_load(b, nir_iadd_imm(b, src_addr, offset), 16, 4, 32);
      if (offset == 0) {
         data = nir_vec4(b,
                         nir_channel(b, data, 0),
                         nir_channel(b, data, 1),
                         nir_channel(b, data, 2),
                         nir_ior_imm(b,
                                     nir_channel(b, data, 3),
                                     0x1 << 16 /* valid */));
         /* Also write the potential hit as we change it. */
         brw_nir_rt_store(b, nir_iadd_imm(b, src_addr, offset), 16,
                          data, 0xf /* write_mask */);
      }
      brw_nir_rt_store(b, nir_iadd_imm(b, dst_addr, offset), 16,
                       data, 0xf /* write_mask */);
   }
}
/* Commit the potential hit on the current invocation's RT stack. */
static inline void
brw_nir_rt_commit_hit(nir_builder *b)
{
   nir_def *stack_addr = brw_nir_rt_stack_addr(b);
   brw_nir_rt_commit_hit_addr(b, stack_addr);
}
/* Record a generated hit at distance @t_val on the stack at @stack_addr,
 * updating both the potential and committed MemHit structures as described
 * in the inline comments below.
 */
static inline void
brw_nir_rt_generate_hit_addr(nir_builder *b, nir_def *stack_addr, nir_def *t_val)
{
   nir_def *committed_addr =
      brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, true /* committed */);
   nir_def *potential_addr =
      brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, false /* committed */);
   /* Set:
    *
    *   potential.t     = t_val;
    *   potential.valid = true;
    */
   nir_def *potential_hit_dwords_0_3 =
      brw_nir_rt_load(b, potential_addr, 16, 4, 32);
   potential_hit_dwords_0_3 =
      nir_vec4(b,
               t_val,
               nir_channel(b, potential_hit_dwords_0_3, 1),
               nir_channel(b, potential_hit_dwords_0_3, 2),
               nir_ior_imm(b, nir_channel(b, potential_hit_dwords_0_3, 3),
                           (0x1 << 16) /* valid */));
   brw_nir_rt_store(b, potential_addr, 16, potential_hit_dwords_0_3, 0xf /* write_mask */);
   /* Set:
    *
    *   committed.t               = t_val;
    *   committed.u               = 0.0f;
    *   committed.v               = 0.0f;
    *   committed.valid           = true;
    *   committed.leaf_type       = potential.leaf_type;
    *   committed.bvh_level       = BRW_RT_BVH_LEVEL_OBJECT;
    *   committed.front_face      = false;
    *   committed.prim_leaf_index = 0;
    *   committed.done            = false;
    */
   nir_def *committed_hit_dwords_0_3 =
      brw_nir_rt_load(b, committed_addr, 16, 4, 32);
   committed_hit_dwords_0_3 =
      nir_vec4(b,
               t_val,
               nir_imm_float(b, 0.0f),
               nir_imm_float(b, 0.0f),
               /* NOTE(review): 0x000e0000 sets bits 17:19 (the leafType
                * field) to 0x7 rather than preserving potential.leaf_type
                * as the comment above states — confirm the intended
                * encoding.
                */
               nir_ior_imm(b,
                           nir_ior_imm(b, nir_channel(b, potential_hit_dwords_0_3, 3), 0x000e0000),
                           (0x1 << 16) /* valid */ |
                           (BRW_RT_BVH_LEVEL_OBJECT << 24) /* bvh_level (bits 24:26 of dword 3) */));
   brw_nir_rt_store(b, committed_addr, 16, committed_hit_dwords_0_3, 0xf /* write_mask */);
   /* Set:
    *
    *   committed.prim_leaf_ptr   = potential.prim_leaf_ptr;
    *   committed.inst_leaf_ptr   = potential.inst_leaf_ptr;
    */
   brw_nir_memcpy_global(b,
                         nir_iadd_imm(b, committed_addr, 16), 16,
                         nir_iadd_imm(b, potential_addr, 16), 16,
                         16);
}
/* SSA values for the fields of the HW MemRay structure. */
struct brw_nir_rt_mem_ray_defs {
   nir_def *orig;
   nir_def *dir;
   nir_def *t_near;
   nir_def *t_far;
   nir_def *root_node_ptr;
   nir_def *ray_flags;
   nir_def *hit_group_sr_base_ptr;
   nir_def *hit_group_sr_stride;
   nir_def *miss_sr_ptr;
   nir_def *shader_index_multiplier;
   nir_def *inst_leaf_ptr;
   nir_def *ray_mask;
};
/* Write a MemRay for a ray query at @ray_addr.
 *
 * Only the fields a ray query needs are written: origin/direction and the
 * t range (bytes 0..31), root_node_ptr + ray_flags (bytes 32..39, write
 * mask 0x3), and inst_leaf_ptr + ray_mask (bytes 56..63).  48-bit pointers
 * are stored as a 32-bit low dword plus a 16-bit high half packed next to
 * a 16-bit field.  Compare brw_nir_rt_store_mem_ray(), which also writes
 * the shader-record fields.
 */
static inline void
brw_nir_rt_store_mem_ray_query_at_addr(nir_builder *b,
                                       nir_def *ray_addr,
                                       const struct brw_nir_rt_mem_ray_defs *defs)
{
   assert_def_size(defs->orig, 3, 32);
   assert_def_size(defs->dir, 3, 32);
   brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 0), 16,
      nir_vec4(b, nir_channel(b, defs->orig, 0),
                  nir_channel(b, defs->orig, 1),
                  nir_channel(b, defs->orig, 2),
                  nir_channel(b, defs->dir, 0)),
      ~0 /* write mask */);
   assert_def_size(defs->t_near, 1, 32);
   assert_def_size(defs->t_far, 1, 32);
   brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 16), 16,
      nir_vec4(b, nir_channel(b, defs->dir, 1),
                  nir_channel(b, defs->dir, 2),
                  defs->t_near,
                  defs->t_far),
      ~0 /* write mask */);
   assert_def_size(defs->root_node_ptr, 1, 64);
   assert_def_size(defs->ray_flags, 1, 16);
   brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 32), 16,
      nir_vec2(b, nir_unpack_64_2x32_split_x(b, defs->root_node_ptr),
                  nir_pack_32_2x16_split(b,
                     nir_unpack_64_4x16_split_z(b, defs->root_node_ptr),
                     defs->ray_flags)),
      0x3 /* write mask */);
   /* leaf_ptr is optional */
   nir_def *inst_leaf_ptr;
   if (defs->inst_leaf_ptr) {
      inst_leaf_ptr = defs->inst_leaf_ptr;
   } else {
      inst_leaf_ptr = nir_imm_int64(b, 0);
   }
   assert_def_size(inst_leaf_ptr, 1, 64);
   assert_def_size(defs->ray_mask, 1, 32);
   brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 56), 8,
      nir_vec2(b, nir_unpack_64_2x32_split_x(b, inst_leaf_ptr),
                  nir_pack_32_2x16_split(b,
                     nir_unpack_64_4x16_split_z(b, inst_leaf_ptr),
                     nir_unpack_32_2x16_split_x(b, defs->ray_mask))),
      ~0 /* write mask */);
}
/* Write the full HW MemRay structure for @bvh_level on the current
 * invocation's stack, including the hit-group/miss shader record pointers
 * and strides that ray queries don't use (c.f.
 * brw_nir_rt_store_mem_ray_query_at_addr()).  The shader_index_multiplier
 * is stored pre-shifted left by 8.
 */
static inline void
brw_nir_rt_store_mem_ray(nir_builder *b,
                         const struct brw_nir_rt_mem_ray_defs *defs,
                         enum brw_rt_bvh_level bvh_level)
{
   nir_def *ray_addr =
      brw_nir_rt_mem_ray_addr(b, brw_nir_rt_stack_addr(b), bvh_level);
   assert_def_size(defs->orig, 3, 32);
   assert_def_size(defs->dir, 3, 32);
   brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 0), 16,
      nir_vec4(b, nir_channel(b, defs->orig, 0),
                  nir_channel(b, defs->orig, 1),
                  nir_channel(b, defs->orig, 2),
                  nir_channel(b, defs->dir, 0)),
      ~0 /* write mask */);
   assert_def_size(defs->t_near, 1, 32);
   assert_def_size(defs->t_far, 1, 32);
   brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 16), 16,
      nir_vec4(b, nir_channel(b, defs->dir, 1),
                  nir_channel(b, defs->dir, 2),
                  defs->t_near,
                  defs->t_far),
      ~0 /* write mask */);
   assert_def_size(defs->root_node_ptr, 1, 64);
   assert_def_size(defs->ray_flags, 1, 16);
   assert_def_size(defs->hit_group_sr_base_ptr, 1, 64);
   assert_def_size(defs->hit_group_sr_stride, 1, 16);
   /* 48-bit pointers are packed as a low dword plus a 16-bit high half that
    * shares a dword with a 16-bit field.
    */
   brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 32), 16,
      nir_vec4(b, nir_unpack_64_2x32_split_x(b, defs->root_node_ptr),
                  nir_pack_32_2x16_split(b,
                     nir_unpack_64_4x16_split_z(b, defs->root_node_ptr),
                     defs->ray_flags),
                  nir_unpack_64_2x32_split_x(b, defs->hit_group_sr_base_ptr),
                  nir_pack_32_2x16_split(b,
                     nir_unpack_64_4x16_split_z(b, defs->hit_group_sr_base_ptr),
                     defs->hit_group_sr_stride)),
      ~0 /* write mask */);
   /* leaf_ptr is optional */
   nir_def *inst_leaf_ptr;
   if (defs->inst_leaf_ptr) {
      inst_leaf_ptr = defs->inst_leaf_ptr;
   } else {
      inst_leaf_ptr = nir_imm_int64(b, 0);
   }
   assert_def_size(defs->miss_sr_ptr, 1, 64);
   assert_def_size(defs->shader_index_multiplier, 1, 32);
   assert_def_size(inst_leaf_ptr, 1, 64);
   assert_def_size(defs->ray_mask, 1, 32);
   brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 48), 16,
      nir_vec4(b, nir_unpack_64_2x32_split_x(b, defs->miss_sr_ptr),
                  nir_pack_32_2x16_split(b,
                     nir_unpack_64_4x16_split_z(b, defs->miss_sr_ptr),
                     nir_unpack_32_2x16_split_x(b,
                        nir_ishl(b, defs->shader_index_multiplier,
                                    nir_imm_int(b, 8)))),
                  nir_unpack_64_2x32_split_x(b, inst_leaf_ptr),
                  nir_pack_32_2x16_split(b,
                     nir_unpack_64_4x16_split_z(b, inst_leaf_ptr),
                     nir_unpack_32_2x16_split_x(b, defs->ray_mask))),
      ~0 /* write mask */);
}
/* Read back the HW MemRay for @bvh_level at @ray_base_addr, undoing the
 * packing performed by the store helpers above: 16-bit high address halves
 * are sign-extended back into 64-bit pointers, and the shader index
 * multiplier is shifted back down by 8.
 */
static inline void
brw_nir_rt_load_mem_ray_from_addr(nir_builder *b,
                                  struct brw_nir_rt_mem_ray_defs *defs,
                                  nir_def *ray_base_addr,
                                  enum brw_rt_bvh_level bvh_level)
{
   nir_def *ray_addr = brw_nir_rt_mem_ray_addr(b,
                                                   ray_base_addr,
                                                   bvh_level);
   /* The 64B MemRay is read as four 16B vec4 loads */
   nir_def *data[4] = {
      brw_nir_rt_load(b, nir_iadd_imm(b, ray_addr,  0), 16, 4, 32),
      brw_nir_rt_load(b, nir_iadd_imm(b, ray_addr, 16), 16, 4, 32),
      brw_nir_rt_load(b, nir_iadd_imm(b, ray_addr, 32), 16, 4, 32),
      brw_nir_rt_load(b, nir_iadd_imm(b, ray_addr, 48), 16, 4, 32),
   };
   defs->orig = nir_trim_vector(b, data[0], 3);
   defs->dir = nir_vec3(b, nir_channel(b, data[0], 3),
                           nir_channel(b, data[1], 0),
                           nir_channel(b, data[1], 1));
   defs->t_near = nir_channel(b, data[1], 2);
   defs->t_far = nir_channel(b, data[1], 3);
   defs->root_node_ptr =
      nir_pack_64_2x32_split(b, nir_channel(b, data[2], 0),
                             nir_extract_i16(b, nir_channel(b, data[2], 1),
                                             nir_imm_int(b, 0)));
   defs->ray_flags =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data[2], 1));
   defs->hit_group_sr_base_ptr =
      nir_pack_64_2x32_split(b, nir_channel(b, data[2], 2),
                             nir_extract_i16(b, nir_channel(b, data[2], 3),
                                             nir_imm_int(b, 0)));
   defs->hit_group_sr_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data[2], 3));
   defs->miss_sr_ptr =
      nir_pack_64_2x32_split(b, nir_channel(b, data[3], 0),
                             nir_extract_i16(b, nir_channel(b, data[3], 1),
                                             nir_imm_int(b, 0)));
   /* Stored pre-shifted by 8 in brw_nir_rt_store_mem_ray() */
   defs->shader_index_multiplier =
      nir_ushr(b, nir_unpack_32_2x16_split_y(b, nir_channel(b, data[3], 1)),
                  nir_imm_int(b, 8));
   defs->inst_leaf_ptr =
      nir_pack_64_2x32_split(b, nir_channel(b, data[3], 2),
                             nir_extract_i16(b, nir_channel(b, data[3], 3),
                                             nir_imm_int(b, 0)));
   defs->ray_mask =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data[3], 3));
}
/* Load the HW MemRay for @bvh_level from the current invocation's stack. */
static inline void
brw_nir_rt_load_mem_ray(nir_builder *b,
                        struct brw_nir_rt_mem_ray_defs *defs,
                        enum brw_rt_bvh_level bvh_level)
{
   nir_def *stack = brw_nir_rt_stack_addr(b);
   brw_nir_rt_load_mem_ray_from_addr(b, defs, stack, bvh_level);
}
/* Fields decoded from a BVH instance leaf.  See the note below about the
 * swapped last columns of the two transform matrices.
 */
struct brw_nir_rt_bvh_instance_leaf_defs {
   nir_def *shader_index;
   nir_def *contribution_to_hit_group_index;
   nir_def *world_to_object[4];
   nir_def *instance_id;
   nir_def *instance_index;
   nir_def *object_to_world[4];
};
/* Load and unpack a BVH instance leaf at @leaf_addr.
 *
 * shader_index and contribution_to_hit_group_index occupy the low 24 bits
 * of dwords 0 and 1.  The matrices are stored as 3-float columns starting
 * at byte offsets 16 (world_to_object) and 80 (object_to_world);
 * instance_id/instance_index sit at byte offsets 72/76.
 */
static inline void
brw_nir_rt_load_bvh_instance_leaf(nir_builder *b,
                                  struct brw_nir_rt_bvh_instance_leaf_defs *defs,
                                  nir_def *leaf_addr)
{
   nir_def *leaf_desc = brw_nir_rt_load(b, leaf_addr, 4, 2, 32);
   defs->shader_index =
      nir_iand_imm(b, nir_channel(b, leaf_desc, 0), (1 << 24) - 1);
   defs->contribution_to_hit_group_index =
      nir_iand_imm(b, nir_channel(b, leaf_desc, 1), (1 << 24) - 1);
   defs->world_to_object[0] =
      brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 16), 4, 3, 32);
   defs->world_to_object[1] =
      brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 28), 4, 3, 32);
   defs->world_to_object[2] =
      brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 40), 4, 3, 32);
   /* The last column of the matrices is swapped between the two probably
    * because it makes it easier/faster for hardware somehow.
    */
   defs->object_to_world[3] =
      brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 52), 4, 3, 32);
   nir_def *data =
      brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 64), 4, 4, 32);
   defs->instance_id = nir_channel(b, data, 2);
   defs->instance_index = nir_channel(b, data, 3);
   defs->object_to_world[0] =
      brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 80), 4, 3, 32);
   defs->object_to_world[1] =
      brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 92), 4, 3, 32);
   defs->object_to_world[2] =
      brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 104), 4, 3, 32);
   defs->world_to_object[3] =
      brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 116), 4, 3, 32);
}
/* Fields decoded from the two-dword descriptor at the start of a BVH
 * primitive leaf.
 */
struct brw_nir_rt_bvh_primitive_leaf_defs {
   nir_def *shader_index;
   nir_def *geom_mask;
   nir_def *geom_index;
   nir_def *type;
   nir_def *geom_flags;
};
/* Load and unpack the primitive leaf descriptor at @leaf_addr.
 *
 * NOTE(review): nir_ubitfield_extract() takes (value, offset, bits); the
 * immediate pairs below read like (high bit, low bit) instead — verify
 * these extractions against the primitive leaf descriptor layout.
 */
static inline void
brw_nir_rt_load_bvh_primitive_leaf(nir_builder *b,
                                   struct brw_nir_rt_bvh_primitive_leaf_defs *defs,
                                   nir_def *leaf_addr)
{
   nir_def *desc = brw_nir_rt_load(b, leaf_addr, 4, 2, 32);
   defs->shader_index =
      nir_ubitfield_extract(b, nir_channel(b, desc, 0),
                            nir_imm_int(b, 23), nir_imm_int(b, 0));
   defs->geom_mask =
      nir_ubitfield_extract(b, nir_channel(b, desc, 0),
                            nir_imm_int(b, 31), nir_imm_int(b, 24));
   defs->geom_index =
      nir_ubitfield_extract(b, nir_channel(b, desc, 1),
                            nir_imm_int(b, 28), nir_imm_int(b, 0));
   defs->type =
      nir_ubitfield_extract(b, nir_channel(b, desc, 1),
                            nir_imm_int(b, 29), nir_imm_int(b, 29));
   defs->geom_flags =
      nir_ubitfield_extract(b, nir_channel(b, desc, 1),
                            nir_imm_int(b, 31), nir_imm_int(b, 30));
}
/* The three vertex positions stored in a primitive leaf. */
struct brw_nir_rt_bvh_primitive_leaf_positions_defs {
   nir_def *positions[3];
};
/* Load the three vec3 vertex positions that follow the two-dword leaf
 * descriptor: 12 bytes per vertex starting at byte offset 16.
 */
static inline void
brw_nir_rt_load_bvh_primitive_leaf_positions(nir_builder *b,
                                             struct brw_nir_rt_bvh_primitive_leaf_positions_defs *defs,
                                             nir_def *leaf_addr)
{
   for (unsigned i = 0; i < ARRAY_SIZE(defs->positions); i++) {
      defs->positions[i] =
         brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 16 + i * 4 * 3), 4, 3, 32);
   }
}
/* Compute the primitive index for a hit.
 *
 * @is_procedural may be NULL, in which case it is derived from the hit's
 * leaf_type.  Procedural leaves store one index per primitive starting at
 * dw[3] (indexed by prim_leaf_index); quad leaves store a base index in
 * dw[2] to which the hit's 16-bit prim_index_delta is added.
 */
static inline nir_def *
brw_nir_rt_load_primitive_id_from_hit(nir_builder *b,
                                      nir_def *is_procedural,
                                      const struct brw_nir_rt_mem_hit_defs *defs)
{
   if (!is_procedural) {
      is_procedural =
         nir_ieq_imm(b, defs->leaf_type,
                        BRW_RT_BVH_NODE_TYPE_PROCEDURAL);
   }
   nir_def *prim_id_proc, *prim_id_quad;
   nir_push_if(b, is_procedural);
   {
      /* For procedural leafs, the index is in dw[3]. */
      nir_def *offset =
         nir_iadd_imm(b, nir_ishl_imm(b, defs->prim_leaf_index, 2), 12);
      prim_id_proc = nir_load_global(b, nir_iadd(b, defs->prim_leaf_ptr,
                                                 nir_u2u64(b, offset)),
                                     4, /* align */ 1, 32);
   }
   nir_push_else(b, NULL);
   {
      /* For quad leafs, the index is dw[2] and there is a 16bit additional
       * offset in dw[3].
       */
      prim_id_quad = nir_load_global(b, nir_iadd_imm(b, defs->prim_leaf_ptr, 8),
                                     4, /* align */ 1, 32);
      prim_id_quad = nir_iadd(b,
                              prim_id_quad,
                              defs->prim_index_delta);
   }
   nir_pop_if(b, NULL);
   return nir_if_phi(b, prim_id_proc, prim_id_quad);
}
/* Resolve an acceleration structure address to its root node pointer, or
 * NULL (0) if the acceleration structure address itself is NULL.
 */
static inline nir_def *
brw_nir_rt_acceleration_structure_to_root_node(nir_builder *b,
                                               nir_def *as_addr)
{
   /* The HW memory structure in which we specify what acceleration structure
    * to traverse, takes the address to the root node in the acceleration
    * structure, not the acceleration structure itself. To find that, we have
    * to read the root node offset from the acceleration structure which is
    * the first QWord.
    *
    * But if the acceleration structure pointer is NULL, then we should return
    * NULL as root node pointer.
    *
    * TODO: we could optimize this by assuming that for a given version of the
    * BVH, we can find the root node at a given offset.
    */
   nir_def *root_node_ptr, *null_node_ptr;
   nir_push_if(b, nir_ieq_imm(b, as_addr, 0));
   {
      null_node_ptr = nir_imm_int64(b, 0);
   }
   nir_push_else(b, NULL);
   {
      /* The first QWord of the AS is a self-relative root node offset */
      root_node_ptr =
         nir_iadd(b, as_addr, brw_nir_rt_load(b, as_addr, 256, 1, 64));
   }
   nir_pop_if(b, NULL);
   return nir_if_phi(b, null_node_ptr, root_node_ptr);
}
#endif /* BRW_NIR_RT_BUILDER_H */

View file

@ -1,292 +0,0 @@
/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef BRW_RT_H
#define BRW_RT_H
#include <stdint.h>
#include "compiler/shader_enums.h"
#include "util/macros.h"
#ifdef __cplusplus
extern "C" {
#endif
/** Vulkan defines shaderGroupHandleSize = 32 */
#define BRW_RT_SBT_HANDLE_SIZE 32
/** RT_DISPATCH_GLOBALS size (see gen_rt.xml) */
#define BRW_RT_DISPATCH_GLOBALS_SIZE 80
/** Offset after the RT dispatch globals at which "push" constants live */
#define BRW_RT_PUSH_CONST_OFFSET 128
/** Stride of the resume SBT */
#define BRW_BTD_RESUME_SBT_STRIDE 8
/* Vulkan always uses exactly two levels of BVH: world and object. At the API
 * level, these are referred to as top and bottom.
 */
enum brw_rt_bvh_level {
   BRW_RT_BVH_LEVEL_WORLD = 0,
   BRW_RT_BVH_LEVEL_OBJECT = 1,
};
#define BRW_RT_MAX_BVH_LEVELS 2
/* Node type encodings, as compared against MemHit::leafType. */
enum brw_rt_bvh_node_type {
   BRW_RT_BVH_NODE_TYPE_INTERNAL = 0,
   BRW_RT_BVH_NODE_TYPE_INSTANCE = 1,
   BRW_RT_BVH_NODE_TYPE_PROCEDURAL = 3,
   BRW_RT_BVH_NODE_TYPE_QUAD = 4,
};
/** HitKind values returned for triangle geometry
 *
 * This enum must match the SPIR-V enum.
 */
enum brw_rt_hit_kind {
   BRW_RT_HIT_KIND_FRONT_FACE = 0xfe,
   BRW_RT_HIT_KIND_BACK_FACE = 0xff,
};
/** Ray flags
 *
 * This enum must match the SPIR-V RayFlags enum.
 */
enum brw_rt_ray_flags {
   BRW_RT_RAY_FLAG_FORCE_OPAQUE = 0x01,
   BRW_RT_RAY_FLAG_FORCE_NON_OPAQUE = 0x02,
   BRW_RT_RAY_FLAG_TERMINATE_ON_FIRST_HIT = 0x04,
   BRW_RT_RAY_FLAG_SKIP_CLOSEST_HIT_SHADER = 0x08,
   BRW_RT_RAY_FLAG_CULL_BACK_FACING_TRIANGLES = 0x10,
   BRW_RT_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES = 0x20,
   BRW_RT_RAY_FLAG_CULL_OPAQUE = 0x40,
   BRW_RT_RAY_FLAG_CULL_NON_OPAQUE = 0x80,
   BRW_RT_RAY_FLAG_SKIP_TRIANGLES = 0x100,
   BRW_RT_RAY_FLAG_SKIP_AABBS = 0x200,
};
/* Layout of the RT scratch memory area, as computed by
 * brw_rt_compute_scratch_layout().
 */
struct brw_rt_scratch_layout {
   /** Number of stack IDs per DSS */
   uint32_t stack_ids_per_dss;
   /** Start offset (in bytes) of the hardware MemRay stack */
   uint32_t ray_stack_start;
   /** Stride (in bytes) of the hardware MemRay stack */
   uint32_t ray_stack_stride;
   /** Start offset (in bytes) of the SW stacks */
   uint64_t sw_stack_start;
   /** Size (in bytes) of the SW stack for a single shader invocation */
   uint32_t sw_stack_size;
   /** Total size (in bytes) of the RT scratch memory area */
   uint64_t total_size;
};
/** Parameters passed to the raygen trampoline shader
 *
 * This struct is carefully construected to be 32B and must be passed to the
 * raygen trampoline shader as as inline constant data.
 */
struct brw_rt_raygen_trampoline_params {
   /** The GPU address of the RT_DISPATCH_GLOBALS */
   uint64_t rt_disp_globals_addr;
   /** The GPU address of the BINDLESS_SHADER_RECORD for the raygen shader */
   uint64_t raygen_bsr_addr;
   /** 1 if this is an indirect dispatch, 0 otherwise */
   uint8_t is_indirect;
   /** The integer log2 of the local group size
    *
    * Ray-tracing shaders don't have a concept of local vs. global workgroup
    * size.  They only have a single 3D launch size.  The raygen trampoline
    * shader is always dispatched with a local workgroup size equal to the
    * SIMD width but the shape of the local workgroup is determined at
    * dispatch time based on the shape of the launch and passed to the
    * trampoline via this field.  (There's no sense having a Z dimension on
    * the local workgroup if the launch is 2D.)
    *
    * We use the integer log2 of the size because there's no point in
    * non-power-of-two sizes and shifts are cheaper than division.
    */
   uint8_t local_group_size_log2[3];
   /* Padding to keep the struct at exactly 32 bytes */
   uint32_t pad[3];
};
/** Size of the "hot zone" in bytes
 *
 * The hot zone is a SW-defined data structure which is a single uvec4
 * containing two bits of information:
 *
 *  - hotzone.x: Stack offset (in bytes)
 *
 *    This is the offset (in bytes) into the per-thread scratch space at which
 *    the current shader's stack starts.  This is incremented by the calling
 *    shader prior to any shader call type instructions and gets decremented
 *    by the resume shader as part of completing the return operation.
 *
 *
 *  - hotzone.yzw: The launch ID associated with the current thread
 *
 *    Inside a bindless shader, the only information we have is the DSS ID
 *    from the hardware EU and a per-DSS stack ID.  In particular, the three-
 *    dimensional launch ID is lost the moment we leave the raygen trampoline.
 */
#define BRW_RT_SIZEOF_HOTZONE 16
/* From the BSpec "Address Computation for Memory Based Data Structures:
 * Ray and TraversalStack (Async Ray Tracing)":
 *
 *    sizeof(Ray) = 64B, sizeof(HitInfo) = 32B, sizeof(TravStack) = 32B.
 */
#define BRW_RT_SIZEOF_RAY 64
#define BRW_RT_SIZEOF_HIT_INFO 32
#define BRW_RT_SIZEOF_TRAV_STACK 32
/* From the BSpec:
 *
 *    syncStackSize = (maxBVHLevels % 2 == 1) ?
 *       (sizeof(HitInfo) * 2 +
 *        (sizeof(Ray) + sizeof(TravStack)) * maxBVHLevels + 32B) :
 *       (sizeof(HitInfo) * 2 +
 *        (sizeof(Ray) + sizeof(TravStack)) * maxBVHLevels);
 *
 * The select is just to align to 64B.
 */
#define BRW_RT_SIZEOF_RAY_QUERY \
   (BRW_RT_SIZEOF_HIT_INFO * 2 + \
    (BRW_RT_SIZEOF_RAY + BRW_RT_SIZEOF_TRAV_STACK) * BRW_RT_MAX_BVH_LEVELS + \
    (BRW_RT_MAX_BVH_LEVELS % 2 ? 32 : 0))
#define BRW_RT_SIZEOF_SHADOW_RAY_QUERY \
   (BRW_RT_SIZEOF_HIT_INFO * 2 + \
    (BRW_RT_SIZEOF_RAY + BRW_RT_SIZEOF_TRAV_STACK) * BRW_RT_MAX_BVH_LEVELS)
#define BRW_RT_SIZEOF_HW_STACK \
   (BRW_RT_SIZEOF_HIT_INFO * 2 + \
    BRW_RT_SIZEOF_RAY * BRW_RT_MAX_BVH_LEVELS + \
    BRW_RT_SIZEOF_TRAV_STACK * BRW_RT_MAX_BVH_LEVELS)
/* This is a mesa-defined region for hit attribute data */
#define BRW_RT_SIZEOF_HIT_ATTRIB_DATA 64
#define BRW_RT_OFFSETOF_HIT_ATTRIB_DATA BRW_RT_SIZEOF_HW_STACK
/* Per-stack-ID stride of the async RT stack: the HW stack followed by the
 * hit-attribute scratch region, rounded up to a multiple of 64 bytes.
 */
#define BRW_RT_ASYNC_STACK_STRIDE \
   ALIGN_POT(BRW_RT_OFFSETOF_HIT_ATTRIB_DATA + \
             BRW_RT_SIZEOF_HIT_ATTRIB_DATA, 64)
/* Compute the layout of the RT scratch memory area: per-invocation hot
 * zones first, then the HW ray stacks, then the 64B-aligned SW stacks.
 *
 * @stack_ids_per_dss: number of stack IDs to allocate per DSS
 * @sw_stack_size:     requested per-invocation SW stack size in bytes
 */
static inline void
brw_rt_compute_scratch_layout(struct brw_rt_scratch_layout *layout,
                              const struct intel_device_info *devinfo,
                              uint32_t stack_ids_per_dss,
                              uint32_t sw_stack_size)
{
   layout->stack_ids_per_dss = stack_ids_per_dss;
   const uint32_t dss_count = intel_device_info_dual_subslice_id_bound(devinfo);
   const uint32_t num_stack_ids = dss_count * stack_ids_per_dss;
   uint64_t size = 0;
   /* The first thing in our scratch area is an array of "hot zones" which
    * store the stack offset as well as the launch IDs for each active
    * invocation.
    */
   size += BRW_RT_SIZEOF_HOTZONE * num_stack_ids;
   /* Next, we place the HW ray stacks */
   assert(size % 64 == 0); /* Cache-line aligned */
   assert(size < UINT32_MAX);
   layout->ray_stack_start = size;
   layout->ray_stack_stride = BRW_RT_ASYNC_STACK_STRIDE;
   size += num_stack_ids * layout->ray_stack_stride;
   /* Finally, we place the SW stacks for the individual ray-tracing shader
    * invocations.  We align these to 64B to ensure that we don't have any
    * shared cache lines which could hurt performance.
    */
   assert(size % 64 == 0);
   layout->sw_stack_start = size;
   layout->sw_stack_size = ALIGN(sw_stack_size, 64);
   /* Currently it's always the case that sw_stack_size is a power of
    * two, but power-of-two SW stack sizes are prone to causing
    * collisions in the hashing function used by the L3 to map memory
    * addresses to banks, which can cause stack accesses from most
    * DSSes to bottleneck on a single L3 bank.  Fix it by padding the
    * SW stack by a single cacheline if it was a power of two.
    */
   if (layout->sw_stack_size > 64 &&
       util_is_power_of_two_nonzero(layout->sw_stack_size))
      layout->sw_stack_size += 64;
   size += num_stack_ids * layout->sw_stack_size;
   layout->total_size = size;
}
/* Total size of the HW ray-query stacks: one BRW_RT_SIZEOF_RAY_QUERY per
 * lane for every possible slice/subslice/EU thread, at the widest
 * ray-query dispatch (SIMD16 — ray queries cannot run in SIMD32).
 */
static inline uint32_t
brw_rt_ray_queries_hw_stacks_size(const struct intel_device_info *devinfo)
{
   /* max_scratch_ids already bounds the slice/subslice/EU ID space for
    * every thread.
    */
   const uint32_t lanes =
      devinfo->max_scratch_ids[MESA_SHADER_COMPUTE] * 16;
   return lanes * BRW_RT_SIZEOF_RAY_QUERY;
}
/* Size of a single shadow ray-query stack, sized like the HW stacks. */
static inline uint32_t
brw_rt_ray_queries_shadow_stack_size(const struct intel_device_info *devinfo)
{
   /* Same lane bound as brw_rt_ray_queries_hw_stacks_size(): all threads,
    * SIMD16 max.
    */
   const uint32_t lanes =
      devinfo->max_scratch_ids[MESA_SHADER_COMPUTE] * 16;
   return lanes * BRW_RT_SIZEOF_SHADOW_RAY_QUERY;
}
/* Total shadow-stack allocation for @ray_queries simultaneous queries.
 * A single query writes straight into the HW buffer and needs no shadow
 * copy; every query additionally gets 4 bytes of Ctrl + Level data.
 */
static inline uint32_t
brw_rt_ray_queries_shadow_stacks_size(const struct intel_device_info *devinfo,
                                      uint32_t ray_queries)
{
   const uint32_t shadow_copies = ray_queries > 1 ? ray_queries : 0;
   return shadow_copies * brw_rt_ray_queries_shadow_stack_size(devinfo) +
          ray_queries * 4; /* Ctrl + Level data */
}
#ifdef __cplusplus
}
#endif
#endif /* BRW_RT_H */

View file

@ -1,676 +0,0 @@
/*
* Copyright © 2021 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_compiler.h"
#include "brw_kernel.h"
#include "compiler/brw_disasm.h"
#include "compiler/clc/clc.h"
#include "compiler/glsl_types.h"
#include "compiler/nir/nir_serialize.h"
#include "dev/intel_debug.h"
#include "util/build_id.h"
#include "util/disk_cache.h"
#include "util/macros.h"
#include "util/mesa-sha1.h"
#include "util/u_dynarray.h"
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <inttypes.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
/* Shader functions */
#define SPIR_V_MAGIC_NUMBER 0x07230203
/* Create the on-disk shader cache used to memoize kernel compilations.
 *
 * The cache name is derived from the PCI device ID and the cache
 * "timestamp" from this binary's build-id SHA, so a driver rebuild or a
 * different device invalidates old entries.  Returns NULL when the shader
 * cache is compiled out.
 */
static struct disk_cache *
get_disk_cache(struct brw_compiler *compiler)
{
#ifdef ENABLE_SHADER_CACHE
   char renderer[14];
   /* "brw_clc_" (8 chars) + 4 hex digits = 12 == sizeof(renderer) - 2 */
   ASSERTED int len = snprintf(renderer, sizeof(renderer), "brw_clc_%04x",
                               compiler->devinfo->pci_device_id);
   assert(len == sizeof(renderer) - 2);
   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(get_disk_cache);
   if (note == NULL) {
      fprintf(stderr, "Failed to find build-id\n");
      abort();
   }
   unsigned build_id_len = build_id_length(note);
   if (build_id_len < 20) {
      fprintf(stderr, "build-id too short.  It needs to be a SHA\n");
      abort();
   }
   struct mesa_sha1 sha1_ctx;
   uint8_t sha1[20];
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
   _mesa_sha1_final(&sha1_ctx, sha1);
   char timestamp[41];
   _mesa_sha1_format(timestamp, sha1);
   const uint64_t driver_flags = brw_get_compiler_config_value(compiler);
   return disk_cache_create(renderer, timestamp, driver_flags);
#endif
   /* Only reached when ENABLE_SHADER_CACHE is not defined */
   return NULL;
}
/* brw_compiler log callback: forward messages to stderr, but only when
 * DEBUG_CS debug output is enabled; otherwise drop them.
 */
static void
compiler_log(void *data, unsigned *id, const char *fmt, ...)
{
   va_list ap;
   va_start(ap, fmt);
   if (INTEL_DEBUG(DEBUG_CS))
      vfprintf(stderr, fmt, ap);
   va_end(ap);
}
/* CLC message callback: print diagnostics to stderr verbatim. */
static void
msg_callback(void *priv, const char *msg)
{
   (void)priv;
   fputs(msg, stderr);
}
/* Emit @len bytes of @data (interpreted as uint32_t words, so @len must be
 * a multiple of 4) as a C array initializer named "<prefix>_<arr_name>",
 * four words per line.
 */
static void
print_u32_data(FILE *fp, const char *prefix, const char *arr_name,
               const uint32_t *data, size_t len)
{
   assert(len % 4 == 0);
   const size_t num_words = len / 4;
   fprintf(fp, "static const uint32_t %s_%s[] = {", prefix, arr_name);
   for (size_t w = 0; w < num_words; w++) {
      /* Start a fresh, indented line every four words */
      if (w % 4 == 0)
         fprintf(fp,"\n  ");
      fprintf(fp, " 0x%08" PRIx32 ",", data[w]);
   }
   fprintf(fp, "\n};\n");
}
/* Emit a C definition "static const uint8_t <prefix>_<arr_name>[]" holding
 * the given data, sixteen bytes per output line.
 */
static void
print_u8_data(FILE *fp, const char *prefix, const char *arr_name,
              const uint8_t *data, size_t len)
{
   fprintf(fp, "static const uint8_t %s_%s[] = {", prefix, arr_name);

   for (size_t idx = 0; idx < len; idx++) {
      /* Start a fresh indented line every sixteen bytes. */
      if (idx % 16 == 0)
         fprintf(fp,"\n ");

      fprintf(fp, " 0x%02" PRIx8 ",", data[idx]);
   }
   fprintf(fp, "\n};\n");
}
/* Return the enum constant's name for emission into generated C source. */
static const char *
reloc_type_str(enum brw_shader_reloc_type type)
{
   switch (type) {
   case BRW_SHADER_RELOC_TYPE_U32:
      return "BRW_SHADER_RELOC_TYPE_U32";
   case BRW_SHADER_RELOC_TYPE_MOV_IMM:
      return "BRW_SHADER_RELOC_TYPE_MOV_IMM";
   default:
      unreachable("Unknown relocation type");
   }
}
/* Print the brw_cs_prog_data initializer body for the generated C source.
 *
 * Fields that the standalone compiler should never produce are asserted to
 * hold their default values instead of being printed.  "pad" is the
 * indentation prepended to every emitted line; "prefix" names the sibling
 * <prefix>_relocs array emitted by print_kernel().
 */
static void
print_cs_prog_data_fields(FILE *fp, const char *prefix, const char *pad,
                          const struct brw_cs_prog_data *cs_prog_data)
{
#define PROG_DATA_FIELD(fmt, field) \
   fprintf(fp, "%s." #field " = " fmt ",\n", pad, cs_prog_data->field)

#define PROG_DATA_BOOL_FIELD(field) \
   fprintf(fp, "%s." #field " = %s,\n", pad, \
           cs_prog_data->field ? "true" : "false")

   PROG_DATA_FIELD("%u", base.nr_params);
   assert(cs_prog_data->base.stage == MESA_SHADER_COMPUTE);
   fprintf(fp, "%s.base.stage = MESA_SHADER_COMPUTE,\n", pad);
   assert(cs_prog_data->base.zero_push_reg == 0);
   assert(cs_prog_data->base.push_reg_mask_param == 0);
   PROG_DATA_FIELD("%u", base.curb_read_length);
   PROG_DATA_FIELD("%u", base.total_scratch);
   PROG_DATA_FIELD("%u", base.total_shared);
   PROG_DATA_FIELD("%u", base.program_size);
   PROG_DATA_FIELD("%u", base.const_data_size);
   PROG_DATA_FIELD("%u", base.const_data_offset);
   PROG_DATA_FIELD("%u", base.num_relocs);
   /* Points at the <prefix>_relocs array emitted alongside this struct. */
   fprintf(fp, "%s.base.relocs = %s_relocs,\n", pad, prefix);
   assert(!cs_prog_data->base.has_ubo_pull);
   assert(cs_prog_data->base.dispatch_grf_start_reg == 0);
   assert(!cs_prog_data->base.use_alt_mode);
   assert(cs_prog_data->base.param == 0);
   PROG_DATA_BOOL_FIELD(base.uses_atomic_load_store);
   fprintf(fp, "%s.local_size = { %u, %u, %u },\n", pad,
           cs_prog_data->local_size[0],
           cs_prog_data->local_size[1],
           cs_prog_data->local_size[2]);
   fprintf(fp, "%s.prog_offset = { %u, %u, %u },\n", pad,
           cs_prog_data->prog_offset[0],
           cs_prog_data->prog_offset[1],
           cs_prog_data->prog_offset[2]);
   PROG_DATA_FIELD("%u", prog_mask);
   PROG_DATA_FIELD("%u", prog_spilled);
   PROG_DATA_BOOL_FIELD(uses_barrier);
   PROG_DATA_BOOL_FIELD(uses_num_work_groups);
   assert(!cs_prog_data->uses_inline_data);
   assert(!cs_prog_data->uses_btd_stack_ids);
   PROG_DATA_FIELD("%u", push.per_thread.dwords);
   PROG_DATA_FIELD("%u", push.per_thread.regs);
   PROG_DATA_FIELD("%u", push.per_thread.size);
   PROG_DATA_FIELD("%u", push.cross_thread.dwords);
   PROG_DATA_FIELD("%u", push.cross_thread.regs);
   PROG_DATA_FIELD("%u", push.cross_thread.size);

#undef PROG_DATA_FIELD
#undef PROG_DATA_BOOL_FIELD
}
/* Emit the compiled kernel as a self-contained C source fragment:
 * relocation table, argument descriptors, the disassembly (inside #if 0
 * for human consumption), the code as a uint32_t array, the brw_kernel
 * struct itself, and finally a SHA1 string covering everything hashed
 * along the way, usable as an identity for the generated blob.
 */
static void
print_kernel(FILE *fp, const char *prefix,
             const struct brw_kernel *kernel,
             const struct brw_isa_info *isa)
{
   struct mesa_sha1 sha1_ctx;
   _mesa_sha1_init(&sha1_ctx);

#define SHA1_UPDATE_VALUE(val) \
   _mesa_sha1_update(&sha1_ctx, &val, sizeof(val))

   fprintf(fp, "#include \"intel/compiler/brw_kernel.h\"\n");
   fprintf(fp, "\n");

   fprintf(fp, "static const struct brw_shader_reloc %s_relocs[] = {\n",
           prefix);
   for (unsigned i = 0; i < kernel->prog_data.base.num_relocs; i++) {
      const struct brw_shader_reloc *reloc = &kernel->prog_data.base.relocs[i];
      fprintf(fp, " { %"PRIu32", %s, %"PRIu32", %"PRIu32" },\n",
              reloc->id, reloc_type_str(reloc->type),
              reloc->offset, reloc->delta);
   }
   fprintf(fp, "};\n");
   _mesa_sha1_update(&sha1_ctx, kernel->prog_data.base.relocs,
                     kernel->prog_data.base.num_relocs *
                     sizeof(kernel->prog_data.base.relocs[0]));

   /* Get rid of the pointers before we hash, so the hash only depends on
    * the prog_data contents, not on addresses that vary run to run. */
   struct brw_cs_prog_data cs_prog_data = kernel->prog_data;
   cs_prog_data.base.relocs = NULL;
   assert(cs_prog_data.base.param == NULL);
   _mesa_sha1_update(&sha1_ctx, &cs_prog_data, sizeof(cs_prog_data));

   SHA1_UPDATE_VALUE(kernel->args_size);
   SHA1_UPDATE_VALUE(kernel->arg_count);
   _mesa_sha1_update(&sha1_ctx, kernel->args,
                     kernel->arg_count * sizeof(kernel->args[0]));

   fprintf(fp, "static const struct brw_kernel_arg_desc %s_args[] = {\n",
           prefix);
   for (unsigned i = 0; i < kernel->arg_count; i++) {
      fprintf(fp, " { %d, %d },\n",
              kernel->args[i].offset, kernel->args[i].size);
   }
   fprintf(fp, "};\n\n");

   _mesa_sha1_update(&sha1_ctx, kernel->code,
                     kernel->prog_data.base.program_size);

   /* Human-readable disassembly, compiled out of the generated file. */
   fprintf(fp, "#if 0 /* BEGIN KERNEL ASSEMBLY */\n");
   fprintf(fp, "\n");
   brw_disassemble_with_errors(isa, kernel->code, 0, fp);
   fprintf(fp, "\n");
   fprintf(fp, "#endif /* END KERNEL ASSEMBLY */\n");
   print_u32_data(fp, prefix, "code", kernel->code,
                  kernel->prog_data.base.program_size);

   fprintf(fp, "static const struct brw_kernel %s = {\n", prefix);
   fprintf(fp, " .prog_data = {\n");
   print_cs_prog_data_fields(fp, prefix, " ", &kernel->prog_data);
   fprintf(fp, " },\n");
   fprintf(fp, " .args_size = %d,\n", (int)kernel->args_size);
   fprintf(fp, " .arg_count = %d,\n", (int)kernel->arg_count);
   fprintf(fp, " .args = %s_args,\n", prefix);
   fprintf(fp, " .code = %s_code,\n", prefix);
   fprintf(fp, "};\n");

   unsigned char sha1[20];
   _mesa_sha1_final(&sha1_ctx, sha1);
   char sha1_str[41];
   _mesa_sha1_format(sha1_str, sha1);
   fprintf(fp, "const char *%s_sha1 = \"%s\";\n", prefix, sha1_str);
}
/* Print the command-line help text to the given stream.
 *
 * Fix: the -L/--llvm17-wa line was missing its trailing newline, so it ran
 * together with the -p line in the output.
 */
static void
print_usage(char *exec_name, FILE *f)
{
   fprintf(f,
           "Usage: %s [options] -- [clang args]\n"
           "Options:\n"
           " -h --help Print this help.\n"
           " -e, --entrypoint <name> Specify the entry-point name.\n"
           " -L, --llvm17-wa Enable LLVM 17 workarounds for opaque pointers\n"
           " -p, --platform <name> Specify the target platform name.\n"
           " --prefix <prefix> Prefix for variable names in generated C code.\n"
           " -o, --out <filename> Specify the output filename.\n"
           " -i, --in <filename> Specify one input filename. Accepted multiple times.\n"
           " -s, --spv <filename> Specify the output filename for spirv.\n"
           " -n, --nir Specify whether to output serialized NIR instead of ISA.\n"
           " -t, --text <filename> Specify the output filename for the parsed text\n"
           " -v, --verbose Print more information during compilation.\n"
           " -M, --llvm-version Print LLVM version.\n"
           , exec_name);
}
/* getopt value for the long-only --prefix option (no short equivalent). */
#define OPT_PREFIX 1000

/* Parsed command-line configuration for one intel_clc invocation. */
struct intel_clc_params {
   char *entry_point;   /* kernel entry point name (-e) */
   char *platform;      /* target platform name (-p), ISA output only */
   char *outfile;       /* output filename (-o), NULL means stdout */
   char *spv_outfile;   /* optional raw SPIR-V dump filename (-s) */
   char *txt_outfile;   /* optional concatenated-source dump filename (-t) */
   char *prefix;        /* symbol prefix for generated C (--prefix) */

   bool output_nir;     /* emit serialized NIR instead of ISA (-n) */
   bool print_info;     /* print kernel stats to stdout (-v) */
   bool llvm17_wa;      /* enable LLVM 17 opaque-pointer workarounds (-L) */

   void *mem_ctx;       /* ralloc context owning all allocations */

   struct intel_device_info devinfo;   /* filled from -p on the ISA path */
};
#include "compiler/spirv/nir_spirv.h"
/* Compile the SPIR-V library to NIR and write it out as C source.
 *
 * Writes the spirv_library_to_nir_builder helpers followed by a uint8_t
 * array containing the serialized NIR to params->outfile (or stdout).
 * Returns 0 on success, -1 on failure.
 *
 * Fix: the output stream was leaked (never fclose'd) when NIR generation
 * failed.
 */
static int
output_nir(const struct intel_clc_params *params, struct clc_binary *binary)
{
   struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_OPENCL,
      .caps = {
         .address = true,
         .groups = true,
         .image_write_without_format = true,
         .int8 = true,
         .int16 = true,
         .int64 = true,
         .int64_atomics = true,
         .kernel = true,
         .linkage = true, /* We receive linked kernel from clc */
         .float_controls = true,
         .generic_pointers = true,
         .storage_8bit = true,
         .storage_16bit = true,
         .subgroup_arithmetic = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_dispatch = true,
         .subgroup_quad = true,
         .subgroup_shuffle = true,
         .subgroup_vote = true,

         .intel_subgroup_shuffle = true,
         .intel_subgroup_buffer_block_io = true,
      },
      .shared_addr_format = nir_address_format_62bit_generic,
      .global_addr_format = nir_address_format_62bit_generic,
      .temp_addr_format = nir_address_format_62bit_generic,
      .constant_addr_format = nir_address_format_64bit_global,
      .create_library = true,
   };

   FILE *fp = params->outfile != NULL ?
      fopen(params->outfile, "w") : stdout;
   if (!fp) {
      fprintf(stderr, "Failed to open %s\n", params->outfile);
      return -1;
   }

   spirv_library_to_nir_builder(fp, binary->data, binary->size / 4,
                                &spirv_options);

   nir_shader *nir = brw_nir_from_spirv(params->mem_ctx,
                                        binary->data, binary->size,
                                        params->llvm17_wa);
   if (!nir) {
      fprintf(stderr, "Failed to generate NIR out of SPIRV\n");
      /* Don't leak the output stream on the error path. */
      if (params->outfile)
         fclose(fp);
      return -1;
   }

   struct blob blob;
   blob_init(&blob);
   nir_serialize(&blob, nir, false /* strip */);
   print_u8_data(fp, params->prefix, "nir", blob.data, blob.size);
   blob_finish(&blob);

   if (params->outfile)
      fclose(fp);

   return 0;
}
/* Compile the SPIR-V to Intel ISA for the selected platform and print the
 * resulting brw_kernel as C source to params->outfile (or stdout).
 * Returns 0 on success, -1 on failure.
 *
 * Fix: the fopen() result was not checked, so an unwritable output path
 * fed a NULL FILE* to print_kernel().
 */
static int
output_isa(const struct intel_clc_params *params, struct clc_binary *binary)
{
   struct brw_kernel kernel = {};
   char *error_str;

   struct brw_isa_info _isa, *isa = &_isa;
   brw_init_isa_info(isa, &params->devinfo);

   struct brw_compiler *compiler = brw_compiler_create(params->mem_ctx,
                                                       &params->devinfo);
   compiler->shader_debug_log = compiler_log;
   compiler->shader_perf_log = compiler_log;
   /* NOTE(review): the cache instance is never destroyed here; presumably
    * reclaimed at process exit -- confirm. */
   struct disk_cache *disk_cache = get_disk_cache(compiler);

   if (!brw_kernel_from_spirv(compiler, disk_cache, &kernel, NULL, params->mem_ctx,
                              binary->data, binary->size,
                              params->entry_point, &error_str)) {
      fprintf(stderr, "Compile failed: %s\n", error_str);
      return -1;
   }

   if (params->print_info) {
      fprintf(stdout, "kernel info:\n");
      fprintf(stdout, " uses_barrier : %u\n", kernel.prog_data.uses_barrier);
      fprintf(stdout, " uses_num_work_groups : %u\n", kernel.prog_data.uses_num_work_groups);
      fprintf(stdout, " uses_inline_data : %u\n", kernel.prog_data.uses_inline_data);
      fprintf(stdout, " local_size : %ux%ux%u\n",
              kernel.prog_data.local_size[0],
              kernel.prog_data.local_size[1],
              kernel.prog_data.local_size[2]);
      fprintf(stdout, " curb_read_length : %u\n", kernel.prog_data.base.curb_read_length);
      fprintf(stdout, " total_scratch : %u\n", kernel.prog_data.base.total_scratch);
      fprintf(stdout, " total_shared : %u\n", kernel.prog_data.base.total_shared);
      fprintf(stdout, " program_size : %u\n", kernel.prog_data.base.program_size);
      fprintf(stdout, " const_data_size : %u\n", kernel.prog_data.base.const_data_size);
      fprintf(stdout, " uses_atomic_load_store : %u\n", kernel.prog_data.base.uses_atomic_load_store);
      fprintf(stdout, " dispatch_grf_start_reg : %u\n", kernel.prog_data.base.dispatch_grf_start_reg);
   }

   /* Default symbol prefix: e.g. "gfx125_clc_<entrypoint>". */
   char *prefix = params->prefix;
   char prefix_tmp[256];
   if (prefix == NULL) {
      bool is_pt_5 = (params->devinfo.verx10 % 10) == 5;
      snprintf(prefix_tmp, sizeof(prefix_tmp), "gfx%d%s_clc_%s",
               params->devinfo.ver, is_pt_5 ? "5" : "", params->entry_point);
      prefix = prefix_tmp;
   }

   if (params->outfile != NULL) {
      FILE *fp = fopen(params->outfile, "w");
      if (fp == NULL) {
         fprintf(stderr, "Failed to open %s\n", params->outfile);
         return -1;
      }
      print_kernel(fp, prefix, &kernel, isa);
      fclose(fp);
   } else {
      print_kernel(stdout, prefix, &kernel, isa);
   }

   return 0;
}
/* Print the LLVM version this tool was built against (for -M). */
static void
print_llvm_version(FILE *out)
{
   fputs(MESA_LLVM_VERSION_STRING, out);
   fputc('\n', out);
}
/* Command-line driver: concatenate the input OpenCL C files, compile them
 * to SPIR-V, then emit either serialized NIR or compiled Intel ISA as C
 * source.  Returns 0 on success, 1 on failure.
 *
 * Fixes: the ISA path used to declare a local clc_parsed_spirv that
 * shadowed the one freed at "end:", leaking the parsed data; the fopen()
 * and read() results were unchecked.
 */
int main(int argc, char **argv)
{
   int exit_code = 0;

   process_intel_debug_variable();

   static struct option long_options[] ={
      {"help", no_argument, 0, 'h'},
      {"entrypoint", required_argument, 0, 'e'},
      {"platform", required_argument, 0, 'p'},
      {"prefix", required_argument, 0, OPT_PREFIX},
      {"in", required_argument, 0, 'i'},
      {"out", required_argument, 0, 'o'},
      {"spv", required_argument, 0, 's'},
      {"text", required_argument, 0, 't'},
      {"nir", no_argument, 0, 'n'},
      {"llvm17-wa", no_argument, 0, 'L'},
      {"llvm-version", no_argument, 0, 'M'},
      {"verbose", no_argument, 0, 'v'},
      {0, 0, 0, 0}
   };

   struct intel_clc_params params = {};

   struct util_dynarray clang_args;
   struct util_dynarray input_files;

   struct clc_binary spirv_obj = {0};
   /* Freed unconditionally at "end:"; populated only on the ISA path. */
   struct clc_parsed_spirv parsed_spirv_data = {0};
   struct disk_cache *disk_cache = NULL;

   params.mem_ctx = ralloc_context(NULL);

   util_dynarray_init(&clang_args, params.mem_ctx);
   util_dynarray_init(&input_files, params.mem_ctx);

   int ch;
   while ((ch = getopt_long(argc, argv, "he:p:s:t:i:no:MLv", long_options, NULL)) != -1)
   {
      switch (ch)
      {
      case 'h':
         print_usage(argv[0], stdout);
         goto end;
      case 'e':
         params.entry_point = optarg;
         break;
      case 'p':
         params.platform = optarg;
         break;
      case 'o':
         params.outfile = optarg;
         break;
      case 'i':
         util_dynarray_append(&input_files, char *, optarg);
         break;
      case 'n':
         params.output_nir = true;
         break;
      case 's':
         params.spv_outfile = optarg;
         break;
      case 't':
         params.txt_outfile = optarg;
         break;
      case 'v':
         params.print_info = true;
         break;
      case 'L':
         params.llvm17_wa = true;
         break;
      case 'M':
         print_llvm_version(stdout);
         return EXIT_SUCCESS;
      case OPT_PREFIX:
         params.prefix = optarg;
         break;
      default:
         fprintf(stderr, "Unrecognized option \"%s\".\n", optarg);
         print_usage(argv[0], stderr);
         goto fail;
      }
   }

   /* Everything after "--" is forwarded to clang. */
   for (int i = optind; i < argc; i++) {
      util_dynarray_append(&clang_args, char *, argv[i]);
   }

   if (util_dynarray_num_elements(&input_files, char *) == 0) {
      fprintf(stderr, "No input file(s).\n");
      print_usage(argv[0], stderr);
      goto fail;
   }

   struct clc_logger logger = {
      .error = msg_callback,
      .warning = msg_callback,
   };

   /* Concatenate all input files into one NUL-terminated buffer. */
   size_t total_size = 0;
   char *all_inputs = NULL;
   util_dynarray_foreach(&input_files, char *, infile) {
      int fd = open(*infile, O_RDONLY);
      if (fd < 0) {
         fprintf(stderr, "Failed to open %s\n", *infile);
         goto fail;
      }

      off_t len = lseek(fd, 0, SEEK_END);
      size_t new_size = total_size + len;
      all_inputs = reralloc_size(params.mem_ctx, all_inputs, new_size + 1);
      if (!all_inputs) {
         fprintf(stderr, "Failed to allocate memory\n");
         close(fd);
         goto fail;
      }

      lseek(fd, 0, SEEK_SET);
      /* A short or failed read would previously go unnoticed and compile
       * garbage. */
      if (read(fd, all_inputs + total_size, len) != len) {
         fprintf(stderr, "Failed to read %s\n", *infile);
         close(fd);
         goto fail;
      }
      close(fd);

      total_size = new_size;
      all_inputs[total_size] = '\0';
   }

   if (params.txt_outfile) {
      FILE *fp = fopen(params.txt_outfile, "w");
      if (!fp) {
         fprintf(stderr, "Failed to open %s\n", params.txt_outfile);
         goto fail;
      }
      fwrite(all_inputs, total_size, 1, fp);
      fclose(fp);
   }

   const char *allowed_spirv_extensions[] = {
      "SPV_EXT_shader_atomic_float_add",
      "SPV_EXT_shader_atomic_float_min_max",
      "SPV_KHR_float_controls",
      "SPV_INTEL_subgroups",
      NULL,
   };

   struct clc_compile_args clc_args = {
      .source = {
         .name = "intel_clc_files",
         .value = all_inputs,
      },
      .features = {
         .fp16 = true,
         .intel_subgroups = true,
         .subgroups = true,
         .subgroups_ifp = true,
      },
      .args = util_dynarray_begin(&clang_args),
      .num_args = util_dynarray_num_elements(&clang_args, char *),
      .allowed_spirv_extensions = allowed_spirv_extensions,
   };

   if (!clc_compile_c_to_spirv(&clc_args, &logger, &spirv_obj)) {
      goto fail;
   }

   if (params.spv_outfile) {
      FILE *fp = fopen(params.spv_outfile, "w");
      if (!fp) {
         fprintf(stderr, "Failed to open %s\n", params.spv_outfile);
         goto fail;
      }
      fwrite(spirv_obj.data, spirv_obj.size, 1, fp);
      fclose(fp);
   }

   /* NOTE(review): failure paths below skip glsl_type_singleton_decref();
    * harmless since the process is about to exit. */
   glsl_type_singleton_init_or_ref();

   if (params.output_nir) {
      exit_code = output_nir(&params, &spirv_obj);
   } else {
      if (params.platform == NULL) {
         fprintf(stderr, "No target platform name specified.\n");
         print_usage(argv[0], stderr);
         goto fail;
      }

      int pci_id = intel_device_name_to_pci_device_id(params.platform);
      if (pci_id < 0) {
         fprintf(stderr, "Invalid target platform name: %s\n", params.platform);
         goto fail;
      }

      if (!intel_get_device_info_from_pci_id(pci_id, &params.devinfo)) {
         fprintf(stderr, "Failed to get device information.\n");
         goto fail;
      }

      if (params.devinfo.verx10 < 125) {
         fprintf(stderr, "Platform currently not supported.\n");
         goto fail;
      }

      if (params.entry_point == NULL) {
         fprintf(stderr, "No entry-point name specified.\n");
         print_usage(argv[0], stderr);
         goto fail;
      }

      /* Use the outer parsed_spirv_data (freed at "end:"); a shadowing
       * local here used to leak everything clc_parse_spirv allocated. */
      if (!clc_parse_spirv(&spirv_obj, &logger, &parsed_spirv_data))
         goto fail;

      const struct clc_kernel_info *kernel_info = NULL;
      for (unsigned i = 0; i < parsed_spirv_data.num_kernels; i++) {
         if (strcmp(parsed_spirv_data.kernels[i].name, params.entry_point) == 0) {
            kernel_info = &parsed_spirv_data.kernels[i];
            break;
         }
      }
      if (kernel_info == NULL) {
         fprintf(stderr, "Kernel entrypoint %s not found\n", params.entry_point);
         goto fail;
      }

      exit_code = output_isa(&params, &spirv_obj);
   }

   glsl_type_singleton_decref();

   goto end;

fail:
   exit_code = 1;
end:
   disk_cache_destroy(disk_cache);
   clc_free_parsed_spirv(&parsed_spirv_data);
   clc_free_spirv(&spirv_obj);
   ralloc_free(params.mem_ctx);
   return exit_code;
}

View file

@ -65,7 +65,6 @@ libintel_compiler_elk_files = files(
'brw_fs_reg_allocate.cpp',
'brw_fs_register_coalesce.cpp',
'brw_fs_saturate_propagation.cpp',
'brw_fs_scoreboard.cpp',
'brw_fs_sel_peephole.cpp',
'brw_fs_thread_payload.cpp',
'brw_fs_validate.cpp',
@ -81,23 +80,14 @@ libintel_compiler_elk_files = files(
'brw_ir_vec4.h',
'brw_isa_info.h',
'brw_lower_logical_sends.cpp',
'brw_mesh.cpp',
'brw_nir.h',
'brw_nir.c',
'brw_nir_analyze_boolean_resolves.c',
'brw_nir_analyze_ubo_ranges.c',
'brw_nir_attribute_workarounds.c',
'brw_nir_lower_cooperative_matrix.c',
'brw_nir_lower_cs_intrinsics.c',
'brw_nir_lower_alpha_to_coverage.c',
'brw_nir_lower_intersection_shader.c',
'brw_nir_lower_ray_queries.c',
'brw_nir_lower_rt_intrinsics.c',
'brw_nir_lower_shader_calls.c',
'brw_nir_lower_storage_image.c',
'brw_nir_rt.h',
'brw_nir_rt.c',
'brw_nir_rt_builder.h',
'brw_packed_float.c',
'brw_predicated_break.cpp',
'brw_prim.h',
@ -105,7 +95,6 @@ libintel_compiler_elk_files = files(
'brw_reg.h',
'brw_reg_type.c',
'brw_reg_type.h',
'brw_rt.h',
'brw_schedule_instructions.cpp',
'brw_shader.cpp',
'brw_shader.h',
@ -173,7 +162,6 @@ if with_tests
'test_fs_combine_constants.cpp',
'test_fs_copy_propagation.cpp',
'test_fs_saturate_propagation.cpp',
'test_fs_scoreboard.cpp',
'test_simd_selection.cpp',
'test_vec4_cmod_propagation.cpp',
'test_vec4_copy_propagation.cpp',
@ -228,10 +216,6 @@ asm_testcases = [
['ivb', 'gfx7'],
['hsw', 'gfx7.5'],
['bdw', 'gfx8'],
['skl', 'gfx9'],
['icl', 'gfx11'],
['tgl', 'gfx12'],
['dg2', 'gfx12.5'],
]
test_runner = find_program('tests/run-test.py')

View file

@ -1,893 +0,0 @@
/*
* Copyright © 2019 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <gtest/gtest.h>
#include "brw_fs.h"
#include "brw_fs_builder.h"
#include "brw_cfg.h"
using namespace brw;
/* Fixture that builds a minimal Gfx12 fs_visitor so each test can emit a
 * short instruction sequence and run the SWSB scoreboard lowering on it. */
class scoreboard_test : public ::testing::Test {
protected:
   scoreboard_test();
   ~scoreboard_test() override;

   struct brw_compiler *compiler;
   struct brw_compile_params params;
   struct intel_device_info *devinfo;
   void *ctx;                           /* ralloc parent for the objects above */

   struct brw_wm_prog_data *prog_data;
   struct gl_shader_program *shader_prog;
   fs_visitor *v;                       /* visitor under test */
   fs_builder bld;                      /* builder appending at the end of v */
};
scoreboard_test::scoreboard_test()
   : bld(NULL, 0)
{
   ctx = ralloc_context(NULL);
   compiler = rzalloc(ctx, struct brw_compiler);
   devinfo = rzalloc(ctx, struct intel_device_info);
   /* Fake a Gfx12 device: software scoreboarding (SWSB) is what the pass
    * under test lowers to. */
   devinfo->ver = 12;
   devinfo->verx10 = devinfo->ver * 10;

   compiler->devinfo = devinfo;
   brw_init_isa_info(&compiler->isa, devinfo);

   params = {};
   params.mem_ctx = ctx;

   prog_data = ralloc(ctx, struct brw_wm_prog_data);
   nir_shader *shader =
      nir_shader_create(ctx, MESA_SHADER_FRAGMENT, NULL, NULL);

   /* SIMD8 fragment-shader visitor with an empty IR list. */
   v = new fs_visitor(compiler, &params, NULL, &prog_data->base, shader, 8,
                      false, false);

   bld = fs_builder(v).at_end();
}
scoreboard_test::~scoreboard_test()
{
   /* Destroy the visitor before freeing the ralloc context that owns the
    * data it references. */
   delete v;
   v = NULL;

   ralloc_free(ctx);
   ctx = NULL;
}
/* Return the num-th instruction of the block, counting from its start. */
static fs_inst *
instruction(bblock_t *block, int num)
{
   fs_inst *cur = (fs_inst *)block->start();
   while (num-- > 0)
      cur = (fs_inst *)cur->next;
   return cur;
}
/* Run the scoreboard lowering pass, dumping the IR before and after when
 * the TEST_DEBUG environment variable is set. */
static void
lower_scoreboard(fs_visitor *v)
{
   const bool verbose = getenv("TEST_DEBUG") != NULL;

   if (verbose) {
      fprintf(stderr, "= Before =\n");
      v->cfg->dump();
   }

   v->lower_scoreboard();

   if (verbose) {
      fprintf(stderr, "\n= After =\n");
      v->cfg->dump();
   }
}
/* Emit a single-message-register SEND; SENDs execute out of order, which
 * is what the SBID-based test cases rely on. */
fs_inst *
emit_SEND(const fs_builder &bld, const fs_reg &dst,
          const fs_reg &desc, const fs_reg &payload)
{
   fs_inst *send = bld.emit(SHADER_OPCODE_SEND, dst, desc, desc, payload);
   send->mlen = 1;
   return send;
}
/* Build a tgl_swsb combining both a RegDist and an SBID annotation, which
 * the regular constructors don't produce in one call. */
static tgl_swsb
tgl_swsb_testcase(unsigned regdist, unsigned sbid, enum tgl_sbid_mode mode)
{
   tgl_swsb combined = tgl_swsb_sbid(mode, sbid);
   combined.regdist = regdist;
   return combined;
}
/* Equality for EXPECT_EQ: the sbid field is only meaningful when an SBID
 * mode is set, so it is ignored for TGL_SBID_NULL. */
bool operator ==(const tgl_swsb &a, const tgl_swsb &b)
{
   if (a.mode != b.mode)
      return false;
   if (a.regdist != b.regdist)
      return false;
   return a.mode == TGL_SBID_NULL || a.sbid == b.sbid;
}
/* Pretty-print a tgl_swsb for gtest failure messages, in assembly-like
 * syntax, e.g. "@2 $1.dst". */
std::ostream &operator<<(std::ostream &os, const tgl_swsb &swsb) {
   if (swsb.regdist)
      os << "@" << swsb.regdist;

   if (swsb.mode) {
      if (swsb.regdist)
         os << " ";
      os << "$" << swsb.sbid;
      if (swsb.mode & TGL_SBID_DST)
         os << ".dst";
      if (swsb.mode & TGL_SBID_SRC)
         os << ".src";
   }

   return os;
}
/* RAW between in-order ALU instructions: the reader gets a RegDist. */
TEST_F(scoreboard_test, RAW_inorder_inorder)
{
   fs_reg g[16];
   for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
      g[i] = v->vgrf(glsl_int_type());

   fs_reg x = v->vgrf(glsl_int_type());
   fs_reg y = v->vgrf(glsl_int_type());
   bld.ADD(x, g[1], g[2]);
   bld.MUL(y, g[3], g[4]);
   bld.AND(g[5], x, y);

   v->calculate_cfg();
   bblock_t *block0 = v->cfg->blocks[0];

   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(2, block0->end_ip);

   lower_scoreboard(v);
   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(2, block0->end_ip);

   /* The AND depends on its immediate predecessor, so @1 suffices. */
   EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
   EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
   EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_regdist(1));
}
/* RAW where an out-of-order SEND reads an in-order result: the SEND gets
 * a RegDist on the producer plus its own SBID (set). */
TEST_F(scoreboard_test, RAW_inorder_outoforder)
{
   fs_reg g[16];
   for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
      g[i] = v->vgrf(glsl_int_type());

   fs_reg x = v->vgrf(glsl_int_type());
   bld.ADD(x, g[1], g[2]);
   bld.MUL(g[3], g[4], g[5]);
   emit_SEND(bld, g[6], g[7], x);

   v->calculate_cfg();
   bblock_t *block0 = v->cfg->blocks[0];

   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(2, block0->end_ip);

   lower_scoreboard(v);
   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(2, block0->end_ip);

   EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
   EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
   EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_testcase(2, 0, TGL_SBID_SET));
}
/* RAW where an in-order instruction reads a SEND result: the reader waits
 * on the SEND's SBID destination plus a RegDist for the in-order MUL. */
TEST_F(scoreboard_test, RAW_outoforder_inorder)
{
   fs_reg g[16];
   for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
      g[i] = v->vgrf(glsl_int_type());

   fs_reg x = v->vgrf(glsl_int_type());
   fs_reg y = v->vgrf(glsl_int_type());
   emit_SEND(bld, x, g[1], g[2]);
   bld.MUL(y, g[3], g[4]);
   bld.AND(g[5], x, y);

   v->calculate_cfg();
   bblock_t *block0 = v->cfg->blocks[0];

   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(2, block0->end_ip);

   lower_scoreboard(v);
   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(2, block0->end_ip);

   EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_sbid(TGL_SBID_SET, 0));
   EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
   EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_testcase(1, 0, TGL_SBID_DST));
}
TEST_F(scoreboard_test, RAW_outoforder_outoforder)
{
   fs_reg g[16];
   for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
      g[i] = v->vgrf(glsl_int_type());

   /* The second SEND depends on the first, and would need to refer to two
    * SBIDs. Since it is not possible we expect a SYNC instruction to be
    * added.
    */
   fs_reg x = v->vgrf(glsl_int_type());
   emit_SEND(bld, x, g[1], g[2]);
   emit_SEND(bld, g[3], x, g[4])->sfid++;

   v->calculate_cfg();
   bblock_t *block0 = v->cfg->blocks[0];

   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(1, block0->end_ip);

   lower_scoreboard(v);
   /* One extra instruction: the inserted SYNC. */
   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(2, block0->end_ip);

   EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_sbid(TGL_SBID_SET, 0));

   fs_inst *sync = instruction(block0, 1);
   EXPECT_EQ(sync->opcode, BRW_OPCODE_SYNC);
   EXPECT_EQ(sync->sched, tgl_swsb_sbid(TGL_SBID_DST, 0));

   EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_sbid(TGL_SBID_SET, 1));
}
/* WAR between in-order instructions needs no annotation: in-order issue
 * already guarantees the read happens before the later write. */
TEST_F(scoreboard_test, WAR_inorder_inorder)
{
   fs_reg g[16];
   for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
      g[i] = v->vgrf(glsl_int_type());

   fs_reg x = v->vgrf(glsl_int_type());
   bld.ADD(g[1], x, g[2]);
   bld.MUL(g[3], g[4], g[5]);
   bld.AND(x, g[6], g[7]);

   v->calculate_cfg();
   bblock_t *block0 = v->cfg->blocks[0];

   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(2, block0->end_ip);

   lower_scoreboard(v);
   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(2, block0->end_ip);

   EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
   EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
   EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_null());
}
/* WAR where a SEND overwrites a register an earlier in-order instruction
 * read: the SEND carries a RegDist to the reader plus its own SBID. */
TEST_F(scoreboard_test, WAR_inorder_outoforder)
{
   fs_reg g[16];
   for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
      g[i] = v->vgrf(glsl_int_type());

   fs_reg x = v->vgrf(glsl_int_type());
   bld.ADD(g[1], x, g[2]);
   bld.MUL(g[3], g[4], g[5]);
   emit_SEND(bld, x, g[6], g[7]);

   v->calculate_cfg();
   bblock_t *block0 = v->cfg->blocks[0];

   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(2, block0->end_ip);

   lower_scoreboard(v);
   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(2, block0->end_ip);

   EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
   EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
   EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_testcase(2, 0, TGL_SBID_SET));
}
/* WAR where an in-order instruction overwrites a SEND source: the writer
 * waits on the SEND's SBID source-read completion. */
TEST_F(scoreboard_test, WAR_outoforder_inorder)
{
   fs_reg g[16];
   for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
      g[i] = v->vgrf(glsl_int_type());

   fs_reg x = v->vgrf(glsl_int_type());
   emit_SEND(bld, g[1], g[2], x);
   bld.MUL(g[4], g[5], g[6]);
   bld.AND(x, g[7], g[8]);

   v->calculate_cfg();
   bblock_t *block0 = v->cfg->blocks[0];

   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(2, block0->end_ip);

   lower_scoreboard(v);
   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(2, block0->end_ip);

   EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_sbid(TGL_SBID_SET, 0));
   EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
   EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_sbid(TGL_SBID_SRC, 0));
}
/* WAR between two SENDs: the second can't carry two SBID references, so a
 * SYNC waiting on the first SEND's source reads is inserted. */
TEST_F(scoreboard_test, WAR_outoforder_outoforder)
{
   fs_reg g[16];
   for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
      g[i] = v->vgrf(glsl_int_type());

   fs_reg x = v->vgrf(glsl_int_type());
   emit_SEND(bld, g[1], g[2], x);
   emit_SEND(bld, x, g[3], g[4])->sfid++;

   v->calculate_cfg();
   bblock_t *block0 = v->cfg->blocks[0];

   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(1, block0->end_ip);

   lower_scoreboard(v);
   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(2, block0->end_ip);

   EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_sbid(TGL_SBID_SET, 0));

   fs_inst *sync = instruction(block0, 1);
   EXPECT_EQ(sync->opcode, BRW_OPCODE_SYNC);
   EXPECT_EQ(sync->sched, tgl_swsb_sbid(TGL_SBID_SRC, 0));

   EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_sbid(TGL_SBID_SET, 1));
}
/* WAW between in-order instructions: conservative RegDist on the second
 * writer. */
TEST_F(scoreboard_test, WAW_inorder_inorder)
{
   fs_reg g[16];
   for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
      g[i] = v->vgrf(glsl_int_type());

   fs_reg x = v->vgrf(glsl_int_type());
   bld.ADD(x, g[1], g[2]);
   bld.MUL(g[3], g[4], g[5]);
   bld.AND(x, g[6], g[7]);

   v->calculate_cfg();
   bblock_t *block0 = v->cfg->blocks[0];

   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(2, block0->end_ip);

   lower_scoreboard(v);
   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(2, block0->end_ip);

   EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
   EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());

   /* NOTE: We only need this RegDist if a long instruction is followed by a
    * short one. The pass is currently conservative about this and adding the
    * annotation.
    */
   EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_regdist(2));
}
/* WAW where a SEND overwrites an in-order result: the SEND carries a
 * RegDist to the earlier writer plus its own SBID. */
TEST_F(scoreboard_test, WAW_inorder_outoforder)
{
   fs_reg g[16];
   for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
      g[i] = v->vgrf(glsl_int_type());

   fs_reg x = v->vgrf(glsl_int_type());
   bld.ADD(x, g[1], g[2]);
   bld.MUL(g[3], g[4], g[5]);
   emit_SEND(bld, x, g[6], g[7]);

   v->calculate_cfg();
   bblock_t *block0 = v->cfg->blocks[0];

   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(2, block0->end_ip);

   lower_scoreboard(v);
   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(2, block0->end_ip);

   EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
   EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
   EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_testcase(2, 0, TGL_SBID_SET));
}
/* WAW where an in-order instruction overwrites a SEND destination: the
 * writer waits on the SEND's SBID destination completion. */
TEST_F(scoreboard_test, WAW_outoforder_inorder)
{
   fs_reg g[16];
   for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
      g[i] = v->vgrf(glsl_int_type());

   fs_reg x = v->vgrf(glsl_int_type());
   emit_SEND(bld, x, g[1], g[2]);
   bld.MUL(g[3], g[4], g[5]);
   bld.AND(x, g[6], g[7]);

   v->calculate_cfg();
   bblock_t *block0 = v->cfg->blocks[0];

   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(2, block0->end_ip);

   lower_scoreboard(v);
   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(2, block0->end_ip);

   EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_sbid(TGL_SBID_SET, 0));
   EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
   EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_sbid(TGL_SBID_DST, 0));
}
/* WAW between two SENDs writing the same register: a SYNC waiting on the
 * first SEND's destination is inserted before the second. */
TEST_F(scoreboard_test, WAW_outoforder_outoforder)
{
   fs_reg g[16];
   for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
      g[i] = v->vgrf(glsl_int_type());

   fs_reg x = v->vgrf(glsl_int_type());
   emit_SEND(bld, x, g[1], g[2]);
   emit_SEND(bld, x, g[3], g[4])->sfid++;

   v->calculate_cfg();
   bblock_t *block0 = v->cfg->blocks[0];

   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(1, block0->end_ip);

   lower_scoreboard(v);
   ASSERT_EQ(0, block0->start_ip);
   ASSERT_EQ(2, block0->end_ip);

   EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_sbid(TGL_SBID_SET, 0));

   fs_inst *sync = instruction(block0, 1);
   EXPECT_EQ(sync->opcode, BRW_OPCODE_SYNC);
   EXPECT_EQ(sync->sched, tgl_swsb_sbid(TGL_SBID_DST, 0));

   EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_sbid(TGL_SBID_SET, 1));
}
/* Dependencies must be tracked across the loop back-edge: both the ADD in
 * the body and the MUL after the loop get a RegDist of 1. */
TEST_F(scoreboard_test, loop1)
{
   fs_reg g[16];
   for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
      g[i] = v->vgrf(glsl_int_type());

   fs_reg x = v->vgrf(glsl_int_type());
   bld.XOR(x, g[1], g[2]);

   bld.emit(BRW_OPCODE_DO);
   bld.ADD(x, g[1], g[2]);
   bld.emit(BRW_OPCODE_WHILE)->predicate = BRW_PREDICATE_NORMAL;

   bld.MUL(x, g[1], g[2]);

   v->calculate_cfg();
   lower_scoreboard(v);

   bblock_t *body = v->cfg->blocks[2];
   fs_inst *add = instruction(body, 0);
   EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
   EXPECT_EQ(add->sched, tgl_swsb_regdist(1));

   bblock_t *last_block = v->cfg->blocks[3];
   fs_inst *mul = instruction(last_block, 0);
   EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
   EXPECT_EQ(mul->sched, tgl_swsb_regdist(1));
}
/* Same loop but with extra writes before it, so the shortest distance to a
 * conflicting write comes through the back-edge. */
TEST_F(scoreboard_test, loop2)
{
   fs_reg g[16];
   for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
      g[i] = v->vgrf(glsl_int_type());

   fs_reg x = v->vgrf(glsl_int_type());
   bld.XOR(x, g[1], g[2]);
   bld.XOR(g[3], g[1], g[2]);
   bld.XOR(g[4], g[1], g[2]);
   bld.XOR(g[5], g[1], g[2]);

   bld.emit(BRW_OPCODE_DO);
   bld.ADD(x, g[1], g[2]);
   bld.emit(BRW_OPCODE_WHILE)->predicate = BRW_PREDICATE_NORMAL;

   bld.MUL(x, g[1], g[2]);

   v->calculate_cfg();
   lower_scoreboard(v);

   /* Now the write in ADD has the tightest RegDist for both ADD and MUL. */

   bblock_t *body = v->cfg->blocks[2];
   fs_inst *add = instruction(body, 0);
   EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
   EXPECT_EQ(add->sched, tgl_swsb_regdist(2));

   bblock_t *last_block = v->cfg->blocks[3];
   fs_inst *mul = instruction(last_block, 0);
   EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
   EXPECT_EQ(mul->sched, tgl_swsb_regdist(2));
}
/* Padding inside the loop body increases the ADD's distance to the
 * conflicting write on both the fall-through and back-edge paths. */
TEST_F(scoreboard_test, loop3)
{
   fs_reg g[16];
   for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
      g[i] = v->vgrf(glsl_int_type());

   fs_reg x = v->vgrf(glsl_int_type());
   bld.XOR(x, g[1], g[2]);

   bld.emit(BRW_OPCODE_DO);

   /* For the ADD in the loop body this extra distance will always apply. */
   bld.XOR(g[3], g[1], g[2]);
   bld.XOR(g[4], g[1], g[2]);
   bld.XOR(g[5], g[1], g[2]);
   bld.XOR(g[6], g[1], g[2]);

   bld.ADD(x, g[1], g[2]);
   bld.emit(BRW_OPCODE_WHILE)->predicate = BRW_PREDICATE_NORMAL;

   bld.MUL(x, g[1], g[2]);

   v->calculate_cfg();
   lower_scoreboard(v);

   bblock_t *body = v->cfg->blocks[2];
   fs_inst *add = instruction(body, 4);
   EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
   EXPECT_EQ(add->sched, tgl_swsb_regdist(5));

   bblock_t *last_block = v->cfg->blocks[3];
   fs_inst *mul = instruction(last_block, 0);
   EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
   EXPECT_EQ(mul->sched, tgl_swsb_regdist(1));
}
/* Dependencies must be tracked into and out of an IF/ENDIF region. */
TEST_F(scoreboard_test, conditional1)
{
   fs_reg g[16];
   for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
      g[i] = v->vgrf(glsl_int_type());

   fs_reg x = v->vgrf(glsl_int_type());
   bld.XOR(x, g[1], g[2]);

   bld.emit(BRW_OPCODE_IF);
   bld.ADD(x, g[1], g[2]);
   bld.emit(BRW_OPCODE_ENDIF);

   bld.MUL(x, g[1], g[2]);

   v->calculate_cfg();
   lower_scoreboard(v);

   bblock_t *body = v->cfg->blocks[1];
   fs_inst *add = instruction(body, 0);
   EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
   EXPECT_EQ(add->sched, tgl_swsb_regdist(2));

   bblock_t *last_block = v->cfg->blocks[2];
   fs_inst *mul = instruction(last_block, 1);
   EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
   EXPECT_EQ(mul->sched, tgl_swsb_regdist(2));
}
/* Like conditional1, but with three extra XORs between the write of `x`
 * and the IF, stretching the ADD's RegDist across the block boundary while
 * leaving the post-ENDIF MUL at distance 2 from the ADD.
 */
TEST_F(scoreboard_test, conditional2)
{
fs_reg g[16];
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
g[i] = v->vgrf(glsl_int_type());
fs_reg x = v->vgrf(glsl_int_type());
bld.XOR( x, g[1], g[2]);
bld.XOR(g[3], g[1], g[2]);
bld.XOR(g[4], g[1], g[2]);
bld.XOR(g[5], g[1], g[2]);
bld.emit(BRW_OPCODE_IF);
bld.ADD( x, g[1], g[2]);
bld.emit(BRW_OPCODE_ENDIF);
bld.MUL( x, g[1], g[2]);
v->calculate_cfg();
lower_scoreboard(v);
/* ADD: first XOR . 3 XORs . IF . ADD -> distance 5. */
bblock_t *body = v->cfg->blocks[1];
fs_inst *add = instruction(body, 0);
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
EXPECT_EQ(add->sched, tgl_swsb_regdist(5));
/* MUL still syncs on the nearer writer of `x` (the ADD): distance 2. */
bblock_t *last_block = v->cfg->blocks[2];
fs_inst *mul = instruction(last_block, 1);
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
EXPECT_EQ(mul->sched, tgl_swsb_regdist(2));
}
/* Variant with the filler XORs placed *inside* the then-block, before the
 * ADD.  The ADD's distance to the pre-IF write of `x` is the same as in
 * conditional2 (5), showing the counting continues across the IF.
 */
TEST_F(scoreboard_test, conditional3)
{
fs_reg g[16];
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
g[i] = v->vgrf(glsl_int_type());
fs_reg x = v->vgrf(glsl_int_type());
bld.XOR( x, g[1], g[2]);
bld.emit(BRW_OPCODE_IF);
bld.XOR(g[3], g[1], g[2]);
bld.XOR(g[4], g[1], g[2]);
bld.XOR(g[5], g[1], g[2]);
bld.ADD( x, g[1], g[2]);
bld.emit(BRW_OPCODE_ENDIF);
bld.MUL( x, g[1], g[2]);
v->calculate_cfg();
lower_scoreboard(v);
/* ADD is the 4th instruction of the then-block: XOR . IF . 3 XORs . ADD
 * -> distance 5.
 */
bblock_t *body = v->cfg->blocks[1];
fs_inst *add = instruction(body, 3);
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
EXPECT_EQ(add->sched, tgl_swsb_regdist(5));
/* MUL: ADD . ENDIF . MUL -> distance 2. */
bblock_t *last_block = v->cfg->blocks[2];
fs_inst *mul = instruction(last_block, 1);
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
EXPECT_EQ(mul->sched, tgl_swsb_regdist(2));
}
/* Variant with the filler XORs placed *after* the ADD inside the
 * then-block.  Now it is the MUL's distance to the ADD that is stretched,
 * while the ADD stays at distance 2 from the pre-IF XOR.
 */
TEST_F(scoreboard_test, conditional4)
{
fs_reg g[16];
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
g[i] = v->vgrf(glsl_int_type());
fs_reg x = v->vgrf(glsl_int_type());
bld.XOR( x, g[1], g[2]);
bld.emit(BRW_OPCODE_IF);
bld.ADD( x, g[1], g[2]);
bld.XOR(g[3], g[1], g[2]);
bld.XOR(g[4], g[1], g[2]);
bld.XOR(g[5], g[1], g[2]);
bld.emit(BRW_OPCODE_ENDIF);
bld.MUL( x, g[1], g[2]);
v->calculate_cfg();
lower_scoreboard(v);
/* ADD: XOR . IF . ADD -> distance 2. */
bblock_t *body = v->cfg->blocks[1];
fs_inst *add = instruction(body, 0);
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
EXPECT_EQ(add->sched, tgl_swsb_regdist(2));
/* MUL: ADD . 3 XORs . ENDIF . MUL would be distance 5, but the asserted
 * value is 3 -- presumably only part of that span counts toward RegDist
 * here; the EXPECT below is the authoritative behavior.
 */
bblock_t *last_block = v->cfg->blocks[2];
fs_inst *mul = instruction(last_block, 1);
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
EXPECT_EQ(mul->sched, tgl_swsb_regdist(3));
}
/* IF/ELSE/ENDIF: the writes of `x` in the then-block (ADD) and else-block
 * (ROL) each depend on the pre-IF XOR at distance 2, and the MUL after the
 * ENDIF syncs at distance 2 as well.
 */
TEST_F(scoreboard_test, conditional5)
{
fs_reg g[16];
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
g[i] = v->vgrf(glsl_int_type());
fs_reg x = v->vgrf(glsl_int_type());
bld.XOR( x, g[1], g[2]);
bld.emit(BRW_OPCODE_IF);
bld.ADD( x, g[1], g[2]);
bld.emit(BRW_OPCODE_ELSE);
bld.ROL( x, g[1], g[2]);
bld.emit(BRW_OPCODE_ENDIF);
bld.MUL( x, g[1], g[2]);
v->calculate_cfg();
lower_scoreboard(v);
/* then-block: XOR . IF . ADD -> distance 2. */
bblock_t *then_body = v->cfg->blocks[1];
fs_inst *add = instruction(then_body, 0);
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
EXPECT_EQ(add->sched, tgl_swsb_regdist(2));
/* else-block: ADD . ELSE . ROL -> distance 2 along the physical order. */
bblock_t *else_body = v->cfg->blocks[2];
fs_inst *rol = instruction(else_body, 0);
EXPECT_EQ(rol->opcode, BRW_OPCODE_ROL);
EXPECT_EQ(rol->sched, tgl_swsb_regdist(2));
/* MUL follows the ENDIF; nearest preceding writer of `x` is 2 away. */
bblock_t *last_block = v->cfg->blocks[3];
fs_inst *mul = instruction(last_block, 1);
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
EXPECT_EQ(mul->sched, tgl_swsb_regdist(2));
}
/* IF/ELSE/ENDIF with filler XORs *before* the dependent instruction in
 * each arm: the ADD and ROL distances grow with the number of intervening
 * instructions on their respective paths.
 */
TEST_F(scoreboard_test, conditional6)
{
fs_reg g[16];
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
g[i] = v->vgrf(glsl_int_type());
fs_reg x = v->vgrf(glsl_int_type());
bld.XOR( x, g[1], g[2]);
bld.emit(BRW_OPCODE_IF);
bld.XOR(g[3], g[1], g[2]);
bld.XOR(g[4], g[1], g[2]);
bld.XOR(g[5], g[1], g[2]);
bld.ADD( x, g[1], g[2]);
bld.emit(BRW_OPCODE_ELSE);
bld.XOR(g[6], g[1], g[2]);
bld.XOR(g[7], g[1], g[2]);
bld.XOR(g[8], g[1], g[2]);
bld.XOR(g[9], g[1], g[2]);
bld.ROL( x, g[1], g[2]);
bld.emit(BRW_OPCODE_ENDIF);
bld.MUL( x, g[1], g[2]);
v->calculate_cfg();
lower_scoreboard(v);
/* ADD: XOR . IF . 3 XORs . ADD -> distance 5. */
bblock_t *then_body = v->cfg->blocks[1];
fs_inst *add = instruction(then_body, 3);
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
EXPECT_EQ(add->sched, tgl_swsb_regdist(5));
/* ROL: ADD . ELSE . 4 XORs . ROL -> distance 6 along physical order. */
bblock_t *else_body = v->cfg->blocks[2];
fs_inst *rol = instruction(else_body, 4);
EXPECT_EQ(rol->opcode, BRW_OPCODE_ROL);
EXPECT_EQ(rol->sched, tgl_swsb_regdist(6));
/* MUL: nearest preceding writer of `x` (ROL) is 2 away. */
bblock_t *last_block = v->cfg->blocks[3];
fs_inst *mul = instruction(last_block, 1);
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
EXPECT_EQ(mul->sched, tgl_swsb_regdist(2));
}
/* IF/ELSE/ENDIF with filler XORs *after* the dependent instruction in each
 * arm: the ADD and ROL stay close to their producers, while the MUL after
 * the ENDIF ends up far from the last writer of `x`.
 */
TEST_F(scoreboard_test, conditional7)
{
fs_reg g[16];
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
g[i] = v->vgrf(glsl_int_type());
fs_reg x = v->vgrf(glsl_int_type());
bld.XOR( x, g[1], g[2]);
bld.emit(BRW_OPCODE_IF);
bld.ADD( x, g[1], g[2]);
bld.XOR(g[3], g[1], g[2]);
bld.XOR(g[4], g[1], g[2]);
bld.XOR(g[5], g[1], g[2]);
bld.emit(BRW_OPCODE_ELSE);
bld.ROL( x, g[1], g[2]);
bld.XOR(g[6], g[1], g[2]);
bld.XOR(g[7], g[1], g[2]);
bld.XOR(g[8], g[1], g[2]);
bld.XOR(g[9], g[1], g[2]);
bld.emit(BRW_OPCODE_ENDIF);
bld.MUL( x, g[1], g[2]);
v->calculate_cfg();
lower_scoreboard(v);
/* ADD: XOR . IF . ADD -> distance 2. */
bblock_t *then_body = v->cfg->blocks[1];
fs_inst *add = instruction(then_body, 0);
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
EXPECT_EQ(add->sched, tgl_swsb_regdist(2));
/* ROL: ELSE immediately precedes it; distance 2 from the then-block. */
bblock_t *else_body = v->cfg->blocks[2];
fs_inst *rol = instruction(else_body, 0);
EXPECT_EQ(rol->opcode, BRW_OPCODE_ROL);
EXPECT_EQ(rol->sched, tgl_swsb_regdist(2));
/* MUL: ROL . 4 XORs . ENDIF . MUL -> distance 6. */
bblock_t *last_block = v->cfg->blocks[3];
fs_inst *mul = instruction(last_block, 1);
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
EXPECT_EQ(mul->sched, tgl_swsb_regdist(6));
}
/* IF/ELSE/ENDIF with five filler XORs before the IF.  Exercises the
 * physical (fallthrough) CFG edge: the then-block's ADD counts all the way
 * back to the pre-IF XOR, but the else-block's ROL only counts back to the
 * ADD along the physical instruction order.
 */
TEST_F(scoreboard_test, conditional8)
{
fs_reg g[16];
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
g[i] = v->vgrf(glsl_int_type());
fs_reg x = v->vgrf(glsl_int_type());
bld.XOR( x, g[1], g[2]);
bld.XOR(g[3], g[1], g[2]);
bld.XOR(g[4], g[1], g[2]);
bld.XOR(g[5], g[1], g[2]);
bld.XOR(g[6], g[1], g[2]);
bld.XOR(g[7], g[1], g[2]);
bld.emit(BRW_OPCODE_IF);
bld.ADD( x, g[1], g[2]);
bld.emit(BRW_OPCODE_ELSE);
bld.ROL( x, g[1], g[2]);
bld.emit(BRW_OPCODE_ENDIF);
bld.MUL( x, g[1], g[2]);
v->calculate_cfg();
lower_scoreboard(v);
/* ADD: first XOR . 5 XORs . IF . ADD -> distance 7. */
bblock_t *then_body = v->cfg->blocks[1];
fs_inst *add = instruction(then_body, 0);
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
EXPECT_EQ(add->sched, tgl_swsb_regdist(7));
/* Note that the ROL will have RegDist 2 and not 7, illustrating the
 * physical CFG edge between the then-block and the else-block.
 */
bblock_t *else_body = v->cfg->blocks[2];
fs_inst *rol = instruction(else_body, 0);
EXPECT_EQ(rol->opcode, BRW_OPCODE_ROL);
EXPECT_EQ(rol->sched, tgl_swsb_regdist(2));
/* MUL: nearest preceding writer of `x` (ROL) is 2 away. */
bblock_t *last_block = v->cfg->blocks[3];
fs_inst *mul = instruction(last_block, 1);
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
EXPECT_EQ(mul->sched, tgl_swsb_regdist(2));
}
/* On Gfx12.5 (verx10 == 125), reads of `x` happen in two different
 * pipelines: the first ADD writes a float VGRF, the second an int VGRF.
 * The third ADD then overwrites `x` (write-after-read).  The test checks
 * that lower_scoreboard() only annotates the final write -- with RegDist 1
 * -- and leaves the two readers unannotated; presumably a single RegDist
 * covers the cross-pipe hazard here (authoritative behavior is the
 * EXPECTs below).
 */
TEST_F(scoreboard_test, gfx125_RaR_over_different_pipes)
{
devinfo->verx10 = 125;
brw_init_isa_info(&compiler->isa, devinfo);
fs_reg a = v->vgrf(glsl_int_type());
fs_reg b = v->vgrf(glsl_int_type());
fs_reg f = v->vgrf(glsl_float_type());
fs_reg x = v->vgrf(glsl_int_type());
bld.ADD(f, x, x);
bld.ADD(a, x, x);
bld.ADD(x, b, b);
v->calculate_cfg();
/* Sanity-check the single-block CFG before and after lowering, to make
 * sure the pass does not add or remove instructions here.
 */
bblock_t *block0 = v->cfg->blocks[0];
ASSERT_EQ(0, block0->start_ip);
ASSERT_EQ(2, block0->end_ip);
lower_scoreboard(v);
ASSERT_EQ(0, block0->start_ip);
ASSERT_EQ(2, block0->end_ip);
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_regdist(1));
}

View file

@ -1,7 +0,0 @@
and(1) cr0<1>UD cr0<0,1,0>UD 0xfffffb7fUD { align1 1N switch };
and(1) cr0<1>UD cr0<0,1,0>UD 0xffffff7fUD { align1 1N switch };
and(1) cr0<1>UD cr0<0,1,0>UD 0xffffffcfUD { align1 1N switch };
and(1) cr0<1>UD cr0<0,1,0>UD 0xfffffbffUD { align1 1N switch };
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000400UD { align1 1N switch };
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000030UD { align1 1N switch };
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000080UD { align1 1N switch };

View file

@ -1,7 +0,0 @@
05 80 00 00 00 00 00 30 00 10 00 06 7f fb ff ff
05 80 00 00 00 00 00 30 00 10 00 06 7f ff ff ff
05 80 00 00 00 00 00 30 00 10 00 06 cf ff ff ff
05 80 00 00 00 00 00 30 00 10 00 06 ff fb ff ff
06 80 00 00 00 00 00 30 00 10 00 06 00 04 00 00
06 80 00 00 00 00 00 30 00 10 00 06 30 00 00 00
06 80 00 00 00 00 00 30 00 10 00 06 80 00 00 00

View file

@ -1 +0,0 @@
rol(16) g3<1>UD g2<0,1,0>UD g2.1<0,1,0>UD { align1 1H };

View file

@ -1 +0,0 @@
0f 00 80 00 08 02 60 20 40 00 00 02 44 00 00 00

View file

@ -1 +0,0 @@
ror(16) g3<1>UD g2<0,1,0>UD g2.1<0,1,0>UD { align1 1H };

View file

@ -1 +0,0 @@
0e 00 80 00 08 02 60 20 40 00 00 02 44 00 00 00

View file

@ -1,7 +0,0 @@
add3(8) g118<1>D -g117<8,8,1>D g114<8,8,1>D g115<1,1,1>D { align1 1Q I@2 };
add3(16) g55<1>D g50<8,8,1>D g46<8,8,1>D -g53<1,1,1>D { align1 1H @2 $5.dst };
add3(16) g111<1>D -g40<8,8,1>D -g88<8,8,1>D g111<1,1,1>D { align1 1H I@1 };
add3(16) g49<1>D 0x0008UW g47<8,8,1>D g26<1,1,1>D { align1 1H I@4 };
add3(16) g55<1>D 0x0008UW g53<8,8,1>D g65<1,1,1>D { align1 2H I@3 };
add3(8) g57<1>D g52<8,8,1>D (abs)g48<8,8,1>D (abs)g59<1,1,1>D { align1 1Q I@4 };
add3(16) g51<1>D g63<8,8,1>D -g122<8,8,1>D (abs)g27<1,1,1>D { align1 1H I@7 };

View file

@ -1,7 +0,0 @@
52 1a 03 00 68 2e 04 76 05 75 0e 0e 05 72 05 73
52 a5 04 00 68 0e 04 37 05 32 2e 0e 05 2e 05 35
52 19 04 00 68 2e 04 6f 05 28 8e 0e 05 58 05 6f
52 1c 04 00 60 41 04 31 08 00 0e 0e 05 2f 05 1a
52 1b 24 00 60 41 04 37 08 00 0e 0e 05 35 05 41
52 1c 03 00 68 0e 04 39 05 34 5e 0e 05 30 05 3b
52 1f 04 00 68 0e 04 33 05 3f 9e 0e 05 7a 05 1b

View file

@ -1,30 +0,0 @@
(+f0.0.any8h) send(1) g57UD g58UD nullUD 0x6210c500 0x02000000
ugm MsgDesc: ( load, a32, d32, V8, transpose, L1STATE_L3MOCS dst_len = 1, src0_len = 1, src1_len = 0 bti ) BTI 2 base_offset 0 { align1 WE_all 1N $5 };
(+f0.0.any8h) send(1) g28UD g29UD nullUD 0x6210c500 0x02000000
ugm MsgDesc: ( load, a32, d32, V8, transpose, L1STATE_L3MOCS dst_len = 1, src0_len = 1, src1_len = 0 bti ) BTI 2 base_offset 0 { align1 WE_all 1N $2 };
(+f0.0.any32h) send(1) g57UD g58UD nullUD 0x6210c500 0x02000000
ugm MsgDesc: ( load, a32, d32, V8, transpose, L1STATE_L3MOCS dst_len = 1, src0_len = 1, src1_len = 0 bti ) BTI 2 base_offset 0 { align1 WE_all 1N $0 };
send(8) nullUD g79UD g10UD 0x6200f506 0x04000100
ugm MsgDesc: ( store_cmask, a32, d32, xyzw, L1STATE_L3MOCS dst_len = 0, src0_len = 1, src1_len = 4 bti ) BTI 4 base_offset 0 { align1 1Q $0 };
send(16) nullUD g9UD g7UD 0x44000504 a0.1<0>UD
ugm MsgDesc: ( store, a32, d32, V1, L1STATE_L3MOCS dst_len = 0, src0_len = 2, src1_len = 0 ss ) surface_state_index 0 { align1 1H @1 $0 };
send(1) g4UD g0UD nullUD 0x0210151f 0x00000000
tgm MsgDesc: ( fence, a32, tile, evict, normal_routing dst_len = 1, src0_len = 1, src1_len = 0 flat ) base_offset 0 { align1 WE_all 1N $3 };
send(8) nullUD g36UD g37UD 0x02000b04 0x00000040
slm MsgDesc: ( store, a32, d16u32, V1, L1STATE_L3MOCS dst_len = 0, src0_len = 1, src1_len = 1 flat ) base_offset 0 { align1 1Q $1 };
send(8) nullUD g34UD g35UD 0x02000b04 0x00000040
slm MsgDesc: ( store, a32, d16u32, V1, L1STATE_L3MOCS dst_len = 0, src0_len = 1, src1_len = 1 flat ) base_offset 0 { align1 1Q $0 };
send(8) nullUD g6UD g7UD 0x0200f506 0x00000100
slm MsgDesc: ( store_cmask, a32, d32, xyzw, L1STATE_L3MOCS dst_len = 0, src0_len = 1, src1_len = 4 flat ) base_offset 0 { align1 1Q $6 };
send(16) nullUD g82UD g91UD 0x04040519 0x00000080
slm MsgDesc: ( atomic_or, a32, d32, V1, L1UC_L3WB dst_len = 0, src0_len = 2, src1_len = 2 flat ) base_offset 0 { align1 2H $0 };
send(1) g10UD g0UD nullUD 0x0210011f 0x00000000
slm MsgDesc: ( fence, a32, threadgroup, none, normal_routing dst_len = 1, src0_len = 1, src1_len = 0 flat ) base_offset 0 { align1 WE_all 1N $1 };
send(1) g23UD g117UD nullUD 0x2210c500 a0.1<0>UD
ugm MsgDesc: ( load, a32, d32, V8, transpose, L1STATE_L3MOCS dst_len = 1, src0_len = 1, bss ) src1_len = 0 ex_bso surface_state_index 0 { align1 WE_all 1N @1 $10 };
send(8) nullUD g14UD g24UD 0x040350fc a0.1<0>UD
dp data 1 MsgDesc: (DC typed surface write, Surface = 252, SIMD16, Mask = 0x0) src1_len = 4 ex_bso mlen 2 rlen 0 { align1 1Q @1 $5 };
send(8) nullUD g51UD g52UD 0x02000000 0x00000040
rt accel MsgDesc: SIMD8, mlen 1 ex_mlen 1 rlen 0 { align1 1Q $2 };
send(16) nullUD g88UD g98UD 0x02000100 0x00000080
rt accel MsgDesc: SIMD16, mlen 1 ex_mlen 2 rlen 0 { align1 1H $6 };

View file

@ -1,15 +0,0 @@
31 45 00 88 00 00 0c 39 8e 3a 00 fa 00 00 30 04
31 42 00 88 00 00 0c 1c 8e 1d 00 fa 00 00 30 04
31 40 00 8c 00 00 0c 39 8e 3a 00 fa 00 00 30 04
31 40 03 00 00 00 00 00 8c 4f 0c fa 25 0a 3c 04
31 90 04 00 00 01 02 00 14 09 08 fa 04 07 00 04
31 43 00 80 00 00 0c 04 0c 00 3e da 00 00 04 00
31 41 03 00 00 00 00 00 0c 24 08 e6 0c 25 02 00
31 40 03 00 00 00 00 00 0c 22 08 e6 0c 23 02 00
31 46 03 00 00 00 00 00 0c 06 0c ea 24 07 3c 00
31 40 24 00 00 00 00 00 14 52 32 ea 14 5b 00 01
31 41 00 80 00 00 0c 0a 0c 00 3e e2 00 00 00 00
31 9a 00 80 80 01 0e 17 8c 75 00 fa 00 00 30 00
31 95 03 00 80 01 02 00 14 0e f8 c1 24 18 d4 00
31 42 03 00 00 00 00 00 0c 33 00 80 0c 34 00 00
31 46 04 00 00 00 00 00 0c 58 00 82 14 62 00 00

View file

@ -1,23 +0,0 @@
mul(8) g37<1>D g99<8,8,1>D g36<16,8,2>UW { align1 1Q I@1 };
mul(8) g37<1>D g99<8,8,1>D g36<16,8,2>UW { align1 1Q I@2 };
mul(8) g37<1>D g99<8,8,1>D g36<16,8,2>UW { align1 1Q I@3 };
mul(8) g37<1>D g99<8,8,1>D g36<16,8,2>UW { align1 1Q I@4 };
mul(8) g37<1>D g99<8,8,1>D g36<16,8,2>UW { align1 1Q I@5 };
mul(8) g37<1>D g99<8,8,1>D g36<16,8,2>UW { align1 1Q I@6 };
mul(8) g37<1>D g99<8,8,1>D g36<16,8,2>UW { align1 1Q I@7 };
mov(8) g36<1>UD g35<8,8,1>F { align1 1Q F@1 };
mov(8) g36<1>UD g35<8,8,1>F { align1 1Q F@2 };
mov(8) g36<1>UD g35<8,8,1>F { align1 1Q F@3 };
mov(8) g36<1>UD g35<8,8,1>F { align1 1Q F@4 };
mov(8) g36<1>UD g35<8,8,1>F { align1 1Q F@5 };
mov(8) g36<1>UD g35<8,8,1>F { align1 1Q F@6 };
mov(8) g36<1>UD g35<8,8,1>F { align1 1Q F@7 };
add(1) a0<1>UD a0<0,1,0>UD 0x00000800UD { align1 WE_all 1N A@1 };
add(1) a0<1>UD a0<0,1,0>UD 0x00000800UD { align1 WE_all 1N A@2 };
add(1) a0<1>UD a0<0,1,0>UD 0x00000800UD { align1 WE_all 1N A@3 };
add(1) a0<1>UD a0<0,1,0>UD 0x00000800UD { align1 WE_all 1N A@4 };
add(1) a0<1>UD a0<0,1,0>UD 0x00000800UD { align1 WE_all 1N A@5 };
add(1) a0<1>UD a0<0,1,0>UD 0x00000800UD { align1 WE_all 1N A@6 };
add(1) a0<1>UD a0<0,1,0>UD 0x00000800UD { align1 WE_all 1N A@7 };

View file

@ -1,21 +0,0 @@
41 19 03 00 60 06 05 25 05 63 46 01 06 24 56 00
41 1a 03 00 60 06 05 25 05 63 46 01 06 24 56 00
41 1b 03 00 60 06 05 25 05 63 46 01 06 24 56 00
41 1c 03 00 60 06 05 25 05 63 46 01 06 24 56 00
41 1d 03 00 60 06 05 25 05 63 46 01 06 24 56 00
41 1e 03 00 60 06 05 25 05 63 46 01 06 24 56 00
41 1f 03 00 60 06 05 25 05 63 46 01 06 24 56 00
61 11 03 00 20 0a 05 24 05 23 46 00 00 00 00 00
61 12 03 00 20 0a 05 24 05 23 46 00 00 00 00 00
61 13 03 00 20 0a 05 24 05 23 46 00 00 00 00 00
61 14 03 00 20 0a 05 24 05 23 46 00 00 00 00 00
61 15 03 00 20 0a 05 24 05 23 46 00 00 00 00 00
61 16 03 00 20 0a 05 24 05 23 46 00 00 00 00 00
61 17 03 00 20 0a 05 24 05 23 46 00 00 00 00 00
40 09 00 80 20 82 01 10 00 10 00 02 00 08 00 00
40 0a 00 80 20 82 01 10 00 10 00 02 00 08 00 00
40 0b 00 80 20 82 01 10 00 10 00 02 00 08 00 00
40 0c 00 80 20 82 01 10 00 10 00 02 00 08 00 00
40 0d 00 80 20 82 01 10 00 10 00 02 00 08 00 00
40 0e 00 80 20 82 01 10 00 10 00 02 00 08 00 00
40 0f 00 80 20 82 01 10 00 10 00 02 00 08 00 00

View file

@ -1,33 +0,0 @@
dp4a(8) g10<1>D g2<8,8,1>D g6<8,8,1>D g7<1,1,1>D { align1 1Q @1 };
dp4a(8) g10<1>D g2<8,8,1>D g6<8,8,1>D g7<1,1,1>UD { align1 1Q @1 };
dp4a(8) g10<1>D g2<8,8,1>D g8<8,8,1>D g9<1,1,1>D { align1 1Q @1 };
dp4a(8) g10<1>D g2<8,8,1>D g8<8,8,1>D g9<1,1,1>UD { align1 1Q @1 };
dp4a(8) g10<1>UD g2<8,8,1>UD g6<8,8,1>UD g7<1,1,1>UD { align1 1Q @1 };
dp4a(8) g10<1>UD g2<8,8,1>UD g8<8,8,1>UD g9<1,1,1>UD { align1 1Q @1 };
dp4a(8) g5<1>D g2<8,8,1>D g3<8,8,1>D g4<1,1,1>D { align1 1Q @3 $0.dst };
dp4a(8) g5<1>D g2<8,8,1>D g3<8,8,1>D g4<1,1,1>UD { align1 1Q @3 $0.dst };
dp4a(8) g5<1>UD g2<8,8,1>UD g3<8,8,1>UD g4<1,1,1>UD { align1 1Q @3 $0.dst };
dp4a(8) g6<1>D g2<8,8,1>D g3<8,8,1>D g4<1,1,1>D { align1 1Q @4 $1.dst };
dp4a(8) g6<1>D g2<8,8,1>D g3<8,8,1>D g4<1,1,1>UD { align1 1Q @4 $1.dst };
dp4a(8) g6<1>D g2<8,8,1>D g4<8,8,1>D g5<1,1,1>D { align1 1Q @4 $0.dst };
dp4a(8) g6<1>D g2<8,8,1>D g4<8,8,1>D g5<1,1,1>UD { align1 1Q @4 $0.dst };
dp4a(8) g6<1>UD g2<8,8,1>UD g3<8,8,1>UD g4<1,1,1>UD { align1 1Q @4 $1.dst };
dp4a(8) g6<1>UD g2<8,8,1>UD g4<8,8,1>UD g5<1,1,1>UD { align1 1Q @4 $0.dst };
dp4a(8) g7<1>D g2<8,8,1>D g5<8,8,1>D g6<1,1,1>D { align1 1Q @1 };
dp4a(8) g7<1>D g2<8,8,1>D g5<8,8,1>D g6<1,1,1>UD { align1 1Q @1 };
dp4a(8) g7<1>UD g2<8,8,1>UD g5<8,8,1>UD g6<1,1,1>UD { align1 1Q @1 };
dp4a(8) g8<1>D g2<8,8,1>D g4<8,8,1>D g5<1,1,1>D { align1 1Q @3 $0.dst };
dp4a(8) g8<1>D g2<8,8,1>D g4<8,8,1>D g5<1,1,1>D { align1 1Q @4 $0.dst };
dp4a(8) g8<1>D g2<8,8,1>D g4<8,8,1>D g5<1,1,1>UD { align1 1Q @3 $0.dst };
dp4a(8) g8<1>D g2<8,8,1>D g4<8,8,1>D g5<1,1,1>UD { align1 1Q @4 $0.dst };
dp4a(8) g8<1>D g2<8,8,1>D g6<8,8,1>D g7<1,1,1>D { align1 1Q @1 };
dp4a(8) g8<1>D g2<8,8,1>D g6<8,8,1>D g7<1,1,1>UD { align1 1Q @1 };
dp4a(8) g8<1>UD g2<8,8,1>UD g4<8,8,1>UD g5<1,1,1>UD { align1 1Q @3 $0.dst };
dp4a(8) g8<1>UD g2<8,8,1>UD g4<8,8,1>UD g5<1,1,1>UD { align1 1Q @4 $0.dst };
dp4a(8) g8<1>UD g2<8,8,1>UD g6<8,8,1>UD g7<1,1,1>UD { align1 1Q @1 };
dp4a.sat(8) g10<1>D g5<8,8,1>D g6<8,8,1>D g7<1,1,1>D { align1 1Q @1 $2.dst };
dp4a.sat(8) g10<1>D g5<8,8,1>D g6<8,8,1>D g7<1,1,1>UD { align1 1Q @1 $2.dst };
dp4a.sat(8) g10<1>UD g5<8,8,1>UD g6<8,8,1>UD g7<1,1,1>UD { align1 1Q @1 $2.dst };
dp4a.sat(8) g8<1>D g5<8,8,1>D g3<8,8,1>D g4<1,1,1>D { align1 1Q $2.dst };
dp4a.sat(8) g8<1>D g5<8,8,1>D g3<8,8,1>D g4<1,1,1>UD { align1 1Q $2.dst };
dp4a.sat(8) g8<1>UD g5<8,8,1>UD g3<8,8,1>UD g4<1,1,1>UD { align1 1Q $2.dst };

View file

@ -1,33 +0,0 @@
58 01 03 00 68 0e 04 0a 05 02 0e 0e 05 06 05 07
58 01 03 00 68 0e 04 0a 05 02 0a 0e 05 06 05 07
58 01 03 00 68 0e 04 0a 05 02 0e 0e 05 08 05 09
58 01 03 00 68 0e 04 0a 05 02 0a 0e 05 08 05 09
58 01 03 00 28 0a 04 0a 05 02 0a 0a 05 06 05 07
58 01 03 00 28 0a 04 0a 05 02 0a 0a 05 08 05 09
58 b0 03 00 68 0e 04 05 05 02 0e 0e 05 03 05 04
58 b0 03 00 68 0e 04 05 05 02 0a 0e 05 03 05 04
58 b0 03 00 28 0a 04 05 05 02 0a 0a 05 03 05 04
58 c1 03 00 68 0e 04 06 05 02 0e 0e 05 03 05 04
58 c1 03 00 68 0e 04 06 05 02 0a 0e 05 03 05 04
58 c0 03 00 68 0e 04 06 05 02 0e 0e 05 04 05 05
58 c0 03 00 68 0e 04 06 05 02 0a 0e 05 04 05 05
58 c1 03 00 28 0a 04 06 05 02 0a 0a 05 03 05 04
58 c0 03 00 28 0a 04 06 05 02 0a 0a 05 04 05 05
58 01 03 00 68 0e 04 07 05 02 0e 0e 05 05 05 06
58 01 03 00 68 0e 04 07 05 02 0a 0e 05 05 05 06
58 01 03 00 28 0a 04 07 05 02 0a 0a 05 05 05 06
58 b0 03 00 68 0e 04 08 05 02 0e 0e 05 04 05 05
58 c0 03 00 68 0e 04 08 05 02 0e 0e 05 04 05 05
58 b0 03 00 68 0e 04 08 05 02 0a 0e 05 04 05 05
58 c0 03 00 68 0e 04 08 05 02 0a 0e 05 04 05 05
58 01 03 00 68 0e 04 08 05 02 0e 0e 05 06 05 07
58 01 03 00 68 0e 04 08 05 02 0a 0e 05 06 05 07
58 b0 03 00 28 0a 04 08 05 02 0a 0a 05 04 05 05
58 c0 03 00 28 0a 04 08 05 02 0a 0a 05 04 05 05
58 01 03 00 28 0a 04 08 05 02 0a 0a 05 06 05 07
58 92 03 00 6c 0e 04 0a 05 05 0e 0e 05 06 05 07
58 92 03 00 6c 0e 04 0a 05 05 0a 0e 05 06 05 07
58 92 03 00 2c 0a 04 0a 05 05 0a 0a 05 06 05 07
58 22 03 00 6c 0e 04 08 05 05 0e 0e 05 03 05 04
58 22 03 00 6c 0e 04 08 05 05 0a 0e 05 03 05 04
58 22 03 00 2c 0a 04 08 05 05 0a 0a 05 03 05 04

View file

@ -1,43 +0,0 @@
send(16) g113UD g12UD nullUD a0<0>UD 0x00000000
dp data 1 MsgDesc: indirect ex_mlen 0 { align1 1H @1 $6 };
(+f1.0) send(16) nullUD g15UD g17UD a0<0>UD 0x00000080
dp data 1 MsgDesc: indirect ex_mlen 2 { align1 1H @1 $4 };
send(8) g104UD g119UD nullUD 0x04116e13 0x00000000
dp data 1 MsgDesc: (DC typed surface read, Surface = 19, SIMD8, Mask = 0xe) mlen 2 ex_mlen 0 rlen 1 { align1 2Q $8 };
send(8) nullUD g92UD g117UD 0x020350fc a0.1<0>UD
dp data 1 MsgDesc: (DC typed surface write, Surface = 252, SIMD16, Mask = 0x0) mlen 1 rlen 0 { align1 1Q @1 $8 };
(+f0.0.any8h) send(8) g55UD g118UD nullUD 0x02184201 0x00000000
data MsgDesc: (DC unaligned OWORD block read, bti 1, 2) mlen 1 ex_mlen 0 rlen 1 { align1 WE_all 1Q @3 $9 };
send(8) nullUD g126UD nullUD 0x02000000 0x00000000
thread_spawner MsgDesc: mlen 1 ex_mlen 0 rlen 0 { align1 WE_all 1Q @1 EOT };
send(8) g18UD g24UD nullUD 0x04115e10 0x00000000
dp data 1 MsgDesc: (DC typed surface read, Surface = 16, SIMD16, Mask = 0xe) mlen 2 ex_mlen 0 rlen 1 { align1 1Q $1 };
send(8) g19UD g28UD nullUD 0x04116e10 0x00000000
dp data 1 MsgDesc: (DC typed surface read, Surface = 16, SIMD8, Mask = 0xe) mlen 2 ex_mlen 0 rlen 1 { align1 2Q @7 $2 };
send(16) g50UD g36UD nullUD a0<0>UD 0x00000000
sampler MsgDesc: indirect ex_mlen 0 { align1 1H @1 $3 };
send(8) nullUD g25UD g21UD 0x02035001 0x00000100
dp data 1 MsgDesc: (DC typed surface write, Surface = 1, SIMD16, Mask = 0x0) mlen 1 ex_mlen 4 rlen 0 { align1 1Q $9 };
send(8) g5UD g25UD nullUD 0x02415001 0x00000000
dp data 1 MsgDesc: (DC typed surface read, Surface = 1, SIMD16, Mask = 0x0) mlen 1 ex_mlen 0 rlen 4 { align1 1Q $10 };
send(8) g27UD g35UD nullUD 0x04146efd 0x00000000
dp data 1 MsgDesc: (DC A64 untyped surface read, Surface = 253, SIMD8, Mask = 0xe) mlen 2 ex_mlen 0 rlen 1 { align1 1Q @1 $0 };
send(8) nullUD g36UD g38UD 0x04035001 0x00000100
dp data 1 MsgDesc: (DC typed surface write, Surface = 1, SIMD16, Mask = 0x0) mlen 2 ex_mlen 4 rlen 0 { align1 1Q @1 $1 };
send(8) nullUD g126UD g118UD 0x02080007 0x00000200
urb MsgDesc: offset 0 SIMD8 write mlen 1 ex_mlen 8 rlen 0 { align1 1Q @1 EOT };
send(8) g14UD g37UD nullUD 0x02110401 0x00000000
data MsgDesc: (DC byte scattered read, bti 1, 4) mlen 1 ex_mlen 0 rlen 1 { align1 1Q @1 $0 };
send(1) g100UD g0UD nullUD 0x0219e000 0x00000000
data MsgDesc: (DC mfence, bti 0, 32) mlen 1 ex_mlen 0 rlen 1 { align1 WE_all 1N $1 };
send(1) g15UD g0UD nullUD 0x0219e000 0x00000000
data MsgDesc: (DC mfence, bti 0, 32) mlen 1 ex_mlen 0 rlen 1 { align1 WE_all 1N $5 };
sendc(16) nullUD g119UD nullUD 0x10031000 0x00000000
render MsgDesc: RT write SIMD16 LastRT Surface = 0 mlen 8 ex_mlen 0 rlen 0 { align1 1H @1 EOT };
sendc(8) nullUD g125UD g123UD 0x04031400 0x00000080
render MsgDesc: RT write SIMD8 LastRT Surface = 0 mlen 2 ex_mlen 2 rlen 0 { align1 1Q @1 EOT };
sendc(16) nullUD g119UD nullUD 0x10031000 0x00000000
render MsgDesc: RT write SIMD16 LastRT Surface = 0 mlen 8 ex_mlen 0 rlen 0 { align1 1H @1 EOT };
sendc(16) nullUD g123UD g119UD 0x08031000 0x00000100
render MsgDesc: RT write SIMD16 LastRT Surface = 0 mlen 4 ex_mlen 4 rlen 0 { align1 1H @1 EOT };

View file

@ -1,21 +0,0 @@
31 96 04 00 00 00 05 71 04 0c 00 c0 00 00 00 00
31 94 84 01 00 00 01 00 04 0f 00 c0 14 11 00 00
31 48 13 00 00 00 0c 68 14 77 26 cc 00 00 5a 00
31 98 03 00 00 01 02 00 0c 5c f8 c1 04 75 d4 00
31 b9 03 88 00 00 0c 37 0c 76 02 a4 00 00 10 02
31 01 03 80 04 00 00 00 0c 7e 00 70 00 00 00 00
31 41 03 00 00 00 0c 12 14 18 20 cc 00 00 56 00
31 f2 13 00 00 00 0c 13 14 1c 20 cc 00 00 5a 00
31 93 04 00 00 00 05 32 04 24 00 20 00 00 00 00
31 49 03 00 00 00 00 00 0c 19 02 c0 24 15 d4 00
31 4a 03 00 00 00 24 05 0c 19 02 c0 00 00 54 00
31 90 03 00 00 00 0c 1b 14 23 fa cd 00 00 1a 01
31 91 03 00 00 00 00 00 14 24 02 c0 24 26 d4 00
31 01 03 00 04 00 00 00 0c 7e 0e 60 44 76 00 02
31 90 03 00 00 00 0c 0e 0c 25 02 a8 00 00 40 00
31 41 00 80 00 00 0c 64 0c 00 00 a0 00 00 78 02
31 45 00 80 00 00 0c 0f 0c 00 00 a0 00 00 78 02
32 01 04 00 04 00 00 00 44 77 00 50 00 00 c4 00
32 01 03 00 04 00 00 00 14 7d 00 58 14 7b c4 00
32 01 04 00 04 00 00 00 44 77 00 50 00 00 c4 00
32 01 04 00 04 00 00 00 24 7b 00 50 24 77 c4 00

View file

@ -1,40 +0,0 @@
cmp.l.f0.0(8) g55<1>UD g54<8,8,1>UD 0x00000290UD { align1 1Q @1 };
mov(16) g6<1>D g20<8,8,1>W { align1 2H @2 };
add(16) g122<1>F g98<8,8,1>F (abs)g102<8,8,1>F { align1 1H @3 };
shl(8) g75<1>D g122<8,8,1>D 0x00000002UD { align1 1Q @4 };
sel.l(4) g90.4<1>D g90.3<0,1,0>D g90.4<4,4,1>D { align1 WE_all 1N @5 };
and(16) g58<1>UD g16<8,8,1>UD g56<8,8,1>UD { align1 1H @6 };
or.nz.f0.0(16) null<1>UD g105<8,8,1>UD g103<8,8,1>UD { align1 1H @7 };
math cos(16) g17<1>F g15<8,8,1>F null<8,8,1>F { align1 1H @1 $0 };
math exp(16) g1<1>F g29<8,8,1>F null<8,8,1>F { align1 1H @5 $2 };
math sqrt(8) g9<1>HF g6<8,8,1>HF null<8,8,1>F { align1 1Q @1 $3 };
math intdiv(8) g103<1>D g101<8,8,1>D g35<8,8,1>D { align1 1Q @4 $4 };
math intmod(8) g101<1>D g97<8,8,1>D g76<8,8,1>D { align1 2Q @2 $5 };
math inv(16) g10<1>F g8<8,8,1>F null<8,8,1>F { align1 2H @2 $6 };
math log(16) g102<1>F g100<8,8,1>F null<8,8,1>F { align1 2H @1 $7 };
math rsq(16) g76<1>F g74<8,8,1>F null<8,8,1>F { align1 1H @7 $8 };
math sin(16) g123<1>F g121<8,8,1>F null<8,8,1>F { align1 1H @4 $9 };
math sqrt(16) g43<1>F g47<8,8,1>F null<8,8,1>F { align1 2H @7 $10 };
math cos(8) g103<1>HF g98<8,8,1>HF null<8,8,1>F { align1 1Q @3 $11 };
math exp(8) g54<1>HF g52<8,8,1>HF null<8,8,1>F { align1 1Q @1 $12 };
math intdiv(8) g35<1>D g31<8,8,1>D g33<8,8,1>D { align1 4Q @2 $13 };
math intmod(8) g101<1>D g97<8,8,1>D g99<8,8,1>D { align1 2Q @4 $14 };
math inv(8) g102<1>HF g92<8,8,1>HF null<8,8,1>F { align1 1Q @6 $15 };
sel.ge(16) g7<1>UW g7<16,16,1>UW g89<16,8,2>UW { align1 1H @7 $0.dst };
mov(16) a0<1>UW 0x03e0UW { align1 WE_all 1H @3 $1.dst };
add(16) g100<1>D g102<8,8,1>D -2114D { align1 1H @3 $2.dst };
add(16) g100<1>D g105<8,8,1>D (abs)g18<8,8,1>D { align1 1H @3 $3.dst };
add(16) g36<1>D g36<8,8,1>D g106<8,8,1>D { align1 1H @7 $4.dst };
and(16) g49<1>UD g45<8,8,1>UD g47<8,8,1>UD { align1 1H @3 $5.dst };
asr(16) g102<2>W g41<16,8,2>W g28<8,8,1>UD { align1 2H @6 $6.dst };
cmp.l.f0.0(8) g97<1>F (abs)g96<8,8,1>F 0x3d4ccccdF /* 0.05F */ { align1 1Q @3 $7.dst };
cmp.nz.f0.0(8) g100<1>F g98<8,8,1>F g99<8,8,1>F { align1 1Q @1 $8.dst };
(+f0.0) sel(8) g64<1>D -g15<8,8,1>D g15<8,8,1>D { align1 1Q @1 $9.dst };
mov(16) g15<1>UD g13<8,8,1>D { align1 1H @1 $10.dst };
mul(8) acc0<1>UD g10<8,4,2>UD g101<16,8,2>UW { align1 1Q @7 $11.dst };
or(16) g51<1>UW g51<16,16,1>UW g75<16,8,2>UW { align1 1H @7 $12.dst };
sel.ge(16) g28<1>W g28<16,16,1>W g92<16,8,2>W { align1 2H @7 $13.dst };
xor(16) g10<1>UD g10<8,8,1>UD g100<8,8,1>UD { align1 1H @7 $14.dst };
and(16) g39<1>UD g35<8,8,1>UD g37<8,8,1>UD { align1 2H @5 $15.dst };

View file

@ -1,38 +0,0 @@
70 01 03 00 20 82 05 37 05 36 46 52 90 02 00 00
61 02 24 00 60 05 05 06 05 14 46 00 00 00 00 00
40 03 04 00 a0 0a 05 7a 05 62 46 0a 05 66 46 01
69 04 03 00 60 86 05 4b 05 7a 46 02 02 00 00 00
62 05 02 80 60 06 85 5a 64 5a 00 56 85 5a 34 00
65 06 04 00 20 02 05 3a 05 10 46 02 05 38 46 00
66 07 04 00 20 02 01 00 05 69 46 22 05 67 46 00
38 90 04 00 a0 0a 05 11 05 0f 46 7a 01 00 46 00
38 d2 04 00 a0 0a 05 01 05 1d 46 3a 01 00 46 00
38 93 03 00 90 09 05 09 05 06 46 4a 01 00 46 00
38 c4 03 00 60 06 05 67 05 65 46 c6 05 23 46 00
38 a5 13 00 60 06 05 65 05 61 46 d6 05 4c 46 00
38 a6 24 00 a0 0a 05 0a 05 08 46 1a 01 00 46 00
38 97 24 00 a0 0a 05 66 05 64 46 2a 01 00 46 00
38 f8 04 00 a0 0a 05 4c 05 4a 46 5a 01 00 46 00
38 c9 04 00 a0 0a 05 7b 05 79 46 6a 01 00 46 00
38 fa 24 00 a0 0a 05 2b 05 2f 46 4a 01 00 46 00
38 bb 03 00 90 09 05 67 05 62 46 7a 01 00 46 00
38 9c 03 00 90 09 05 36 05 34 46 3a 01 00 46 00
38 ad 33 00 60 06 05 23 05 1f 46 c6 05 21 46 00
38 ce 13 00 60 06 05 65 05 61 46 d6 05 63 46 00
38 ef 03 00 90 09 05 66 05 5c 46 1a 01 00 46 00
62 f0 04 00 10 01 05 07 05 07 58 41 06 59 56 00
61 b1 04 80 10 41 01 10 00 00 00 00 e0 03 e0 03
40 b2 04 00 60 86 05 64 05 66 46 06 be f7 ff ff
40 b3 04 00 60 06 05 64 05 69 46 06 05 12 46 01
40 f4 04 00 60 06 05 24 05 24 46 06 05 6a 46 00
65 b5 04 00 20 02 05 31 05 2d 46 02 05 2f 46 00
6c e6 24 00 50 05 06 66 06 29 56 02 05 1c 46 00
70 b7 03 00 a0 9a 05 61 05 60 46 5a cd cc 4c 3d
70 98 03 00 a0 0a 05 64 05 62 46 2a 05 63 46 00
62 99 03 01 60 26 05 40 05 0f 46 06 05 0f 46 00
61 9a 04 00 20 06 05 0f 05 0d 46 00 00 00 00 00
41 fb 03 00 20 02 01 20 06 0a 44 01 06 65 56 00
66 fc 04 00 10 01 05 33 05 33 58 01 06 4b 56 00
62 fd 24 00 50 05 05 1c 05 1c 58 45 06 5c 56 00
67 fe 04 00 20 02 05 0a 05 0a 46 02 05 64 46 00
65 df 24 00 20 02 05 27 05 23 46 02 05 25 46 00

View file

@ -1,33 +0,0 @@
sync nop(16) null<0,1,0>UB { align1 WE_all 1H @1 };
sync nop(1) null<0,1,0>UB { align1 WE_all 1N @1 };
sync nop(1) null<0,1,0>UB { align1 WE_all 1N @2 };
sync nop(1) null<0,1,0>UB { align1 WE_all 1N @3 };
sync nop(1) null<0,1,0>UB { align1 WE_all 1N @4 };
sync nop(1) null<0,1,0>UB { align1 WE_all 1N @5 };
sync nop(1) null<0,1,0>UB { align1 WE_all 1N @6 };
sync nop(1) null<0,1,0>UB { align1 WE_all 1N @7 };
sync nop(1) null<0,1,0>UB { align1 WE_all 3N @1 };
sync nop(1) null<0,1,0>UB { align1 WE_all 3N @2 };
sync nop(1) null<0,1,0>UB { align1 WE_all 3N @3 };
sync nop(1) null<0,1,0>UB { align1 WE_all 3N @4 };
sync nop(1) null<0,1,0>UB { align1 WE_all 3N @5 };
sync nop(1) null<0,1,0>UB { align1 WE_all 3N @6 };
sync nop(1) null<0,1,0>UB { align1 WE_all 3N @7 };
sync nop(1) null<0,1,0>UB { align1 WE_all 5N @1 };
sync nop(1) null<0,1,0>UB { align1 WE_all 5N @2 };
sync nop(1) null<0,1,0>UB { align1 WE_all 5N @3 };
sync nop(1) null<0,1,0>UB { align1 WE_all 5N @4 };
sync nop(1) null<0,1,0>UB { align1 WE_all 5N @5 };
sync nop(1) null<0,1,0>UB { align1 WE_all 5N @6 };
sync nop(1) null<0,1,0>UB { align1 WE_all 5N @7 };
sync nop(1) null<0,1,0>UB { align1 WE_all 7N @1 };
sync nop(1) null<0,1,0>UB { align1 WE_all 7N @2 };
sync nop(1) null<0,1,0>UB { align1 WE_all 7N @3 };
sync nop(1) null<0,1,0>UB { align1 WE_all 7N @4 };
sync nop(1) null<0,1,0>UB { align1 WE_all 7N @5 };
sync nop(1) null<0,1,0>UB { align1 WE_all 7N @6 };
sync nop(1) null<0,1,0>UB { align1 WE_all 7N @7 };
sync nop(32) null<0,1,0>UB { align1 WE_all @1 };
sync nop(8) null<0,1,0>UB { align1 WE_all 1Q @1 };
sync allwr(16) null<0,1,0>UB { align1 1H };
sync allwr(8) null<0,1,0>UB { align1 1Q };

View file

@ -1,33 +0,0 @@
01 01 04 80 00 00 00 00 00 00 00 00 00 00 00 00
01 01 00 80 00 00 00 00 00 00 00 00 00 00 00 00
01 02 00 80 00 00 00 00 00 00 00 00 00 00 00 00
01 03 00 80 00 00 00 00 00 00 00 00 00 00 00 00
01 04 00 80 00 00 00 00 00 00 00 00 00 00 00 00
01 05 00 80 00 00 00 00 00 00 00 00 00 00 00 00
01 06 00 80 00 00 00 00 00 00 00 00 00 00 00 00
01 07 00 80 00 00 00 00 00 00 00 00 00 00 00 00
01 01 10 80 00 00 00 00 00 00 00 00 00 00 00 00
01 02 10 80 00 00 00 00 00 00 00 00 00 00 00 00
01 03 10 80 00 00 00 00 00 00 00 00 00 00 00 00
01 04 10 80 00 00 00 00 00 00 00 00 00 00 00 00
01 05 10 80 00 00 00 00 00 00 00 00 00 00 00 00
01 06 10 80 00 00 00 00 00 00 00 00 00 00 00 00
01 07 10 80 00 00 00 00 00 00 00 00 00 00 00 00
01 01 20 80 00 00 00 00 00 00 00 00 00 00 00 00
01 02 20 80 00 00 00 00 00 00 00 00 00 00 00 00
01 03 20 80 00 00 00 00 00 00 00 00 00 00 00 00
01 04 20 80 00 00 00 00 00 00 00 00 00 00 00 00
01 05 20 80 00 00 00 00 00 00 00 00 00 00 00 00
01 06 20 80 00 00 00 00 00 00 00 00 00 00 00 00
01 07 20 80 00 00 00 00 00 00 00 00 00 00 00 00
01 01 30 80 00 00 00 00 00 00 00 00 00 00 00 00
01 02 30 80 00 00 00 00 00 00 00 00 00 00 00 00
01 03 30 80 00 00 00 00 00 00 00 00 00 00 00 00
01 04 30 80 00 00 00 00 00 00 00 00 00 00 00 00
01 05 30 80 00 00 00 00 00 00 00 00 00 00 00 00
01 06 30 80 00 00 00 00 00 00 00 00 00 00 00 00
01 07 30 80 00 00 00 00 00 00 00 00 00 00 00 00
01 01 05 80 00 00 00 00 00 00 00 00 00 00 00 00
01 01 03 80 00 00 00 00 00 00 00 00 00 00 00 00
01 00 04 00 00 00 00 00 00 00 00 30 00 00 00 00
01 00 03 00 00 00 00 00 00 00 00 30 00 00 00 00

View file

@ -1,40 +0,0 @@
add(8) g124<1>F g7<8,8,1>D 1D { align1 1Q };
add(16) g120<1>F g11<8,8,1>D 1D { align1 1H };
add(16) g4<1>F g1<0,1,0>F -g1.4<0,1,0>F { align1 1H };
add(8) g3.8<1>UW g3<8,8,1>UW 0x0008UW { align1 WE_all 1Q };
add(16) g3<1>D g18<8,8,1>D g12<8,8,1>D { align1 1H };
add(16) g6<1>UW g1.4<1,4,0>UW 0x11001010V { align1 WE_all 1H };
add(32) g10<1>UW g1.4<1,4,0>UW 0x11001010V { align1 WE_all };
add(8) g2<1>D g96<8,8,1>D -1023D { align1 1Q };
add(8) g4<1>F g5.6<0,1,0>F g7.2<0,1,0>F { align1 1Q };
add(8) g53<1>DF g49<4,4,1>DF g51<4,4,1>DF { align1 1Q };
add.sat(16) g5<1>UD g3<8,8,1>UD 0x00000001UD { align1 1H };
add(1) g125.3<1>UD g0.3<0,1,0>UD g7<0,1,0>UD { align1 WE_all 1N };
add(8) a0<1>UW g34<16,8,2>UW 0x0080UW { align1 1Q };
add(8) g8<1>DF g2<0,1,0>DF g3.2<0,1,0>DF { align1 2Q };
add(16) a0<1>UW g3<16,8,2>UW 0x0040UW { align1 1H };
add.sat.le.f0.0(8) g125<1>F -g6<8,8,1>F 0x3f000000F /* 0.5F */ { align1 1Q };
add.z.f0.0(8) g8<1>F g2<0,1,0>F -g2.4<0,1,0>F { align1 1Q };
add.z.f0.0(16) g3<1>F g2<0,1,0>F -g2.1<0,1,0>F { align1 1H };
add(8) g3<1>UD g2<8,8,1>UD 0xffffffffUD { align1 1Q };
(+f0.0) add(8) g15<1>D -g15<8,8,1>D 31D { align1 1Q };
add(1) a0<1>UD a0<0,1,0>UD 0x00000200UD { align1 WE_all 1N };
add.sat(8) g124<1>F g7<8,8,1>F -g6<8,8,1>F { align1 1Q };
add(8) g8<1>UD g6<8,8,1>D 0x00000001UD { align1 1Q };
add(16) g11<1>UD g9<8,8,1>D 0x00000001UD { align1 1H };
(+f0.0) add(16) g8<1>D -g8<8,8,1>D 31D { align1 1H };
add.sat(16) g126<1>F g2<0,1,0>F g2.4<0,1,0>F { align1 1H };
add.sat(8) g124<1>F g17<8,8,1>D 1D { align1 1Q };
add(16) g114<1>D g118<8,8,1>D g116<8,8,1>D { align1 2H };
add.z.f0.0(16) null<1>D g120<8,8,1>D 1D { align1 1H };
add.z.f0.0(16) null<1>D g116<8,8,1>D 1D { align1 2H };
add.z.f0.0(8) g3<1>D g5<8,8,1>D g4<8,8,1>D { align1 1Q };
add(16) g20<1>UD g17<8,8,1>UD 1D { align1 1H };
add(8) g7<1>F -g6<4>.xyxyF g6<4>.zwzwF { align16 1Q };
add(16) g9<1>F -g7<4>.xyxyF g7<4>.zwzwF { align16 1H };
add(8) g7<1>UD g2<8,8,1>UD -g6<8,8,1>UD { align1 WE_all 1Q };
add.le.f0.0(16) g1<1>D g3.1<0,1,0>D -g6<8,8,1>D { align1 1H };
add.sat(8) g10<1>UD g9<8,8,1>UD 0x00000001UD { align1 1Q };
add(1) g14<1>UD g14<0,1,0>UD 0x00000001UD { align1 WE_all 3N };
add(8) g25<1>Q g22<4,4,1>Q -g24<4,4,1>Q { align1 1Q };
add(8) g12<1>Q g5<4,4,1>Q -g11<4,4,1>Q { align1 2Q };

View file

@ -1,40 +0,0 @@
40 00 60 00 e8 0a 80 2f e0 00 8d 0e 01 00 00 00
40 00 80 00 e8 0a 00 2f 60 01 8d 0e 01 00 00 00
40 00 80 00 e8 3a 80 20 20 00 00 3a 30 40 00 00
40 00 60 00 4c 12 70 20 60 00 8d 16 08 00 08 00
40 00 80 00 28 0a 60 20 40 02 8d 0a 80 01 8d 00
40 00 80 00 4c 12 c0 20 28 00 28 36 10 10 00 11
40 00 a0 00 4c 12 40 21 28 00 28 36 10 10 00 11
40 00 60 00 28 0a 40 20 00 0c 8d 0e 01 fc ff ff
40 00 60 00 e8 3a 80 20 b8 00 00 3a e8 00 00 00
40 00 60 00 c8 32 a0 26 20 06 69 32 60 06 69 00
40 00 80 80 08 02 a0 20 60 00 8d 06 01 00 00 00
40 00 00 00 0c 02 ac 2f 0c 00 00 02 e0 00 00 00
40 00 60 00 40 12 00 22 40 04 ae 16 80 00 80 00
40 10 60 00 c8 32 00 21 40 00 00 32 70 00 00 00
40 00 80 00 40 12 00 22 60 00 ae 16 40 00 40 00
40 00 60 86 e8 3a a0 2f c0 40 8d 3e 00 00 00 3f
40 00 60 01 e8 3a 00 21 40 00 00 3a 50 40 00 00
40 00 80 01 e8 3a 60 20 40 00 00 3a 44 40 00 00
40 00 60 00 08 02 60 20 40 00 8d 06 ff ff ff ff
40 00 61 00 28 0a e0 21 e0 41 8d 0e 1f 00 00 00
40 00 00 00 04 00 00 22 00 02 00 06 00 02 00 00
40 00 60 80 e8 3a 80 2f e0 00 8d 3a c0 40 8d 00
40 00 60 00 08 0a 00 21 c0 00 8d 06 01 00 00 00
40 00 80 00 08 0a 60 21 20 01 8d 06 01 00 00 00
40 00 81 00 28 0a 00 21 00 41 8d 0e 1f 00 00 00
40 00 80 80 e8 3a c0 2f 40 00 00 3a 50 00 00 00
40 00 60 80 e8 0a 80 2f 20 02 8d 0e 01 00 00 00
40 20 80 00 28 0a 40 2e c0 0e 8d 0a 80 0e 8d 00
40 00 80 01 20 0a 00 20 00 0f 8d 0e 01 00 00 00
40 20 80 01 20 0a 00 20 80 0e 8d 0e 01 00 00 00
40 00 60 01 28 0a 60 20 a0 00 8d 0a 80 00 8d 00
40 00 80 00 08 02 80 22 20 02 8d 0e 01 00 00 00
40 01 60 00 e8 3a ef 20 c4 40 64 3a ce 00 6e 00
40 01 80 00 e8 3a 2f 21 e4 40 64 3a ee 00 6e 00
40 00 60 00 0c 02 e0 20 40 00 8d 02 c0 40 8d 00
40 00 80 06 28 0a 20 20 64 00 00 0a c0 40 8d 00
40 00 60 80 08 02 40 21 20 01 8d 06 01 00 00 00
40 10 00 00 0c 02 c0 21 c0 01 00 06 01 00 00 00
40 00 60 00 28 4b 20 23 c0 02 69 4a 00 43 69 00
40 10 60 00 28 4b 80 21 a0 00 69 4a 60 41 69 00

View file

@ -1,29 +0,0 @@
and(8) g3<1>UD g2<0,1,0>UD ~g2.2<0,1,0>D { align1 1Q };
and(16) g3<1>UD g2<0,1,0>UD ~g2.2<0,1,0>D { align1 1H };
and(8) g8<1>UD g0.1<0,1,0>UW 0x07ffUW { align1 1Q };
and(16) g18<1>UD g0.1<0,1,0>UW 0x07ffUW { align1 1H };
and(1) g7<1>UD g5<0,1,0>UD 0x000000f0UD { align1 WE_all 1N };
and.nz.f0.0(8) null<1>UD g36<8,8,1>UD g37<8,8,1>UD { align1 1Q };
and.nz.f0.0(16) null<1>UD g70<8,8,1>UD g72<8,8,1>UD { align1 1H };
and.z.f0.0(16) g21<1>UD g19<8,8,1>UD g17<8,8,1>UD { align1 1H };
and(8) g61<1>UD g79<8,8,1>UD g32.1<8,4,2>UD { align1 2Q };
and(8) g96<1>D ~g94<8,8,1>D ~g95<8,8,1>D { align1 1Q };
and(16) g24<1>D ~g20<8,8,1>D ~g22<8,8,1>D { align1 1H };
and(1) a0<1>UD g4<0,1,0>UD 0x000000ffUD { align1 WE_all 1N };
and(16) g118<1>UD g114<8,8,1>UD 0x0000003fUD { align1 2H };
and(1) g4<1>UD g20<0,1,0>UD 0x000000ffUD { align1 WE_all 3N };
and.z.f0.0(8) null<1>D g13<8,8,1>UD 0x0000001fUD { align1 1Q };
and(8) g21<1>UD g15<8,8,1>UD 0x00000003UD { align1 WE_all 1Q };
and.z.f0.0(8) null<1>UD g20<8,8,1>UD 0x00000001UD { align1 1Q };
and.z.f0.0(16) null<1>UD g45<8,8,1>UD 0x00000001UD { align1 1H };
and(8) g4<1>UW g3<8,8,1>UW 0xfffcUW { align1 1Q };
and(16) g13<1>UW g19<16,8,2>UW 0xfffcUW { align1 1H };
and.nz.f0.0(8) null<1>UD ~g2.2<0,1,0>D g9<8,8,1>UD { align1 1Q };
and(8) g18<1>UD ~g2.2<0,1,0>D g7<8,8,1>UD { align1 1Q };
and.nz.f0.0(16) null<1>UD ~g2.2<0,1,0>D g14<8,8,1>UD { align1 1H };
and(16) g30<1>UD ~g2.2<0,1,0>D g10<8,8,1>UD { align1 1H };
and.nz.f0.0(8) g10<1>UD g9<8,8,1>UD 0x00000001UD { align1 1Q };
and.nz.f0.0(16) g16<1>UD g14<8,8,1>UD 0x00000001UD { align1 1H };
and.z.f0.0(8) g9<1>UD g8<8,8,1>UD 0x00000003UD { align1 1Q };
and(8) g12<1>UQ g9<4,4,1>UQ g11<4,4,1>UQ { align1 1Q };
and(8) g26<1>UQ g18<4,4,1>UQ g22<4,4,1>UQ { align1 2Q };

View file

@ -1,29 +0,0 @@
05 00 60 00 08 02 60 20 40 00 00 0a 48 40 00 00
05 00 80 00 08 02 60 20 40 00 00 0a 48 40 00 00
05 00 60 00 08 12 00 21 02 00 00 16 ff 07 ff 07
05 00 80 00 08 12 40 22 02 00 00 16 ff 07 ff 07
05 00 00 00 0c 02 e0 20 a0 00 00 06 f0 00 00 00
05 00 60 02 00 02 00 20 80 04 8d 02 a0 04 8d 00
05 00 80 02 00 02 00 20 c0 08 8d 02 00 09 8d 00
05 00 80 01 08 02 a0 22 60 02 8d 02 20 02 8d 00
05 10 60 00 08 02 a0 27 e0 09 8d 02 04 04 8a 00
05 00 60 00 28 0a 00 2c c0 4b 8d 0a e0 4b 8d 00
05 00 80 00 28 0a 00 23 80 42 8d 0a c0 42 8d 00
05 00 00 00 04 02 00 22 80 00 00 06 ff 00 00 00
05 20 80 00 08 02 c0 2e 40 0e 8d 06 3f 00 00 00
05 10 00 00 0c 02 80 20 80 02 00 06 ff 00 00 00
05 00 60 01 20 02 00 20 a0 01 8d 06 1f 00 00 00
05 00 60 00 0c 02 a0 22 e0 01 8d 06 03 00 00 00
05 00 60 01 00 02 00 20 80 02 8d 06 01 00 00 00
05 00 80 01 00 02 00 20 a0 05 8d 06 01 00 00 00
05 00 60 00 48 12 80 20 60 00 8d 16 fc ff fc ff
05 00 80 00 48 12 a0 21 60 02 ae 16 fc ff fc ff
05 00 60 02 00 0a 00 20 48 40 00 02 20 01 8d 00
05 00 60 00 08 0a 40 22 48 40 00 02 e0 00 8d 00
05 00 80 02 00 0a 00 20 48 40 00 02 c0 01 8d 00
05 00 80 00 08 0a c0 23 48 40 00 02 40 01 8d 00
05 00 60 02 08 02 40 21 20 01 8d 06 01 00 00 00
05 00 80 02 08 02 00 22 c0 01 8d 06 01 00 00 00
05 00 60 01 08 02 20 21 00 01 8d 06 03 00 00 00
05 00 60 00 08 43 80 21 20 01 69 42 60 01 69 00
05 10 60 00 08 43 40 23 40 02 69 42 c0 02 69 00

View file

@ -1,6 +0,0 @@
asr(8) g19<1>D g7<8,8,1>D 0x00000001UD { align1 1Q };
asr(16) g20<1>D g2.7<0,1,0>D 0x0000001fUD { align1 1H };
asr.nz.f0.0(8) null<1>D -g0<0,1,0>W 15D { align1 1Q };
asr.nz.f0.0(16) null<1>D -g0<0,1,0>W 15D { align1 1H };
asr(8) g2<1>D -g0<0,1,0>W 15D { align1 1Q };
asr(16) g2<1>D -g0<0,1,0>W 15D { align1 1H };

View file

@ -1,6 +0,0 @@
0c 00 60 00 28 0a 60 22 e0 00 8d 06 01 00 00 00
0c 00 80 00 28 0a 80 22 5c 00 00 06 1f 00 00 00
0c 00 60 02 20 1a 00 20 00 40 00 0e 0f 00 00 00
0c 00 80 02 20 1a 00 20 00 40 00 0e 0f 00 00 00
0c 00 60 00 28 1a 40 20 00 40 00 0e 0f 00 00 00
0c 00 80 00 28 1a 40 20 00 40 00 0e 0f 00 00 00

View file

@ -1,4 +0,0 @@
bfe(8) g96<1>UD g89<4,4,1>UD g30<4,4,1>UD g91<4,4,1>UD { align16 1Q };
bfe(16) g13<1>UD g44<4,4,1>UD g115<4,4,1>UD g126<4,4,1>UD { align16 1H };
bfe(8) g18<1>D g17<4,4,1>D g16<4,4,1>D g49<4,4,1>D { align16 1Q };
bfe(16) g13<1>D g11<4,4,1>D g42<4,4,1>D g5<4,4,1>D { align16 1H };

View file

@ -1,4 +0,0 @@
18 01 60 00 00 90 1e 60 c8 91 05 39 3c 20 c7 16
18 01 80 00 00 90 1e 0d c8 c1 02 39 e6 20 87 1f
18 01 60 00 00 48 1e 12 c8 11 01 39 20 20 47 0c
18 01 80 00 00 48 1e 0d c8 b1 00 39 54 20 47 01

View file

@ -1,2 +0,0 @@
bfi1(8) g20<1>UD g19<8,8,1>D g18<8,8,1>D { align1 1Q };
bfi1(16) g16<1>UD g14<8,8,1>D g12<8,8,1>D { align1 1H };

View file

@ -1,2 +0,0 @@
19 00 60 00 08 0a 80 22 60 02 8d 0a 40 02 8d 00
19 00 80 00 08 0a 00 22 c0 01 8d 0a 80 01 8d 00

View file

@ -1,2 +0,0 @@
bfi2(8) g31<1>UD g88<4,4,1>UD g90<4,4,1>UD g91<4,4,1>UD { align16 1Q };
bfi2(16) g5<1>UD g42<4,4,1>UD g40<4,4,1>UD g126<4,4,1>UD { align16 1H };

View file

@ -1,2 +0,0 @@
1a 01 60 00 00 90 1e 1f c8 81 05 39 b4 20 c7 16
1a 01 80 00 00 90 1e 05 c8 a1 02 39 50 20 87 1f

View file

@ -1,2 +0,0 @@
bfrev(8) g5<1>UD g5<8,8,1>UD { align1 1Q };
bfrev(16) g6<1>UD g8<8,8,1>UD { align1 1H };

View file

@ -1,2 +0,0 @@
17 00 60 00 08 02 a0 20 a0 00 8d 00 00 00 00 00
17 00 80 00 08 02 c0 20 00 01 8d 00 00 00 00 00

View file

@ -1,6 +0,0 @@
break(8) JIP: LABEL0 UIP: LABEL1 { align1 1Q };
break(16) JIP: LABEL0 UIP: LABEL1 { align1 1H };
LABEL0:
(+f0.0) break(8) JIP: LABEL1 UIP: LABEL1 { align1 1Q };
(+f0.0) break(16) JIP: LABEL1 UIP: LABEL1 { align1 1H };
LABEL1:

View file

@ -1,4 +0,0 @@
28 00 60 00 20 0e 00 20 40 00 00 00 20 00 00 00
28 00 80 00 20 0e 00 20 30 00 00 00 10 00 00 00
28 00 61 00 20 0e 00 20 20 00 00 00 20 00 00 00
28 00 81 00 20 0e 00 20 10 00 00 00 10 00 00 00

View file

@ -1,2 +0,0 @@
cbit(8) g9<1>UD g31<8,8,1>UD { align1 1Q };
cbit(16) g6<1>UD g8<8,8,1>UD { align1 1H };

View file

@ -1,2 +0,0 @@
4d 00 60 00 08 02 20 21 e0 03 8d 00 00 00 00 00
4d 00 80 00 08 02 c0 20 00 01 8d 00 00 00 00 00

View file

@ -1,104 +0,0 @@
cmp.z.f0.0(8) null<1>F g20<8,8,1>F 0xbf800000F /* -1F */ { align1 1Q };
cmp.nz.f0.0(8) g59<1>DF g2.1<0,1,0>DF g59<4,4,1>DF { align1 1Q };
cmp.nz.f0.0(8) g49<1>F g47<8,8,1>F g14.1<0,1,0>F { align1 1Q };
cmp.nz.f0.0(8) null<1>D g7<8,8,1>D 0D { align1 1Q };
cmp.z.f0.0(8) g5<1>D g4<8,8,1>D g2.5<0,1,0>D { align1 1Q };
cmp.z.f0.0(16) g7<1>D g5<8,8,1>D g2.5<0,1,0>D { align1 1H };
cmp.l.f0.0(16) g28<1>F g26<8,8,1>F g24<8,8,1>F { align1 1H };
cmp.ge.f0.0(16) g30<1>F g26<8,8,1>F g24<8,8,1>F { align1 1H };
cmp.nz.f0.0(8) g43<1>D g42<8,8,1>D g2.1<0,1,0>D { align1 1Q };
cmp.z.f0.0(8) g86<1>DF (abs)g6.2<0,1,0>DF g68<4,4,1>DF { align1 1Q };
cmp.le.f0.0(8) g108<1>D g106<8,8,1>D 0D { align1 1Q };
cmp.nz.f0.0(8) null<1>DF g6.2<0,1,0>DF g66<4,4,1>DF { align1 1Q };
cmp.l.f0.0(8) g5<1>DF g36<4,4,1>DF g53<4,4,1>DF { align1 1Q };
cmp.ge.f0.0(8) g18<1>DF g36<4,4,1>DF g53<4,4,1>DF { align1 1Q };
cmp.z.f0.0(8) g34<1>DF (abs)g106<4,4,1>DF g52<4,4,1>DF { align1 2Q };
cmp.le.f0.0(16) g35<1>D g21<8,8,1>D 0D { align1 1H };
cmp.nz.f0.0(8) null<1>DF g106<4,4,1>DF g50<4,4,1>DF { align1 2Q };
cmp.nz.f0.0(8) g113<1>DF g3.1<0,1,0>DF g59<4,4,1>DF { align1 2Q };
cmp.l.f0.0(8) null<1>UD g12<8,8,1>UD 0x00000004UD { align1 1Q };
cmp.l.f0.0(8) g53<1>F g52<8,8,1>F g51<8,8,1>F { align1 1Q };
cmp.ge.f0.0(8) g55<1>F g52<8,8,1>F g51<8,8,1>F { align1 1Q };
cmp.ge.f0.0(8) g15<1>D (abs)g12<8,8,1>D 1D { align1 1Q };
cmp.l.f0.0(8) null<1>D g6<0,1,0>D 2D { align1 1Q };
(+f0.1) cmp.z.f0.1(8) null<1>D g8<8,8,1>D 0D { align1 1Q };
cmp.nz.f0.0(16) g11<1>D g9<8,8,1>D 3D { align1 1H };
(+f0.1) cmp.z.f0.1(16) null<1>D g11<8,8,1>D 0D { align1 1H };
cmp.z.f0.0(8) null<1>D g22<8,8,1>D 1D { align1 1Q };
cmp.z.f0.0(16) null<1>D g47<8,8,1>D 1D { align1 1H };
cmp.ge.f0.0(8) g30<1>UD g29<8,8,1>UD g5.7<0,1,0>UD { align1 1Q };
cmp.l.f0.0(8) g31<1>UD g29<8,8,1>UD g5.3<0,1,0>UD { align1 1Q };
cmp.ge.f0.0(16) g50<1>UD g48<8,8,1>UD g7.7<0,1,0>UD { align1 1H };
cmp.l.f0.0(16) g52<1>UD g48<8,8,1>UD g7.3<0,1,0>UD { align1 1H };
cmp.nz.f0.0(16) g9<1>F g2.5<0,1,0>F g1.1<0,1,0>F { align1 1H };
cmp.ge.f0.0(8) null<1>D g38<8,8,1>D 32D { align1 1Q };
cmp.ge.f0.0(8) null<1>DF g21<4,4,1>DF g13<4,4,1>DF { align1 1Q };
cmp.ge.f0.0(16) g3<1>D g1.1<0,1,0>D g1<0,1,0>D { align1 1H };
cmp.l.f0.0(16) g5<1>D g1.1<0,1,0>D g1<0,1,0>D { align1 1H };
cmp.z.f0.0(8) g25<1>F g4.3<0,1,0>F g4.1<0,1,0>F { align1 1Q };
cmp.l.f0.0(8) g33<1>D g5<0,1,0>D 1D { align1 1Q };
cmp.l.f0.0(8) g43<1>DF g39<4,4,1>DF g37<4,4,1>DF { align1 2Q };
cmp.ge.f0.0(8) g46<1>DF g39<4,4,1>DF g37<4,4,1>DF { align1 2Q };
cmp.l.f0.0(16) null<1>D g6<0,1,0>D 1D { align1 1H };
cmp.z.f0.0(16) g62<1>F g12<8,8,1>F g6.3<0,1,0>F { align1 1H };
cmp.nz.f0.0(8) null<1>F g2<0,1,0>F 0x0F /* 0F */ { align1 1Q };
cmp.nz.f0.0(16) null<1>F g2<0,1,0>F 0x0F /* 0F */ { align1 1H };
cmp.ge.f0.0(16) null<1>UD g46<8,8,1>UD 0x00000040UD { align1 1H };
cmp.z.f0.0(16) null<1>F g14<8,8,1>F g6.1<0,1,0>F { align1 1H };
cmp.nz.f0.0(16) null<1>D g6<0,1,0>D 0D { align1 1H };
cmp.l.f0.0(16) null<1>UD g39<8,8,1>UD 0x00000004UD { align1 1H };
cmp.le.f0.0(8) null<1>F g2<8,8,1>F 0x3f000000F /* 0.5F */ { align1 1Q };
cmp.le.f0.0(16) null<1>F g2<8,8,1>F 0x3f000000F /* 0.5F */ { align1 1H };
cmp.le.f0.0(8) g20<1>F g5.3<0,1,0>F 0x0F /* 0F */ { align1 1Q };
cmp.ge.f0.0(8) null<1>F (abs)g26<8,8,1>F 0x5d5e0b6bF /* 1e+18F */ { align1 1Q };
cmp.g.f0.0(8) g80<1>F (abs)g44<8,8,1>F 0x3f800000F /* 1F */ { align1 1Q };
cmp.ge.f0.0(16) null<1>D g67<8,8,1>D 32D { align1 1H };
cmp.g.f0.0(8) null<1>F g124<8,8,1>F 0x0F /* 0F */ { align1 1Q };
cmp.z.f0.0(8) g4<1>F g13<8,4,2>F g2.5<0,1,0>F { align1 2Q };
cmp.g.f0.0(16) null<1>F g120<8,8,1>F 0x0F /* 0F */ { align1 1H };
cmp.g.f0.0(16) g2<1>F (abs)g17<8,8,1>F 0x3f800000F /* 1F */ { align1 1H };
cmp.l.f0.0(8) null<1>DF (abs)g5<0,1,0>DF g20<4,4,1>DF { align1 1Q };
cmp.nz.f0.0(8) g29<1>D g22.1<8,4,2>D g3.2<0,1,0>D { align1 2Q };
cmp.l.f0.0(8) null<1>DF g11<4,4,1>DF g8<4,4,1>DF { align1 2Q };
cmp.nz.f0.0(8) g73<1>F g6.1<0,1,0>F g14<8,4,2>F { align1 2Q };
cmp.g.f0.0(8) g7<1>D g2<0,1,0>D 0D { align1 1Q };
cmp.l.f0.0(8) null<1>F g4.4<0,1,0>F 0x0F /* 0F */ { align1 1Q };
cmp.l.f0.0(16) null<1>F g6.4<0,1,0>F 0x0F /* 0F */ { align1 1H };
cmp.le.f0.0(8) null<1>D g2<8,8,1>D 50D { align1 1Q };
cmp.le.f0.0(16) null<1>D g2<8,8,1>D 50D { align1 1H };
cmp.ge.f0.0(16) null<1>F g35<8,8,1>F 0x3f000000F /* 0.5F */ { align1 1H };
cmp.le.f0.0(8) g4<1>UD g2<0,1,0>UD 0x00000001UD { align1 1Q };
cmp.g.f0.0(8) g5<1>UD g2<0,1,0>UD 0x00000001UD { align1 1Q };
cmp.le.f0.0(16) g5<1>UD g2<0,1,0>UD 0x00000001UD { align1 1H };
cmp.g.f0.0(16) g7<1>UD g2<0,1,0>UD 0x00000001UD { align1 1H };
cmp.le.f0.0(16) g121<1>F g27<8,8,1>F 0x461c3f9aF /* 9999.9F */ { align1 1H };
cmp.z.f0.0(8) g5<1>D g14<8,4,2>D g3.1<0,1,0>D { align1 2Q };
cmp.g.f0.0(8) null<1>D g5.2<0,1,0>D 31D { align1 1Q };
cmp.g.f0.0(8) null<1>UD g4.2<0,1,0>UD 0x0000001fUD { align1 1Q };
(+f0.1) cmp.nz.f0.1(8) null<1>UW g0<8,8,1>UW g0<8,8,1>UW { align1 1Q };
(+f0.1) cmp.nz.f0.1(16) null<1>UW g0<8,8,1>UW g0<8,8,1>UW { align1 1H };
cmp.z.f0.0(16) null<1>D g1<8,8,1>D 1024D { align1 2H };
cmp.l.f0.0(16) null<1>D g118<8,8,1>D 32D { align1 2H };
cmp.nz.f0.0(8) null<1>UD g3<8,8,1>UD 0x00000000UD { align1 1Q };
cmp.nz.f0.0(16) null<1>UD g3<8,8,1>UD 0x00000000UD { align1 1H };
cmp.g.f0.0(16) null<1>D g2.1<0,1,0>D 0D { align1 1H };
cmp.nz.f0.0(8) null<1>Q g6<4,4,1>Q g3<4,4,1>Q { align1 1Q };
cmp.z.f0.0(8) g8<1>Q g5<4,4,1>Q g3<4,4,1>Q { align1 1Q };
cmp.nz.f0.0(8) g2<1>Q g5<4,4,1>Q g3<4,4,1>Q { align1 1Q };
cmp.nz.f0.0(8) null<1>Q g9<4,4,1>Q g4<4,4,1>Q { align1 2Q };
cmp.z.f0.0(8) g17<1>Q g11<4,4,1>Q g4<4,4,1>Q { align1 2Q };
cmp.nz.f0.0(8) g20<1>Q g11<4,4,1>Q g4<4,4,1>Q { align1 2Q };
cmp.z.f0.0(8) null<1>UD g5<8,8,1>UD 0x00000000UD { align1 1Q };
cmp.z.f0.0(16) null<1>UD g15<8,8,1>UD 0x00000000UD { align1 1H };
cmp.g.f0.0(16) g1<1>D g8<8,8,1>D 0D { align1 1H };
cmp.ge.f0.0(8) null<1>UD g10<8,8,1>UD g8<8,8,1>UD { align1 1Q };
cmp.ge.f0.0(8) null<1>DF g37<4,4,1>DF g26<4,4,1>DF { align1 2Q };
cmp.l.f0.0(8) null<1>Q g20<4,4,1>Q g25<4,4,1>Q { align1 1Q };
cmp.l.f0.0(8) null<1>Q g2<4,4,1>Q g12<4,4,1>Q { align1 2Q };
cmp.ge.f0.0(8) null<1>Q g20<4,4,1>Q g27<4,4,1>Q { align1 1Q };
cmp.ge.f0.0(8) null<1>Q g2<4,4,1>Q g8<4,4,1>Q { align1 2Q };
cmp.le.f0.0(8) null<1>UD g18<8,8,1>UD 0x000000ffUD { align1 1Q };
cmp.le.f0.0(16) null<1>UD g32<8,8,1>UD 0x000000ffUD { align1 1H };
cmp.z.f0.0(8) null<1>Q g12<4,4,1>Q g7<4,4,1>Q { align1 1Q };
cmp.z.f0.0(8) null<1>Q g26<4,4,1>Q g12<4,4,1>Q { align1 2Q };
cmp.g.f0.0(16) null<1>UD g4.2<0,1,0>UD 0x0000001fUD { align1 1H };

View file

@ -1,104 +0,0 @@
10 00 60 01 e0 3a 00 20 80 02 8d 3e 00 00 80 bf
10 00 60 02 c8 32 60 27 48 00 00 32 60 07 69 00
10 00 60 02 e8 3a 20 26 e0 05 8d 3a c4 01 00 00
10 00 60 02 20 0a 00 20 e0 00 8d 0e 00 00 00 00
10 00 60 01 28 0a a0 20 80 00 8d 0a 54 00 00 00
10 00 80 01 28 0a e0 20 a0 00 8d 0a 54 00 00 00
10 00 80 05 e8 3a 80 23 40 03 8d 3a 00 03 8d 00
10 00 80 04 e8 3a c0 23 40 03 8d 3a 00 03 8d 00
10 00 60 02 28 0a 60 25 40 05 8d 0a 44 00 00 00
10 00 60 01 c8 32 c0 2a d0 20 00 32 80 08 69 00
10 00 60 06 28 0a 80 2d 40 0d 8d 0e 00 00 00 00
10 00 60 02 c0 32 00 20 d0 00 00 32 40 08 69 00
10 00 60 05 c8 32 a0 20 80 04 69 32 a0 06 69 00
10 00 60 04 c8 32 40 22 80 04 69 32 a0 06 69 00
10 10 60 01 c8 32 40 24 40 2d 69 32 80 06 69 00
10 00 80 06 28 0a 60 24 a0 02 8d 0e 00 00 00 00
10 10 60 02 c0 32 00 20 40 0d 69 32 40 06 69 00
10 10 60 02 c8 32 20 2e 68 00 00 32 60 07 69 00
10 00 60 05 00 02 00 20 80 01 8d 06 04 00 00 00
10 00 60 05 e8 3a a0 26 80 06 8d 3a 60 06 8d 00
10 00 60 04 e8 3a e0 26 80 06 8d 3a 60 06 8d 00
10 00 60 04 28 0a e0 21 80 21 8d 0e 01 00 00 00
10 00 60 05 20 0a 00 20 c0 00 00 0e 02 00 00 00
10 00 61 01 21 0a 00 20 00 01 8d 0e 00 00 00 00
10 00 80 02 28 0a 60 21 20 01 8d 0e 03 00 00 00
10 00 81 01 21 0a 00 20 60 01 8d 0e 00 00 00 00
10 00 60 01 20 0a 00 20 c0 02 8d 0e 01 00 00 00
10 00 80 01 20 0a 00 20 e0 05 8d 0e 01 00 00 00
10 00 60 04 08 02 c0 23 a0 03 8d 02 bc 00 00 00
10 00 60 05 08 02 e0 23 a0 03 8d 02 ac 00 00 00
10 00 80 04 08 02 40 26 00 06 8d 02 fc 00 00 00
10 00 80 05 08 02 80 26 00 06 8d 02 ec 00 00 00
10 00 80 02 e8 3a 20 21 54 00 00 3a 24 00 00 00
10 00 60 04 20 0a 00 20 c0 04 8d 0e 20 00 00 00
10 00 60 04 c0 32 00 20 a0 02 69 32 a0 01 69 00
10 00 80 04 28 0a 60 20 24 00 00 0a 20 00 00 00
10 00 80 05 28 0a a0 20 24 00 00 0a 20 00 00 00
10 00 60 01 e8 3a 20 23 8c 00 00 3a 84 00 00 00
10 00 60 05 28 0a 20 24 a0 00 00 0e 01 00 00 00
10 10 60 05 c8 32 60 25 e0 04 69 32 a0 04 69 00
10 10 60 04 c8 32 c0 25 e0 04 69 32 a0 04 69 00
10 00 80 05 20 0a 00 20 c0 00 00 0e 01 00 00 00
10 00 80 01 e8 3a c0 27 80 01 8d 3a cc 00 00 00
10 00 60 02 e0 3a 00 20 40 00 00 3e 00 00 00 00
10 00 80 02 e0 3a 00 20 40 00 00 3e 00 00 00 00
10 00 80 04 00 02 00 20 c0 05 8d 06 40 00 00 00
10 00 80 01 e0 3a 00 20 c0 01 8d 3a c4 00 00 00
10 00 80 02 20 0a 00 20 c0 00 00 0e 00 00 00 00
10 00 80 05 00 02 00 20 e0 04 8d 06 04 00 00 00
10 00 60 06 e0 3a 00 20 40 00 8d 3e 00 00 00 3f
10 00 80 06 e0 3a 00 20 40 00 8d 3e 00 00 00 3f
10 00 60 06 e8 3a 80 22 ac 00 00 3e 00 00 00 00
10 00 60 04 e0 3a 00 20 40 23 8d 3e 6b 0b 5e 5d
10 00 60 03 e8 3a 00 2a 80 25 8d 3e 00 00 80 3f
10 00 80 04 20 0a 00 20 60 08 8d 0e 20 00 00 00
10 00 60 03 e0 3a 00 20 80 0f 8d 3e 00 00 00 00
10 10 60 01 e8 3a 80 20 a0 01 8a 3a 54 00 00 00
10 00 80 03 e0 3a 00 20 00 0f 8d 3e 00 00 00 00
10 00 80 03 e8 3a 40 20 20 22 8d 3e 00 00 80 3f
10 00 60 05 c0 32 00 20 a0 20 00 32 80 02 69 00
10 10 60 02 28 0a a0 23 c4 02 8a 0a 68 00 00 00
10 10 60 05 c0 32 00 20 60 01 69 32 00 01 69 00
10 10 60 02 e8 3a 20 29 c4 00 00 3a c0 01 8a 00
10 00 60 03 28 0a e0 20 40 00 00 0e 00 00 00 00
10 00 60 05 e0 3a 00 20 90 00 00 3e 00 00 00 00
10 00 80 05 e0 3a 00 20 d0 00 00 3e 00 00 00 00
10 00 60 06 20 0a 00 20 40 00 8d 0e 32 00 00 00
10 00 80 06 20 0a 00 20 40 00 8d 0e 32 00 00 00
10 00 80 04 e0 3a 00 20 60 04 8d 3e 00 00 00 3f
10 00 60 06 08 02 80 20 40 00 00 06 01 00 00 00
10 00 60 03 08 02 a0 20 40 00 00 06 01 00 00 00
10 00 80 06 08 02 a0 20 40 00 00 06 01 00 00 00
10 00 80 03 08 02 e0 20 40 00 00 06 01 00 00 00
10 00 80 06 e8 3a 20 2f 60 03 8d 3e 9a 3f 1c 46
10 10 60 01 28 0a a0 20 c0 01 8a 0a 64 00 00 00
10 00 60 03 20 0a 00 20 a8 00 00 0e 1f 00 00 00
10 00 60 03 00 02 00 20 88 00 00 06 1f 00 00 00
10 00 61 02 41 12 00 20 00 00 8d 12 00 00 8d 00
10 00 81 02 41 12 00 20 00 00 8d 12 00 00 8d 00
10 20 80 01 20 0a 00 20 20 00 8d 0e 00 04 00 00
10 20 80 05 20 0a 00 20 c0 0e 8d 0e 20 00 00 00
10 00 60 02 00 02 00 20 60 00 8d 06 00 00 00 00
10 00 80 02 00 02 00 20 60 00 8d 06 00 00 00 00
10 00 80 03 20 0a 00 20 44 00 00 0e 00 00 00 00
10 00 60 02 20 4b 00 20 c0 00 69 4a 60 00 69 00
10 00 60 01 28 4b 00 21 a0 00 69 4a 60 00 69 00
10 00 60 02 28 4b 40 20 a0 00 69 4a 60 00 69 00
10 10 60 02 20 4b 00 20 20 01 69 4a 80 00 69 00
10 10 60 01 28 4b 20 22 60 01 69 4a 80 00 69 00
10 10 60 02 28 4b 80 22 60 01 69 4a 80 00 69 00
10 00 60 01 00 02 00 20 a0 00 8d 06 00 00 00 00
10 00 80 01 00 02 00 20 e0 01 8d 06 00 00 00 00
10 00 80 03 28 0a 20 20 00 01 8d 0e 00 00 00 00
10 00 60 04 00 02 00 20 40 01 8d 02 00 01 8d 00
10 10 60 04 c0 32 00 20 a0 04 69 32 40 03 69 00
10 00 60 05 20 4b 00 20 80 02 69 4a 20 03 69 00
10 10 60 05 20 4b 00 20 40 00 69 4a 80 01 69 00
10 00 60 04 20 4b 00 20 80 02 69 4a 60 03 69 00
10 10 60 04 20 4b 00 20 40 00 69 4a 00 01 69 00
10 00 60 06 00 02 00 20 40 02 8d 06 ff 00 00 00
10 00 80 06 00 02 00 20 00 04 8d 06 ff 00 00 00
10 00 60 01 20 4b 00 20 80 01 69 4a e0 00 69 00
10 10 60 01 20 4b 00 20 40 03 69 4a 80 01 69 00
10 00 80 03 00 02 00 20 88 00 00 06 1f 00 00 00

View file

@ -1,4 +0,0 @@
cont(8) JIP: LABEL0 UIP: LABEL1 { align1 1Q };
LABEL0:
cont(16) JIP: LABEL1 UIP: LABEL1 { align1 1H };
LABEL1:

View file

@ -1,2 +0,0 @@
29 00 60 00 00 0e 00 34 20 00 00 00 10 00 00 00
29 00 80 00 00 0e 00 34 10 00 00 00 10 00 00 00

View file

@ -1,14 +0,0 @@
and(1) cr0<1>UD cr0<0,1,0>UD 0xfffffb3fUD { align1 1N switch };
and(1) cr0<1>UD cr0<0,1,0>UD 0xffffff3fUD { align1 1N switch };
and(1) cr0<1>UD cr0<0,1,0>UD 0xfffffb7fUD { align1 1N switch };
and(1) cr0<1>UD cr0<0,1,0>UD 0xffffff7fUD { align1 1N switch };
and(1) cr0<1>UD cr0<0,1,0>UD 0xfffffbbfUD { align1 1N switch };
and(1) cr0<1>UD cr0<0,1,0>UD 0xffffffbfUD { align1 1N switch };
and(1) cr0<1>UD cr0<0,1,0>UD 0xffffffcfUD { align1 1N switch };
and(1) cr0<1>UD cr0<0,1,0>UD 0xfffffbffUD { align1 1N switch };
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000400UD { align1 1N switch };
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000030UD { align1 1N switch };
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000040UD { align1 1N switch };
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000440UD { align1 1N switch };
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000080UD { align1 1N switch };
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000480UD { align1 1N switch };

View file

@ -1,14 +0,0 @@
05 80 00 00 00 00 00 30 00 10 00 06 3f fb ff ff
05 80 00 00 00 00 00 30 00 10 00 06 3f ff ff ff
05 80 00 00 00 00 00 30 00 10 00 06 7f fb ff ff
05 80 00 00 00 00 00 30 00 10 00 06 7f ff ff ff
05 80 00 00 00 00 00 30 00 10 00 06 bf fb ff ff
05 80 00 00 00 00 00 30 00 10 00 06 bf ff ff ff
05 80 00 00 00 00 00 30 00 10 00 06 cf ff ff ff
05 80 00 00 00 00 00 30 00 10 00 06 ff fb ff ff
06 80 00 00 00 00 00 30 00 10 00 06 00 04 00 00
06 80 00 00 00 00 00 30 00 10 00 06 30 00 00 00
06 80 00 00 00 00 00 30 00 10 00 06 40 00 00 00
06 80 00 00 00 00 00 30 00 10 00 06 40 04 00 00
06 80 00 00 00 00 00 30 00 10 00 06 80 00 00 00
06 80 00 00 00 00 00 30 00 10 00 06 80 04 00 00

View file

@ -1,13 +0,0 @@
csel.nz(8) g15<1>F g11<4,4,1>F (abs)g11<4,4,1>F g11<4,4,1>F { align16 1Q };
csel.nz(16) g14<1>F g8<4,4,1>F (abs)g8<4,4,1>F g8<4,4,1>F { align16 1H };
csel.le(8) g21<1>F (abs)g5.3<0,1,0>F g5.0<0,1,0>F g5.3<0,1,0>F { align16 1Q };
csel.l(8) g107<1>F -g101<4,4,1>F g101<4,4,1>F g104<4,4,1>F { align16 1Q };
csel.le(8) g21<1>F g5.0<0,1,0>F (abs)g5.1<0,1,0>F g5.1<0,1,0>F { align16 1Q };
csel.l(8) g127<1>F g2<4,4,1>F g8<4,4,1>F g4.0<0,1,0>F { align16 1Q };
csel.l(16) g126<1>F g2<4,4,1>F g13<4,4,1>F g6.0<0,1,0>F { align16 1H };
csel.le(16) g13<1>F (abs)g73<4,4,1>F g58<4,4,1>F g73<4,4,1>F { align16 1H };
csel.le(16) g15<1>F g58<4,4,1>F (abs)g73<4,4,1>F g73<4,4,1>F { align16 1H };
csel.l(16) g69<1>F -g65<4,4,1>F g65<4,4,1>F g67<4,4,1>F { align16 1H };
csel.sat.g(8) g125<1>F g2.3<0,1,0>F g2.2<0,1,0>F g2.0<0,1,0>F { align16 1Q };
csel.g(8) g125<1>F g2.3<0,1,0>F g2.2<0,1,0>F g2.0<0,1,0>F { align16 1Q };
csel.g(16) g122<1>F g2.3<0,1,0>F g2.2<0,1,0>F g2.0<0,1,0>F { align16 1H };

View file

@ -1,13 +0,0 @@
12 01 60 02 80 00 1e 0f c8 b1 00 39 16 20 c7 02
12 01 80 02 80 00 1e 0e c8 81 00 39 10 20 07 02
12 01 60 06 20 00 1e 15 01 56 20 00 0a 04 58 01
12 01 60 05 40 00 1e 6b c8 51 06 39 ca 20 07 1a
12 01 60 06 80 00 1e 15 01 50 20 40 0a 04 48 01
12 01 60 05 00 00 1e 7f c8 21 00 39 10 04 00 01
12 01 80 05 00 00 1e 7e c8 21 00 39 1a 04 80 01
12 01 80 06 20 00 1e 0d c8 91 04 39 74 20 47 12
12 01 80 06 80 00 1e 0f c8 a1 03 39 92 20 47 12
12 01 80 05 40 00 1e 45 c8 11 04 39 82 20 c7 10
12 01 60 83 00 00 1e 7d 01 26 20 80 04 04 80 00
12 01 60 03 00 00 1e 7d 01 26 20 80 04 04 80 00
12 01 80 03 00 00 1e 7a 01 26 20 80 04 04 80 00

View file

@ -1,4 +0,0 @@
else(8) JIP: LABEL0 UIP: LABEL0 { align1 1Q };
else(16) JIP: LABEL0 UIP: LABEL0 { align1 1H };
else(32) JIP: LABEL0 UIP: LABEL0 { align1 };
LABEL0:

View file

@ -1,3 +0,0 @@
24 00 60 00 20 0e 00 20 30 00 00 00 30 00 00 00
24 00 80 00 20 0e 00 20 20 00 00 00 20 00 00 00
24 00 a0 00 20 0e 00 20 10 00 00 00 10 00 00 00

View file

@ -1,4 +0,0 @@
endif(8) JIP: LABEL0 { align1 1Q };
endif(16) JIP: LABEL0 { align1 1H };
endif(32) JIP: LABEL0 { align1 };
LABEL0:

View file

@ -1,3 +0,0 @@
25 00 60 00 00 0e 00 00 00 00 00 08 30 00 00 00
25 00 80 00 00 0e 00 00 00 00 00 08 20 00 00 00
25 00 a0 00 00 0e 00 00 00 00 00 08 10 00 00 00

View file

@ -1,2 +0,0 @@
fbh(8) g15<1>D g35<8,8,1>D { align1 1Q };
fbh(16) g8<1>D g4<8,8,1>D { align1 1H };

View file

@ -1,2 +0,0 @@
4b 00 60 00 28 0a e0 21 60 04 8d 00 00 00 00 00
4b 00 80 00 28 0a 00 21 80 00 8d 00 00 00 00 00

View file

@ -1,3 +0,0 @@
fbl(8) g5<1>UD g5<8,8,1>UD { align1 1Q };
fbl(16) g6<1>UD g8<8,8,1>UD { align1 1H };
fbl(1) g43<1>UD mask0<0,1,0>UD { align1 WE_all 1N };

View file

@ -1,3 +0,0 @@
4c 00 60 00 08 02 a0 20 a0 00 8d 00 00 00 00 00
4c 00 80 00 08 02 c0 20 00 01 8d 00 00 00 00 00
4c 00 00 00 0c 00 60 25 00 08 00 00 00 00 00 00

View file

@ -1,2 +0,0 @@
frc(8) g28<1>F g4<8,8,1>F { align1 1Q };
frc(16) g3<1>F g1<0,1,0>F { align1 1H };

View file

@ -1,2 +0,0 @@
43 00 60 00 e8 3a 80 23 80 00 8d 00 00 00 00 00
43 00 80 00 e8 3a 60 20 20 00 00 00 00 00 00 00

View file

@ -1,6 +0,0 @@
(-f0.1.any4h) halt(8) JIP: LABEL0 UIP: LABEL0 { align1 1Q };
halt(8) JIP: LABEL1 UIP: LABEL1 { align1 1Q };
LABEL1:
(-f0.1.any4h) halt(16) JIP: LABEL0 UIP: LABEL0 { align1 1H };
halt(16) JIP: LABEL0 UIP: LABEL0 { align1 1H };
LABEL0:

View file

@ -1,4 +0,0 @@
2a 00 76 00 21 0e 00 20 40 00 00 00 40 00 00 00
2a 00 60 00 20 0e 00 20 10 00 00 00 10 00 00 00
2a 00 96 00 21 0e 00 20 20 00 00 00 20 00 00 00
2a 00 80 00 20 0e 00 20 10 00 00 00 10 00 00 00

View file

@ -1,7 +0,0 @@
(+f0.0) if(8) JIP: LABEL0 UIP: LABEL1 { align1 1Q };
(-f0.0) if(8) JIP: LABEL0 UIP: LABEL1 { align1 1Q };
LABEL0:
(-f0.0) if(16) JIP: LABEL1 UIP: LABEL1 { align1 1H };
(+f0.0) if(16) JIP: LABEL1 UIP: LABEL1 { align1 1H };
(+f0.0) if(32) JIP: LABEL1 UIP: LABEL1 { align1 };
LABEL1:

View file

@ -1,5 +0,0 @@
22 00 61 00 20 0e 00 20 50 00 00 00 20 00 00 00
22 00 71 00 20 0e 00 20 40 00 00 00 10 00 00 00
22 00 91 00 20 0e 00 20 30 00 00 00 30 00 00 00
22 00 81 00 20 0e 00 20 20 00 00 00 20 00 00 00
22 00 a1 00 20 0e 00 20 10 00 00 00 10 00 00 00

View file

@ -1,5 +0,0 @@
lrp(8) g4<1>F g16<4,4,1>F g7.2<0,1,0>F g6.6<0,1,0>F { align16 1Q };
lrp(16) g4<1>F g2.4<0,1,0>F g2.2<0,1,0>F g2.0<0,1,0>F { align16 1H };
lrp.z.f0.0(8) g8<1>F g3.2<0,1,0>F g3.1<0,1,0>F g3.0<0,1,0>F { align16 1Q };
lrp.sat(8) g7<1>F g10<4,4,1>F g13<4,4,1>F g16<4,4,1>F { align16 1Q };
lrp.sat(16) g18<1>F g20<4,4,1>F g26<4,4,1>F g32<4,4,1>F { align16 1H };

View file

@ -1,5 +0,0 @@
5c 01 60 00 00 00 1e 04 c8 01 21 80 0e 04 b0 01
5c 01 80 00 00 00 1e 04 01 28 20 80 04 04 80 00
5c 01 60 01 00 00 1e 08 01 34 20 40 06 04 c0 00
5c 01 60 80 00 00 1e 07 c8 a1 00 39 1a 20 07 04
5c 01 80 80 00 00 1e 12 c8 41 01 39 34 20 07 08

View file

@ -1,2 +0,0 @@
lzd(8) g25<1>UD g3.1<0,1,0>UD { align1 1Q };
lzd(16) g27<1>UD g3.1<0,1,0>UD { align1 1H };

View file

@ -1,2 +0,0 @@
4a 00 60 00 08 02 20 23 64 00 00 00 00 00 00 00
4a 00 80 00 08 02 60 23 64 00 00 00 00 00 00 00

View file

@ -1,4 +0,0 @@
mach(8) g19<1>UD g17<8,8,1>UD 0xaaaaaaabUD { align1 1Q AccWrEnable };
mach(8) g23<1>D g17<8,8,1>D 1431655766D { align1 1Q AccWrEnable };
mach(8) g42<1>UD g39<8,8,1>UD 0xaaaaaaabUD { align1 2Q AccWrEnable };
mach(8) g50<1>D g39<8,8,1>D 1431655766D { align1 2Q AccWrEnable };

View file

@ -1,4 +0,0 @@
49 00 60 10 08 02 60 22 20 02 8d 06 ab aa aa aa
49 00 60 10 28 0a e0 22 20 02 8d 0e 56 55 55 55
49 10 60 10 08 02 40 25 e0 04 8d 06 ab aa aa aa
49 10 60 10 28 0a 40 26 e0 04 8d 0e 56 55 55 55

View file

@ -1,43 +0,0 @@
mad(8) g26<1>F g22<4,4,1>F g2.4<0,1,0>F g5<4,4,1>F { align16 1Q };
mad(16) g14<1>F g12<4,4,1>F g4<4,4,1>F g4<4,4,1>F { align16 1H };
mad(8) g64<1>DF g62<4,4,1>DF g40<4,4,1>DF g92<4,4,1>DF { align16 1Q };
mad(8) g80<1>DF -g50<4,4,1>DF g24<4,4,1>DF g80<4,4,1>DF { align16 1Q };
mad(8) g27<1>DF g48<4,4,1>DF g106<4,4,1>DF g25<4,4,1>DF { align16 2Q };
mad(8) g13<1>F -g14.0<0,1,0>F g11<4,4,1>F g6<4,4,1>F { align16 1Q };
mad(16) g29<1>F -g33.0<0,1,0>F g25<4,4,1>F g15<4,4,1>F { align16 1H };
mad(8) g29<1>DF g23<4,4,1>DF g27<4,4,1>DF -g25<4,4,1>DF { align16 1Q };
mad.le.f0.0(8) g5<1>F g3<4,4,1>F g4.2<0,1,0>F g64<4,4,1>F { align16 1Q };
mad.le.f0.0(16) g7<1>F g4<4,4,1>F g6.2<0,1,0>F g16<4,4,1>F { align16 1H };
mad(8) g32<1>F g31<4,4,1>F g2.3<0,1,0>F -g15<4,4,1>F { align16 1Q };
mad(16) g56<1>F g54<4,4,1>F g2.3<0,1,0>F -g5<4,4,1>F { align16 1H };
mad.sat(8) g12<1>F g4.1<0,1,0>F g4.0<0,1,0>F g8<4,4,1>F { align16 1Q };
mad.sat(16) g18<1>F g6.1<0,1,0>F g6.0<0,1,0>F g10<4,4,1>F { align16 1H };
mad(8) g86<1>F g88.6<0,1,0>F -g88.7<0,1,0>F g77<4,4,1>F { align16 1Q };
mad(8) g85<1>DF g28<4,4,1>DF g83<4,4,1>DF -g81<4,4,1>DF { align16 2Q };
mad(8) g11<1>F -g2.0<0,1,0>F g10<4,4,1>F (abs)g5.6<0,1,0>F { align16 1Q };
mad(8) g15<1>F g2.1<0,1,0>F g11<4,4,1>F (abs)g5.6<0,1,0>F { align16 1Q };
mad.l.f0.0(8) g2<1>F g22<4,4,1>F g5.7<0,1,0>F g6.3<0,1,0>F { align16 1Q };
mad(8) g79<1>DF -g39<4,4,1>DF g21<4,4,1>DF g79<4,4,1>DF { align16 2Q };
mad(8) g117<1>F -g116<4,4,1>F g9.0<0,1,0>F -g113<4,4,1>F { align16 1Q };
mad.ge.f0.0(8) g13<1>F g28.0<0,1,0>F g9<4,4,1>F -g2.4<0,1,0>F { align16 1Q };
mad.ge.f0.0(16) g23<1>F g17.0<0,1,0>F g6<4,4,1>F -g3.0<0,1,0>F { align16 1H };
mad(8) g26<1>F g2.0<0,1,0>F -g2.1<0,1,0>F (abs)g5.6<0,1,0>F { align16 1Q };
mad(8) g70<1>F -g13<4,4,1>F -g2.1<0,1,0>F -g47<4,4,1>F { align16 1Q };
mad(16) g95<1>F -g93<4,4,1>F g85<4,4,1>F -g85<4,4,1>F { align16 1H };
mad(16) g5<1>F -g21<4,4,1>F -g2.1<0,1,0>F -g85<4,4,1>F { align16 1H };
mad(16) g56<1>F g6.4<0,1,0>F -g6.5<0,1,0>F g51<4,4,1>F { align16 1H };
mad.sat(8) g124<1>F -g7<4,4,1>F g2.6<0,1,0>F g2.1<0,1,0>F { align16 1Q };
mad(16) g71<1>F g55.0<0,1,0>F -g55.1<0,1,0>F (abs)g1.0<0,1,0>F { align16 1H };
mad(16) g77<1>F -g55.2<0,1,0>F g71<4,4,1>F (abs)g1.0<0,1,0>F { align16 1H };
mad(16) g37<1>F g55.3<0,1,0>F g77<4,4,1>F (abs)g1.0<0,1,0>F { align16 1H };
mad(8) g43<1>DF g42<4,4,1>DF -g34<4,4,1>DF g7<4,4,1>DF { align16 1Q };
mad(8) g3<1>DF g2<4,4,1>DF -g111<4,4,1>DF g39<4,4,1>DF { align16 2Q };
mad(8) g12<1>F -g17<4,4,1>F (abs)g7<4,4,1>F g4.0<0,1,0>F { align16 1Q };
mad(16) g27<1>F -g22<4,4,1>F (abs)g19<4,4,1>F g29.0<0,1,0>F { align16 1H };
mad.sat(8) g125<1>F g9<4,4,1>F g6<4,4,1>F -g64.0<0,1,0>F { align16 1Q };
mad.l.f0.0(16) g5<1>F g9<4,4,1>F g2.7<0,1,0>F g3.3<0,1,0>F { align16 1H };
mad(8) g6<1>DF -g55<4,4,1>DF g2<4,4,1>DF -g47<4,4,1>DF { align16 1Q };
mad.z.f0.0(8) g8<1>F g3.2<0,1,0>F g3.1<0,1,0>F g3.0<0,1,0>F { align16 1Q };
mad(8) g63<1>DF -g48<4,4,1>DF g56<4,4,1>DF -g44<4,4,1>DF { align16 2Q };
mad.nz.f0.0(8) g10<1>F -g12.0<0,1,0>F g7<4,4,1>F g10<4,4,1>F { align16 1Q };
mad.nz.f0.0(16) g15<1>F -g33.0<0,1,0>F g9<4,4,1>F g17<4,4,1>F { align16 1H };

View file

@ -1,43 +0,0 @@
5b 01 60 00 00 00 1e 1a c8 61 21 00 05 20 47 01
5b 01 80 00 00 00 1e 0e c8 c1 00 39 08 20 07 01
5b 01 60 00 00 d8 1e 40 c8 e1 03 39 50 20 07 17
5b 01 60 00 40 d8 1e 50 c8 21 03 39 30 20 07 14
5b 11 60 00 00 d8 1e 1b c8 01 03 39 d4 20 47 06
5b 01 60 00 40 00 1e 0d 01 e0 00 39 16 20 87 01
5b 01 80 00 40 00 1e 1d 01 10 02 39 32 20 c7 03
5b 01 60 00 00 dc 1e 1d c8 71 01 39 36 20 47 06
5b 01 60 06 00 00 1e 05 c8 31 20 80 08 20 07 10
5b 01 80 06 00 00 1e 07 c8 41 20 80 0c 20 07 04
5b 01 60 00 00 04 1e 20 c8 f1 21 c0 04 20 c7 03
5b 01 80 00 00 04 1e 38 c8 61 23 c0 04 20 47 01
5b 01 60 80 00 00 1e 0c 01 42 20 00 08 20 07 02
5b 01 80 80 00 00 1e 12 01 62 20 00 0c 20 87 02
5b 01 60 00 00 01 1e 56 01 8c 25 c0 b1 20 47 13
5b 11 60 00 00 dc 1e 55 c8 c1 01 39 a6 20 47 14
5b 01 60 00 40 02 1e 0b 01 20 00 39 14 04 70 01
5b 01 60 00 00 02 1e 0f 01 22 00 39 16 04 70 01
5b 01 60 05 00 00 1e 02 c8 61 21 c0 0b 04 98 01
5b 11 60 00 40 d8 1e 4f c8 71 02 39 2a 20 c7 13
5b 01 60 00 40 04 1e 75 c8 41 27 00 12 20 47 1c
5b 01 60 04 00 04 1e 0d 01 c0 01 39 12 04 a0 00
5b 01 80 04 00 04 1e 17 01 10 01 39 0c 04 c0 00
5b 01 60 00 00 03 1e 1a 01 20 20 40 04 04 70 01
5b 01 60 00 40 05 1e 46 c8 d1 20 40 04 20 c7 0b
5b 01 80 00 40 04 1e 5f c8 d1 05 39 aa 20 47 15
5b 01 80 00 40 05 1e 05 c8 51 21 40 04 20 47 15
5b 01 80 00 00 01 1e 38 01 68 20 40 0d 20 c7 0c
5b 01 60 80 40 00 1e 7c c8 71 20 80 05 04 88 00
5b 01 80 00 00 03 1e 47 01 70 23 40 6e 04 40 00
5b 01 80 00 40 02 1e 4d 01 74 03 39 8e 04 40 00
5b 01 80 00 00 02 1e 25 01 76 03 39 9a 04 40 00
5b 01 60 00 00 d9 1e 2b c8 a1 02 39 44 20 c7 01
5b 11 60 00 00 d9 1e 03 c8 21 00 39 de 20 c7 09
5b 01 60 00 c0 00 1e 0c c8 11 01 39 0e 04 00 01
5b 01 80 00 c0 00 1e 1b c8 61 01 39 26 04 40 07
5b 01 60 80 00 04 1e 7d c8 91 00 39 0c 04 00 10
5b 01 80 05 00 00 1e 05 c8 91 20 c0 05 04 d8 00
5b 01 60 00 40 dc 1e 06 c8 71 03 39 04 20 c7 0b
5b 01 60 01 00 00 1e 08 01 34 20 40 06 04 c0 00
5b 11 60 00 40 dc 1e 3f c8 01 03 39 70 20 07 0b
5b 01 60 02 40 00 1e 0a 01 c0 00 39 0e 20 87 02
5b 01 80 02 40 00 1e 0f 01 10 02 39 12 20 47 04

View file

@ -1,31 +0,0 @@
math sqrt(16) g20<1>F g18<8,8,1>F null<8,8,1>F { align1 1H };
math inv(8) g95<1>F g94<8,8,1>F null<8,8,1>F { align1 1Q };
math inv(16) g10<1>F g8<8,8,1>F null<8,8,1>F { align1 1H };
math intmod(8) g3<1>UD g1<0,1,0>UD g1.2<0,1,0>UD { align1 1Q };
math intmod(8) g4<1>UD g1<0,1,0>UD g1.2<0,1,0>UD { align1 2Q };
math sqrt(8) g24<1>F g23<8,8,1>F null<8,8,1>F { align1 1Q };
math rsq(8) g5<1>F g2<8,8,1>F null<8,8,1>F { align1 1Q };
math pow(8) g11<1>F g10<8,8,1>F 0x42fc6666F /* 126.2F */ { align1 1Q };
math pow(16) g18<1>F g16<8,8,1>F 0x42fc6666F /* 126.2F */ { align1 1H };
math log(8) g7<1>F g6<8,8,1>F null<8,8,1>F { align1 1Q };
math log(16) g11<1>F g9<8,8,1>F null<8,8,1>F { align1 1H };
math cos(8) g3<1>F g2<8,8,1>F null<8,8,1>F { align1 1Q };
math cos(16) g4<1>F g2<8,8,1>F null<8,8,1>F { align1 1H };
math intdiv(8) g4<1>UD g1<0,1,0>UD g1.4<0,1,0>UD { align1 1Q };
math intdiv(8) g5<1>UD g1<0,1,0>UD g1.4<0,1,0>UD { align1 2Q };
math intdiv(8) g24<1>D g4<0,1,0>D g2.2<0,1,0>D { align1 1Q };
math sin(8) g10<1>F g9<8,8,1>F null<8,8,1>F { align1 1Q };
math rsq(16) g68<1>F g66<8,8,1>F null<8,8,1>F { align1 1H };
math exp(8) g124<1>F g10<8,8,1>F null<8,8,1>F { align1 1Q };
math exp(16) g120<1>F g7<8,8,1>F null<8,8,1>F { align1 1H };
math intdiv(8) g5<1>D g2<0,1,0>D g2.4<0,1,0>D { align1 2Q };
math sin(16) g3<1>F g2<0,1,0>F null<8,8,1>F { align1 1H };
math.sat pow(8) g3<1>F g2<0,1,0>F g2.4<0,1,0>F { align1 1Q };
math.sat pow(16) g3<1>F g2<0,1,0>F g2.4<0,1,0>F { align1 1H };
math.sat sqrt(8) g3<1>F g2<0,1,0>F null<8,8,1>F { align1 1Q };
math.sat sqrt(16) g3<1>F g2<0,1,0>F null<8,8,1>F { align1 1H };
math.sat exp(8) g3<1>F g2<0,1,0>F null<8,8,1>F { align1 1Q };
math.sat exp(16) g3<1>F g2<0,1,0>F null<8,8,1>F { align1 1H };
math.sat rsq(8) g127<1>F (abs)g7<8,8,1>F null<8,8,1>F { align1 1Q };
math.sat inv(8) g124<1>F g2<0,1,0>F null<8,8,1>F { align1 1Q };
math.sat log(8) g127<1>F g7<8,8,1>F null<8,8,1>F { align1 1Q };

View file

@ -1,31 +0,0 @@
38 00 80 04 e8 3a 80 22 40 02 8d 38 00 00 8d 00
38 00 60 01 e8 3a e0 2b c0 0b 8d 38 00 00 8d 00
38 00 80 01 e8 3a 40 21 00 01 8d 38 00 00 8d 00
38 00 60 0d 08 02 60 20 20 00 00 02 28 00 00 00
38 10 60 0d 08 02 80 20 20 00 00 02 28 00 00 00
38 00 60 04 e8 3a 00 23 e0 02 8d 38 00 00 8d 00
38 00 60 05 e8 3a a0 20 40 00 8d 38 00 00 8d 00
38 00 60 0a e8 3a 60 21 40 01 8d 3e 66 66 fc 42
38 00 80 0a e8 3a 40 22 00 02 8d 3e 66 66 fc 42
38 00 60 02 e8 3a e0 20 c0 00 8d 38 00 00 8d 00
38 00 80 02 e8 3a 60 21 20 01 8d 38 00 00 8d 00
38 00 60 07 e8 3a 60 20 40 00 8d 38 00 00 8d 00
38 00 80 07 e8 3a 80 20 40 00 8d 38 00 00 8d 00
38 00 60 0c 08 02 80 20 20 00 00 02 30 00 00 00
38 10 60 0c 08 02 a0 20 20 00 00 02 30 00 00 00
38 00 60 0c 28 0a 00 23 80 00 00 0a 48 00 00 00
38 00 60 06 e8 3a 40 21 20 01 8d 38 00 00 8d 00
38 00 80 05 e8 3a 80 28 40 08 8d 38 00 00 8d 00
38 00 60 03 e8 3a 80 2f 40 01 8d 38 00 00 8d 00
38 00 80 03 e8 3a 00 2f e0 00 8d 38 00 00 8d 00
38 10 60 0c 28 0a a0 20 40 00 00 0a 50 00 00 00
38 00 80 06 e8 3a 60 20 40 00 00 38 00 00 8d 00
38 00 60 8a e8 3a 60 20 40 00 00 3a 50 00 00 00
38 00 80 8a e8 3a 60 20 40 00 00 3a 50 00 00 00
38 00 60 84 e8 3a 60 20 40 00 00 38 00 00 8d 00
38 00 80 84 e8 3a 60 20 40 00 00 38 00 00 8d 00
38 00 60 83 e8 3a 60 20 40 00 00 38 00 00 8d 00
38 00 80 83 e8 3a 60 20 40 00 00 38 00 00 8d 00
38 00 60 85 e8 3a e0 2f e0 20 8d 38 00 00 8d 00
38 00 60 81 e8 3a 80 2f 40 00 00 38 00 00 8d 00
38 00 60 82 e8 3a e0 2f e0 00 8d 38 00 00 8d 00

View file

@ -1,139 +0,0 @@
mov(8) g123<1>UD g1<8,8,1>UD { align1 WE_all 1Q };
mov(8) g124<1>F 0x40c00000F /* 6F */ { align1 1Q };
mov(8) g14<1>UD 0x00000000UD { align1 1Q };
mov(8) g17<1>F g12<8,8,1>F { align1 1Q };
mov.sat(8) g124<1>F g8<8,8,1>F { align1 1Q };
mov(8) g61<2>D g22<8,8,1>D { align1 1Q };
mov(8) g21<1>D g59<8,4,2>UD { align1 1Q };
mov(8) g4<1>D -1D { align1 1Q };
mov.nz.f0.0(8) null<1>D g4<8,8,1>D { align1 1Q };
mov(1) g2.2<1>UD 0x00000000UD { align1 WE_all 1N };
mov(4) g114<1>F g2.3<8,2,4>F { align1 WE_all 1N };
mov(8) g126<1>F g4<8,8,1>D { align1 1Q };
mov(16) g124<1>F g4<8,8,1>D { align1 1H };
mov(16) g120<1>F g124<8,8,1>F { align1 1H };
mov(16) g124<1>F 0x0F /* 0F */ { align1 1H };
mov(16) g124<1>D 1065353216D { align1 1H };
mov.nz.f0.0(16) null<1>D g2<0,1,0>D { align1 1H };
mov(8) g3<1>UW 0x76543210V { align1 WE_all 1Q };
mov(16) g20<1>UD g0.1<0,1,0>UD { align1 1H };
mov(16) g6<1>D g3<8,8,1>UW { align1 1H };
mov(8) g1<1>D g4<8,8,1>D { align1 2Q };
mov(8) g5<1>D 0D { align1 2Q };
mov(8) g2<1>F g6<8,4,1>UW { align1 1Q };
mov(8) g7<1>D g2<8,8,1>F { align1 1Q };
mov(16) g2<1>F g10<8,4,1>UW { align1 1H };
mov(16) g11<1>D g2<8,8,1>F { align1 1H };
mov(8) g80<1>DF g5<0,1,0>DF { align1 1Q };
mov(8) g92<2>UD g6.4<0,1,0>UD { align1 1Q };
mov(8) g62<1>Q 0xbff0000000000000Q { align1 1Q };
mov(8) g92<2>F g92<4,4,1>DF { align1 1Q };
mov(8) g92<1>DF g95<4,4,1>F { align1 1Q };
mov(8) g106<1>DF g2<0,1,0>F { align1 2Q };
mov(8) g48<1>Q 0xbff0000000000000Q { align1 2Q };
mov(8) g127<1>UD g106.1<8,4,2>UD { align1 2Q };
mov(8) g11<2>F g7<4,4,1>DF { align1 2Q };
mov(8) g33<1>D g34<8,4,2>UD { align1 2Q };
mov(8) g6<2>UD 0x00000000UD { align1 2Q };
mov(8) g2<1>UW 0x76543210UV { align1 1Q };
mov(8) g12<1>UD g2<8,8,1>UW { align1 1Q };
mov(8) g7<1>UD 0x00080000UD { align1 WE_all 1Q };
mov(1) g2<1>F 0x3e800000F /* 0.25F */ { align1 WE_all 1N };
mov(8) g15<1>F g11<8,8,1>UD { align1 1Q };
mov(1) f0.1<1>UW g1.14<0,1,0>UW { align1 WE_all 1N };
mov(8) g18<1>UD g2<8,8,1>D { align1 1Q };
mov(16) g18<1>UD g26<8,8,1>D { align1 1H };
mov(16) g120<1>D g34<8,8,1>D { align1 1H };
mov(8) g8<1>Q g13<4,4,1>Q { align1 1Q };
mov(8) g21<1>UD g0<8,8,1>UD { align1 WE_all 2Q };
mov(8) g23<1>F g6<0,1,0>F { align1 2Q };
mov(1) g21.2<1>UD 0x000003f2UD { align1 WE_all 3N };
mov.nz.f0.0(8) g19<1>D g3<8,4,2>UD { align1 1Q };
mov(1) f1<1>UD g1.7<0,1,0>UD { align1 WE_all 1N };
mov.sat(8) g126<1>F 0x0F /* 0F */ { align1 1Q };
mov.sat(8) g124<1>F -g36<8,8,1>D { align1 1Q };
mov(8) g41<1>F 0x0F /* 0F */ { align1 2Q };
mov(8) g42<1>UD g11<8,8,1>D { align1 2Q };
mov(16) g86<1>UD g88<8,8,1>UD { align1 WE_all 1H };
mov.sat(16) g120<1>F g2<0,1,0>F { align1 1H };
mov(16) g2<1>F g18<8,8,1>UD { align1 1H };
mov(8) g4<1>UD 0x0F /* 0F */ { align1 1Q };
mov(8) g8<1>DF g2<0,1,0>D { align1 1Q };
mov(16) g8<1>UD 0x00000000UD { align1 1H };
mov.nz.f0.0(8) g4<1>F -(abs)g2<0,1,0>F { align1 1Q };
(+f0.0) mov(8) g4<1>F 0xbf800000F /* -1F */ { align1 1Q };
mov.nz.f0.0(16) g4<1>F -(abs)g2<0,1,0>F { align1 1H };
(+f0.0) mov(16) g4<1>F 0xbf800000F /* -1F */ { align1 1H };
mov(1) f1<1>UD g1.7<0,1,0>UD { align1 WE_all 3N };
mov(8) g32<1>DF g2<0,1,0>DF { align1 2Q };
mov(8) g5<1>F g2<0,1,0>HF { align1 1Q };
mov(16) g6<1>F g2<0,1,0>HF { align1 1H };
mov(8) g7<1>UD g2<0,1,0>F { align1 1Q };
mov(16) g15<1>UD g11<8,8,1>F { align1 1H };
mov(16) g19<1>UD g15<16,8,2>UW { align1 1H };
mov(1) g19<1>UD g[a0 64]<0,1,0>UD { align1 WE_all 1N };
mov(16) g23<1>UD g21<32,8,4>UB { align1 1H };
mov(8) g7<1>DF 0x0000000000000000DF /* 0DF */ { align1 1Q };
mov(8) g5<1>F 0x0F /* 0F */ { align1 WE_all 1Q };
mov(16) g4<1>UD 0x00000000UD { align1 WE_all 1H };
mov(8) g5<2>UD g2<0,1,0>DF { align1 1Q };
mov(8) g10<2>UD g2<0,1,0>DF { align1 2Q };
mov(8) g3<1>DF g2<0,1,0>UD { align1 1Q };
mov(8) g3<1>DF g2<0,1,0>UD { align1 2Q };
mov(1) f0<1>UW 0x0000UW { align1 WE_all 1N };
mov(1) g1<1>D 0D { align1 WE_all 1N };
(+f0.0.any16h) mov(1) g1<1>D -1D { align1 WE_all 1N };
mov(8) g9<1>F g2<0,1,0>W { align1 1Q };
mov(8) g7<1>UQ g4<4,4,1>UQ { align1 1Q };
mov(16) g11<1>UD 0x0F /* 0F */ { align1 1H };
mov(8) g5<2>D g2<0,1,0>DF { align1 1Q };
mov(8) g10<2>D g2<0,1,0>DF { align1 2Q };
mov(1) f1<1>UW f0.1<0,1,0>UW { align1 WE_all 1N };
mov(1) f1<1>UW f0.1<0,1,0>UW { align1 WE_all 3N };
mov(16) g4<1>D 0D { align1 2H };
mov(8) g14<1>UD g13<32,8,4>UB { align1 1Q };
mov(16) g124<1>UD g15<8,8,1>UD { align1 2H };
mov(16) g118<1>D g122<8,8,1>UW { align1 2H };
mov(16) g101<1>UD 0x00000001UD { align1 2H };
mov(1) g4<2>UW 0x00000000UD { align1 WE_all 1N };
mov(8) g4<1>UD f0<0,1,0>UW { align1 1Q };
mov(8) g8<1>D g2<8,8,1>UW { align1 1Q };
mov(16) g4<1>UD f0<0,1,0>UW { align1 1H };
mov(8) g3<1>DF -g2<0,1,0>D { align1 2Q };
mov(8) g5<1>F g2<0,1,0>B { align1 1Q };
mov(16) g6<1>F g2<0,1,0>B { align1 1H };
mov(8) g4<1>DF 0x0000000000000000DF /* 0DF */ { align1 2Q };
mov.nz.f0.0(8) g16<1>D g17<8,4,2>UD { align1 2Q };
mov(8) g34<1>UW 0x76543210V { align1 1Q };
mov(8) g8<1>UD 48D { align1 1Q };
mov(16) g8<1>UD 0D { align1 1H };
mov(8) g7<2>HF g2.1<0,1,0>F { align1 1Q };
mov(1) g5<1>D g[a0 96]<0,1,0>D { align1 WE_all 1N };
(+f0.0.any8h) mov(1) g2<1>D -1D { align1 WE_all 1N };
mov(8) g9<1>UD 0D { align1 WE_all 1Q };
mov(8) g2<2>UW g9<8,8,1>F { align1 1Q };
mov(8) g3<1>UW g2<16,8,2>UW { align1 1Q };
mov(8) g12<1>UW g8<16,8,2>UW { align1 WE_all 1Q };
mov.sat(16) g13<1>F 0x3f800000F /* 1F */ { align1 1H };
mov(16) g19<2>UW g17<8,8,1>F { align1 1H };
mov(16) g4<1>UW g13<16,8,2>UW { align1 WE_all 1H };
mov.nz.f0.0(8) null<1>D 0x00000000UD { align1 1Q };
mov.nz.f0.0(16) null<1>D 0x00000000UD { align1 1H };
mov(4) g3<1>UD tm0<4,4,1>UD { align1 WE_all 1N };
(+f0.0.all16h) mov(1) g1<1>D -1D { align1 WE_all 1N };
mov(8) g9<1>F g2<0,1,0>UB { align1 1Q };
mov(16) g6<1>F g2<0,1,0>UB { align1 1H };
mov(16) g10<2>HF g4<8,8,1>F { align1 1H };
mov.z.f0.0(8) null<1>UD g2<8,8,1>UD { align1 1Q };
mov.sat(8) g125<1>F g9<8,8,1>UD { align1 1Q };
mov.z.f0.0(16) g1<1>UD g0.7<0,1,0>UD { align1 1H };
mov.z.f0.0(8) g18<1>D g17<8,8,1>F { align1 1Q };
mov(16) g35<1>F g15<16,8,2>W { align1 1H };
mov(8) g23<1>Q g26<4,4,1>Q { align1 2Q };
mov(8) g2<1>D 0x00000000UD { align1 1Q };
mov(16) g2<1>D 0x00000000UD { align1 1H };
(+f0.0.all8h) mov(1) g7<1>D -1D { align1 WE_all 1N };
mov(8) g127<1>UB g2<0,1,0>UB { align1 WE_all 1Q };
mov.z.f0.0(8) null<1>D g24<8,8,1>F { align1 1Q };
mov.z.f0.0(16) null<1>D g76<8,8,1>F { align1 1H };
mov(16) g7<1>D g2<16,8,2>B { align1 1H };

View file

@ -1,139 +0,0 @@
01 00 60 00 0c 02 60 2f 20 00 8d 00 00 00 00 00
01 00 60 00 e8 3e 80 2f 00 00 00 38 00 00 c0 40
01 00 60 00 08 06 c0 21 00 00 00 00 00 00 00 00
01 00 60 00 e8 3a 20 22 80 01 8d 00 00 00 00 00
01 00 60 80 e8 3a 80 2f 00 01 8d 00 00 00 00 00
01 00 60 00 28 0a a0 47 c0 02 8d 00 00 00 00 00
01 00 60 00 28 02 a0 22 60 07 8a 00 00 00 00 00
01 00 60 00 28 0e 80 20 00 00 00 08 ff ff ff ff
01 00 60 02 20 0a 00 20 80 00 8d 00 00 00 00 00
01 00 00 00 0c 06 48 20 00 00 00 00 00 00 00 00
01 00 40 00 ec 3a 40 2e 4c 00 87 00 00 00 00 00
01 00 60 00 e8 0a c0 2f 80 00 8d 00 00 00 00 00
01 00 80 00 e8 0a 80 2f 80 00 8d 00 00 00 00 00
01 00 80 00 e8 3a 00 2f 80 0f 8d 00 00 00 00 00
01 00 80 00 e8 3e 80 2f 00 00 00 38 00 00 00 00
01 00 80 00 28 0e 80 2f 00 00 00 08 00 00 80 3f
01 00 80 02 20 0a 00 20 40 00 00 00 00 00 00 00
01 00 60 00 4c 36 60 20 00 00 00 30 10 32 54 76
01 00 80 00 08 02 80 22 04 00 00 00 00 00 00 00
01 00 80 00 28 12 c0 20 60 00 8d 00 00 00 00 00
01 10 60 00 28 0a 20 20 80 00 8d 00 00 00 00 00
01 10 60 00 28 0e a0 20 00 00 00 08 00 00 00 00
01 00 60 00 e8 12 40 20 c0 00 89 00 00 00 00 00
01 00 60 00 28 3a e0 20 40 00 8d 00 00 00 00 00
01 00 80 00 e8 12 40 20 40 01 89 00 00 00 00 00
01 00 80 00 28 3a 60 21 40 00 8d 00 00 00 00 00
01 00 60 00 c8 32 00 2a a0 00 00 00 00 00 00 00
01 00 60 00 08 02 80 4b d0 00 00 00 00 00 00 00
01 00 60 00 28 4f c0 27 00 00 00 00 00 00 f0 bf
01 00 60 00 e8 32 80 4b 80 0b 69 00 00 00 00 00
01 00 60 00 c8 3a 80 2b e0 0b 69 00 00 00 00 00
01 10 60 00 c8 3a 40 2d 40 00 00 00 00 00 00 00
01 10 60 00 28 4f 00 26 00 00 00 00 00 00 f0 bf
01 10 60 00 08 02 e0 2f 44 0d 8a 00 00 00 00 00
01 10 60 00 e8 32 60 41 e0 00 69 00 00 00 00 00
01 10 60 00 28 02 20 24 40 04 8a 00 00 00 00 00
01 10 60 00 08 06 c0 40 00 00 00 00 00 00 00 00
01 00 60 00 48 26 40 20 00 00 00 20 10 32 54 76
01 00 60 00 08 12 80 21 40 00 8d 00 00 00 00 00
01 00 60 00 0c 06 e0 20 00 00 00 00 00 00 08 00
01 00 00 00 ec 3e 40 20 00 00 00 38 00 00 80 3e
01 00 60 00 e8 02 e0 21 60 01 8d 00 00 00 00 00
01 00 00 00 44 12 02 26 3c 00 00 00 00 00 00 00
01 00 60 00 08 0a 40 22 40 00 8d 00 00 00 00 00
01 00 80 00 08 0a 40 22 40 03 8d 00 00 00 00 00
01 00 80 00 28 0a 00 2f 40 04 8d 00 00 00 00 00
01 00 60 00 28 4b 00 21 a0 01 69 00 00 00 00 00
01 10 60 00 0c 02 a0 22 00 00 8d 00 00 00 00 00
01 10 60 00 e8 3a e0 22 c0 00 00 00 00 00 00 00
01 10 00 00 0c 06 a8 22 00 00 00 00 f2 03 00 00
01 00 60 02 28 02 60 22 60 00 8a 00 00 00 00 00
01 00 00 00 04 02 20 26 3c 00 00 00 00 00 00 00
01 00 60 80 e8 3e c0 2f 00 00 00 38 00 00 00 00
01 00 60 80 e8 0a 80 2f 80 44 8d 00 00 00 00 00
01 10 60 00 e8 3e 20 25 00 00 00 38 00 00 00 00
01 10 60 00 08 0a 40 25 60 01 8d 00 00 00 00 00
01 00 80 00 0c 02 c0 2a 00 0b 8d 00 00 00 00 00
01 00 80 80 e8 3a 00 2f 40 00 00 00 00 00 00 00
01 00 80 00 e8 02 40 20 40 02 8d 00 00 00 00 00
01 00 60 00 08 3e 80 20 00 00 00 38 00 00 00 00
01 00 60 00 c8 0a 00 21 40 00 00 00 00 00 00 00
01 00 80 00 08 06 00 21 00 00 00 00 00 00 00 00
01 00 60 02 e8 3a 80 20 40 60 00 00 00 00 00 00
01 00 61 00 e8 3e 80 20 00 00 00 38 00 00 80 bf
01 00 80 02 e8 3a 80 20 40 60 00 00 00 00 00 00
01 00 81 00 e8 3e 80 20 00 00 00 38 00 00 80 bf
01 10 00 00 04 02 20 26 3c 00 00 00 00 00 00 00
01 10 60 00 c8 32 00 24 40 00 00 00 00 00 00 00
01 00 60 00 e8 52 a0 20 40 00 00 00 00 00 00 00
01 00 80 00 e8 52 c0 20 40 00 00 00 00 00 00 00
01 00 60 00 08 3a e0 20 40 00 00 00 00 00 00 00
01 00 80 00 08 3a e0 21 60 01 8d 00 00 00 00 00
01 00 80 00 08 12 60 22 e0 01 ae 00 00 00 00 00
01 00 00 00 0c 02 60 22 40 80 00 00 00 00 00 00
01 00 80 00 08 22 e0 22 a0 02 cf 00 00 00 00 00
01 00 60 00 c8 56 e0 20 00 00 00 00 00 00 00 00
01 00 60 00 ec 3e a0 20 00 00 00 38 00 00 00 00
01 00 80 00 0c 06 80 20 00 00 00 00 00 00 00 00
01 00 60 00 08 32 a0 40 40 00 00 00 00 00 00 00
01 10 60 00 08 32 40 41 40 00 00 00 00 00 00 00
01 00 60 00 c8 02 60 20 40 00 00 00 00 00 00 00
01 10 60 00 c8 02 60 20 40 00 00 00 00 00 00 00
01 00 00 00 44 16 00 26 00 00 00 10 00 00 00 00
01 00 00 00 2c 0e 20 20 00 00 00 08 00 00 00 00
01 00 0a 00 2c 0e 20 20 00 00 00 08 ff ff ff ff
01 00 60 00 e8 1a 20 21 40 00 00 00 00 00 00 00
01 00 60 00 08 43 e0 20 80 00 69 00 00 00 00 00
01 00 80 00 08 3e 60 21 00 00 00 38 00 00 00 00
01 00 60 00 28 32 a0 40 40 00 00 00 00 00 00 00
01 10 60 00 28 32 40 41 40 00 00 00 00 00 00 00
01 00 00 00 44 10 20 26 02 06 00 00 00 00 00 00
01 10 00 00 44 10 20 26 02 06 00 00 00 00 00 00
01 20 80 00 28 0e 80 20 00 00 00 08 00 00 00 00
01 00 60 00 08 22 c0 21 a0 01 cf 00 00 00 00 00
01 20 80 00 08 02 80 2f e0 01 8d 00 00 00 00 00
01 20 80 00 28 12 c0 2e 40 0f 8d 00 00 00 00 00
01 20 80 00 08 06 a0 2c 00 00 00 00 01 00 00 00
01 00 00 00 4c 06 80 40 00 00 00 00 00 00 00 00
01 00 60 00 08 10 80 20 00 06 00 00 00 00 00 00
01 00 60 00 28 12 00 21 40 00 8d 00 00 00 00 00
01 00 80 00 08 10 80 20 00 06 00 00 00 00 00 00
01 10 60 00 c8 0a 60 20 40 40 00 00 00 00 00 00
01 00 60 00 e8 2a a0 20 40 00 00 00 00 00 00 00
01 00 80 00 e8 2a c0 20 40 00 00 00 00 00 00 00
01 10 60 00 c8 56 80 20 00 00 00 00 00 00 00 00
01 10 60 02 28 02 00 22 20 02 8a 00 00 00 00 00
01 00 60 00 48 36 40 24 00 00 00 30 10 32 54 76
01 00 60 00 08 0e 00 21 00 00 00 08 30 00 00 00
01 00 80 00 08 0e 00 21 00 00 00 08 00 00 00 00
01 00 60 00 48 3b e0 40 44 00 00 00 00 00 00 00
01 00 00 00 2c 0a a0 20 60 80 00 00 00 00 00 00
01 00 08 00 2c 0e 40 20 00 00 00 08 ff ff ff ff
01 00 60 00 0c 0e 20 21 00 00 00 08 00 00 00 00
01 00 60 00 48 3a 40 40 20 01 8d 00 00 00 00 00
01 00 60 00 48 12 60 20 40 00 ae 00 00 00 00 00
01 00 60 00 4c 12 80 21 00 01 ae 00 00 00 00 00
01 00 80 80 e8 3e a0 21 00 00 00 38 00 00 80 3f
01 00 80 00 48 3a 60 42 20 02 8d 00 00 00 00 00
01 00 80 00 4c 12 80 20 a0 01 ae 00 00 00 00 00
01 00 60 02 20 06 00 20 00 00 00 00 00 00 00 00
01 00 80 02 20 06 00 20 00 00 00 00 00 00 00 00
01 00 40 00 0c 00 60 20 00 18 69 00 00 00 00 00
01 00 0b 00 2c 0e 20 20 00 00 00 08 ff ff ff ff
01 00 60 00 e8 22 20 21 40 00 00 00 00 00 00 00
01 00 80 00 e8 22 c0 20 40 00 00 00 00 00 00 00
01 00 80 00 48 3b 40 41 80 00 8d 00 00 00 00 00
01 00 60 01 00 02 00 20 40 00 8d 00 00 00 00 00
01 00 60 80 e8 02 a0 2f 20 01 8d 00 00 00 00 00
01 00 80 01 08 02 20 20 1c 00 00 00 00 00 00 00
01 00 60 01 28 3a 40 22 20 02 8d 00 00 00 00 00
01 00 80 00 e8 1a 60 24 e0 01 ae 00 00 00 00 00
01 10 60 00 28 4b e0 22 40 03 69 00 00 00 00 00
01 00 60 00 28 06 40 20 00 00 00 00 00 00 00 00
01 00 80 00 28 06 40 20 00 00 00 00 00 00 00 00
01 00 09 00 2c 0e e0 20 00 00 00 08 ff ff ff ff
01 00 60 00 8c 22 e0 2f 40 00 00 00 00 00 00 00
01 00 60 01 20 3a 00 20 00 03 8d 00 00 00 00 00
01 00 80 01 20 3a 00 20 80 09 8d 00 00 00 00 00
01 00 80 00 28 2a e0 20 40 00 ae 00 00 00 00 00

View file

@ -1,31 +0,0 @@
mul(8) g22<1>F g4<8,8,1>F g2<0,1,0>F { align1 1Q };
mul(16) g26<1>F g2<0,1,0>F g2<0,1,0>F { align1 1H };
mul(8) g36<1>DF g8<0,1,0>DF g8<0,1,0>DF { align1 1Q };
mul(8) g9<1>UD g86<8,8,1>UD 0x00000004UD { align1 1Q };
mul(8) acc0<1>UD g17<8,8,1>UD 0xaaabUW { align1 1Q };
mul(8) acc0<1>D g17<8,8,1>D 0x5556UW { align1 1Q };
mul(8) g21<1>D g20<8,8,1>D 3D { align1 1Q };
mul(8) acc0<1>UD g39<8,8,1>UD 0xaaabUW { align1 2Q };
mul(16) g45<1>D g43<8,8,1>D 3D { align1 1H };
mul(8) acc0<1>D g39<8,8,1>D 0x5556UW { align1 2Q };
mul.z.f0.0(8) g10<1>F g5<0,1,0>F g9<8,8,1>F { align1 1Q };
mul(8) g39<1>DF g3.3<0,1,0>DF g3.3<0,1,0>DF { align1 2Q };
mul.z.f0.0(16) g6<1>F g2<0,1,0>F g4<8,8,1>F { align1 1H };
mul.sat(8) g17<1>F g4<8,8,1>F g16<8,8,1>F { align1 1Q };
mul.sat(16) g9<1>F g3<8,8,1>F g7<8,8,1>F { align1 1H };
mul.l.f0.0(8) null<1>F g6<0,1,0>F g5.7<0,1,0>F { align1 1Q };
mul.sat(8) g8<1>DF g34<4,4,1>DF g5<4,4,1>DF { align1 1Q };
mul(8) g4<1>UQ g8<4,4,1>UD g12<4,4,1>UD { align1 1Q };
mul(8) g20<1>UQ g5<4,4,1>UD g13<4,4,1>UD { align1 2Q };
mul(8) g5<1>Q g9<4,4,1>D g13<4,4,1>D { align1 1Q };
mul.sat(8) g10<1>DF g10<4,4,1>DF g16<4,4,1>DF { align1 2Q };
mul.l.f0.0(8) g20<1>F g2<8,8,1>F 0x42700000F /* 60F */ { align1 1Q };
mul.l.f0.0(16) g32<1>F g2<8,8,1>F 0x42700000F /* 60F */ { align1 1H };
mul(1) g6<1>UD g12<0,1,0>UD 0x00000101UD { align1 WE_all 1N };
mul(8) g21<1>Q g6<4,4,1>D g14<4,4,1>D { align1 2Q };
mul.l.f0.0(16) null<1>F g2.2<0,1,0>F g2.1<0,1,0>F { align1 1H };
mul(8) g6<1>UW g6<8,8,1>UW 0x0808UW { align1 1Q };
mul(16) g15<1>UW g14<16,16,1>UW 0x0808UW { align1 1H };
mul.nz.f0.0(8) g6<1>F g12<8,8,1>F 0x3f808000F /* 1.00391F */ { align1 1Q };
mul.nz.f0.0(16) g9<1>F g7<8,8,1>F 0x3f808000F /* 1.00391F */ { align1 1H };
mul(1) g4<1>UD g4<0,1,0>UD 0x00000101UD { align1 WE_all 3N };

View file

@ -1,31 +0,0 @@
41 00 60 00 e8 3a c0 22 80 00 8d 3a 40 00 00 00
41 00 80 00 e8 3a 40 23 40 00 00 3a 40 00 00 00
41 00 60 00 c8 32 80 24 00 01 00 32 00 01 00 00
41 00 60 00 08 02 20 21 c0 0a 8d 06 04 00 00 00
41 00 60 00 00 02 00 24 20 02 8d 16 ab aa ab aa
41 00 60 00 20 0a 00 24 20 02 8d 16 56 55 56 55
41 00 60 00 28 0a a0 22 80 02 8d 0e 03 00 00 00
41 10 60 00 00 02 00 24 e0 04 8d 16 ab aa ab aa
41 00 80 00 28 0a a0 25 60 05 8d 0e 03 00 00 00
41 10 60 00 20 0a 00 24 e0 04 8d 16 56 55 56 55
41 00 60 01 e8 3a 40 21 a0 00 00 3a 20 01 8d 00
41 10 60 00 c8 32 e0 24 78 00 00 32 78 00 00 00
41 00 80 01 e8 3a c0 20 40 00 00 3a 80 00 8d 00
41 00 60 80 e8 3a 20 22 80 00 8d 3a 00 02 8d 00
41 00 80 80 e8 3a 20 21 60 00 8d 3a e0 00 8d 00
41 00 60 05 e0 3a 00 20 c0 00 00 3a bc 00 00 00
41 00 60 80 c8 32 00 21 40 04 69 32 a0 00 69 00
41 00 60 00 08 03 80 20 00 01 69 02 80 01 69 00
41 10 60 00 08 03 80 22 a0 00 69 02 a0 01 69 00
41 00 60 00 28 0b a0 20 20 01 69 0a a0 01 69 00
41 10 60 80 c8 32 40 21 40 01 69 32 00 02 69 00
41 00 60 05 e8 3a 80 22 40 00 8d 3e 00 00 70 42
41 00 80 05 e8 3a 00 24 40 00 8d 3e 00 00 70 42
41 00 00 00 0c 02 c0 20 80 01 00 06 01 01 00 00
41 10 60 00 28 0b a0 22 c0 00 69 0a c0 01 69 00
41 00 80 05 e0 3a 00 20 48 00 00 3a 44 00 00 00
41 00 60 00 48 12 c0 20 c0 00 8d 16 08 08 08 08
41 00 80 00 48 12 e0 21 c0 01 b1 16 08 08 08 08
41 00 60 02 e8 3a c0 20 80 01 8d 3e 00 80 80 3f
41 00 80 02 e8 3a 20 21 e0 00 8d 3e 00 80 80 3f
41 10 00 00 0c 02 80 20 80 00 00 06 01 01 00 00

View file

@ -1 +0,0 @@
nop ;

View file

@ -1 +0,0 @@
7e 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00

View file

@ -1,2 +0,0 @@
not(16) g3<1>D g1.2<0,1,0>D { align1 1H };
not(8) g4<1>D g8<8,8,1>D { align1 1Q };

View file

@ -1,2 +0,0 @@
04 00 80 00 28 0a 60 20 28 00 00 00 00 00 00 00
04 00 60 00 28 0a 80 20 00 01 8d 00 00 00 00 00

View file

@ -1,23 +0,0 @@
or(8) g53<1>UD g49<8,8,1>UD g21<8,8,1>UD { align1 1Q };
or.nz.f0.0(8) null<1>UD g21<8,8,1>UD g2<8,8,1>UD { align1 1Q };
or.nz.f0.0(8) g5<1>UD g62<8,8,1>UD g67<8,8,1>UD { align1 1Q };
or(8) g5<1>UD g106.1<8,4,2>UD 0x7ff00000UD { align1 2Q };
or.nz.f0.0(16) null<1>UD g35<8,8,1>UD g32<8,8,1>UD { align1 1H };
or(16) g36<1>UD g34<8,8,1>UD g20<8,8,1>UD { align1 1H };
or.nz.f0.0(16) g53<1>UD g51<8,8,1>UD g49<8,8,1>UD { align1 1H };
or(1) g8<1>UD g8<0,1,0>UD g4<0,1,0>UD { align1 WE_all 1N };
or(1) a0<1>UD g8<0,1,0>UD 0x060ba000UD { align1 WE_all 1N };
(+f0.0) or(8) g3<1>UD g3<8,8,1>UD 0x3f800000UD { align1 1Q };
(+f0.0) or(16) g3<1>UD g3<8,8,1>UD 0x3f800000UD { align1 1H };
or(1) a0<1>UD a0<0,1,0>UD 0x02280300UD { align1 WE_all 1N };
or(1) a0<1>UD g4<0,1,0>UD 0x04036000UD { align1 WE_all 3N };
(+f0.0) or(8) g17.1<2>UD g17.1<8,4,2>UD 0x3ff00000UD { align1 2Q };
or(8) g4<1>UW g4<8,8,1>UW g6<8,8,1>UW { align1 1Q };
or(16) g16<1>UW g14<16,16,1>UW g15<16,16,1>UW { align1 1H };
or(8) g22<1>UD ~g2.2<0,1,0>D g21<8,8,1>UD { align1 1Q };
or(16) g37<1>UD ~g2.2<0,1,0>D g35<8,8,1>UD { align1 1H };
or(8) g9<1>D ~g8<8,8,1>D ~g7<8,8,1>D { align1 1Q };
or(16) g13<1>D ~g11<8,8,1>D ~g9<8,8,1>D { align1 1H };
or(1) g14<1>UD g14<0,1,0>UD g19<0,1,0>UD { align1 WE_all 3N };
or.z.f0.0(8) null<1>UD g5<8,8,1>UD g6<8,8,1>UD { align1 1Q };
or.z.f0.0(16) null<1>UD g17<8,8,1>UD g19<8,8,1>UD { align1 1H };

View file

@ -1,23 +0,0 @@
06 00 60 00 08 02 a0 26 20 06 8d 02 a0 02 8d 00
06 00 60 02 00 02 00 20 a0 02 8d 02 40 00 8d 00
06 00 60 02 08 02 a0 20 c0 07 8d 02 60 08 8d 00
06 10 60 00 08 02 a0 20 44 0d 8a 06 00 00 f0 7f
06 00 80 02 00 02 00 20 60 04 8d 02 00 04 8d 00
06 00 80 00 08 02 80 24 40 04 8d 02 80 02 8d 00
06 00 80 02 08 02 a0 26 60 06 8d 02 20 06 8d 00
06 00 00 00 0c 02 00 21 00 01 00 02 80 00 00 00
06 00 00 00 04 02 00 22 00 01 00 06 00 a0 0b 06
06 00 61 00 08 02 60 20 60 00 8d 06 00 00 80 3f
06 00 81 00 08 02 60 20 60 00 8d 06 00 00 80 3f
06 00 00 00 04 00 00 22 00 02 00 06 00 03 28 02
06 10 00 00 04 02 00 22 80 00 00 06 00 60 03 04
06 10 61 00 08 02 24 42 24 02 8a 06 00 00 f0 3f
06 00 60 00 48 12 80 20 80 00 8d 12 c0 00 8d 00
06 00 80 00 48 12 00 22 c0 01 b1 12 e0 01 b1 00
06 00 60 00 08 0a c0 22 48 40 00 02 a0 02 8d 00
06 00 80 00 08 0a a0 24 48 40 00 02 60 04 8d 00
06 00 60 00 28 0a 20 21 00 41 8d 0a e0 40 8d 00
06 00 80 00 28 0a a0 21 60 41 8d 0a 20 41 8d 00
06 10 00 00 0c 02 c0 21 c0 01 00 02 60 02 00 00
06 00 60 01 00 02 00 20 a0 00 8d 02 c0 00 8d 00
06 00 80 01 00 02 00 20 20 02 8d 02 60 02 8d 00

View file

@ -1,10 +0,0 @@
pln(8) g124<1>F g4<0,1,0>F g2<8,8,1>F { align1 1Q };
pln(16) g120<1>F g6<0,1,0>F g2<8,8,1>F { align1 1H };
pln.sat(8) g9<1>F g5<0,1,0>F g2<8,8,1>F { align1 1Q };
pln.sat(16) g12<1>F g7<0,1,0>F g2<8,8,1>F { align1 1H };
pln.g.f0.0(8) g7<1>F g4<0,1,0>F g2<8,8,1>F { align1 1Q };
pln.g.f0.0(16) g11<1>F g6<0,1,0>F g2<8,8,1>F { align1 1H };
pln.l.f0.0(8) g8<1>F g4<0,1,0>F g2<8,8,1>F { align1 1Q };
pln.l.f0.0(16) g11<1>F g6<0,1,0>F g2<8,8,1>F { align1 1H };
pln.nz.f0.0(8) g18<1>F g5<0,1,0>F g2<8,8,1>F { align1 1Q };
pln.nz.f0.0(16) g14<1>F g7<0,1,0>F g2<8,8,1>F { align1 1H };

View file

@ -1,10 +0,0 @@
5a 00 60 00 e8 3a 80 2f 80 00 00 3a 40 00 8d 00
5a 00 80 00 e8 3a 00 2f c0 00 00 3a 40 00 8d 00
5a 00 60 80 e8 3a 20 21 a0 00 00 3a 40 00 8d 00
5a 00 80 80 e8 3a 80 21 e0 00 00 3a 40 00 8d 00
5a 00 60 03 e8 3a e0 20 80 00 00 3a 40 00 8d 00
5a 00 80 03 e8 3a 60 21 c0 00 00 3a 40 00 8d 00
5a 00 60 05 e8 3a 00 21 80 00 00 3a 40 00 8d 00
5a 00 80 05 e8 3a 60 21 c0 00 00 3a 40 00 8d 00
5a 00 60 02 e8 3a 40 22 a0 00 00 3a 40 00 8d 00
5a 00 80 02 e8 3a c0 21 e0 00 00 3a 40 00 8d 00

Some files were not shown because too many files have changed in this diff Show more