mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-10 06:00:14 +01:00
intel/elk: Remove a bunch of files that don't apply for Gfx8-
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27563>
This commit is contained in:
parent
06b553f02c
commit
dcf29202d4
124 changed files with 0 additions and 17536 deletions
|
|
@ -3490,8 +3490,6 @@ fs_visitor::emit_repclear_shader()
|
|||
calculate_cfg();
|
||||
|
||||
this->first_non_payload_grf = payload().num_regs;
|
||||
|
||||
lower_scoreboard();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -6823,8 +6821,6 @@ fs_visitor::allocate_registers(bool allow_spilling)
|
|||
*/
|
||||
assert(prog_data->total_scratch < max_scratch_size);
|
||||
}
|
||||
|
||||
lower_scoreboard();
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,790 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2020 Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "brw_kernel.h"
|
||||
#include "brw_nir.h"
|
||||
#include "intel_nir.h"
|
||||
|
||||
#include "intel_nir.h"
|
||||
#include "nir_clc_helpers.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "compiler/spirv/nir_spirv.h"
|
||||
#include "dev/intel_debug.h"
|
||||
#include "util/u_atomic.h"
|
||||
#include "util/u_dynarray.h"
|
||||
|
||||
static const nir_shader *
|
||||
load_clc_shader(struct brw_compiler *compiler, struct disk_cache *disk_cache,
|
||||
const nir_shader_compiler_options *nir_options,
|
||||
const struct spirv_to_nir_options *spirv_options)
|
||||
{
|
||||
if (compiler->clc_shader)
|
||||
return compiler->clc_shader;
|
||||
|
||||
nir_shader *nir = nir_load_libclc_shader(64, disk_cache,
|
||||
spirv_options, nir_options,
|
||||
disk_cache != NULL);
|
||||
if (nir == NULL)
|
||||
return NULL;
|
||||
|
||||
const nir_shader *old_nir =
|
||||
p_atomic_cmpxchg(&compiler->clc_shader, NULL, nir);
|
||||
if (old_nir == NULL) {
|
||||
/* We won the race */
|
||||
ralloc_steal(compiler, nir);
|
||||
return nir;
|
||||
} else {
|
||||
/* Someone else built the shader first */
|
||||
ralloc_free(nir);
|
||||
return old_nir;
|
||||
}
|
||||
}
|
||||
|
||||
static nir_builder
|
||||
builder_init_new_impl(nir_function *func)
|
||||
{
|
||||
nir_function_impl *impl = nir_function_impl_create(func);
|
||||
return nir_builder_at(nir_before_impl(impl));
|
||||
}
|
||||
|
||||
static void
|
||||
implement_atomic_builtin(nir_function *func, nir_atomic_op atomic_op,
|
||||
enum glsl_base_type data_base_type,
|
||||
nir_variable_mode mode)
|
||||
{
|
||||
nir_builder b = builder_init_new_impl(func);
|
||||
const struct glsl_type *data_type = glsl_scalar_type(data_base_type);
|
||||
|
||||
unsigned p = 0;
|
||||
|
||||
nir_deref_instr *ret = NULL;
|
||||
ret = nir_build_deref_cast(&b, nir_load_param(&b, p++),
|
||||
nir_var_function_temp, data_type, 0);
|
||||
|
||||
nir_intrinsic_op op = nir_intrinsic_deref_atomic;
|
||||
nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b.shader, op);
|
||||
nir_intrinsic_set_atomic_op(atomic, atomic_op);
|
||||
|
||||
for (unsigned i = 0; i < nir_intrinsic_infos[op].num_srcs; i++) {
|
||||
nir_def *src = nir_load_param(&b, p++);
|
||||
if (i == 0) {
|
||||
/* The first source is our deref */
|
||||
assert(nir_intrinsic_infos[op].src_components[i] == -1);
|
||||
src = &nir_build_deref_cast(&b, src, mode, data_type, 0)->def;
|
||||
}
|
||||
atomic->src[i] = nir_src_for_ssa(src);
|
||||
}
|
||||
|
||||
nir_def_init_for_type(&atomic->instr, &atomic->def, data_type);
|
||||
|
||||
nir_builder_instr_insert(&b, &atomic->instr);
|
||||
nir_store_deref(&b, ret, &atomic->def, ~0);
|
||||
}
|
||||
|
||||
static void
|
||||
implement_sub_group_ballot_builtin(nir_function *func)
|
||||
{
|
||||
nir_builder b = builder_init_new_impl(func);
|
||||
nir_deref_instr *ret =
|
||||
nir_build_deref_cast(&b, nir_load_param(&b, 0),
|
||||
nir_var_function_temp, glsl_uint_type(), 0);
|
||||
nir_def *cond = nir_load_param(&b, 1);
|
||||
|
||||
nir_intrinsic_instr *ballot =
|
||||
nir_intrinsic_instr_create(b.shader, nir_intrinsic_ballot);
|
||||
ballot->src[0] = nir_src_for_ssa(cond);
|
||||
ballot->num_components = 1;
|
||||
nir_def_init(&ballot->instr, &ballot->def, 1, 32);
|
||||
nir_builder_instr_insert(&b, &ballot->instr);
|
||||
|
||||
nir_store_deref(&b, ret, &ballot->def, ~0);
|
||||
}
|
||||
|
||||
static bool
|
||||
implement_intel_builtins(nir_shader *nir)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
nir_foreach_function(func, nir) {
|
||||
if (strcmp(func->name, "_Z10atomic_minPU3AS1Vff") == 0) {
|
||||
/* float atom_min(__global float volatile *p, float val) */
|
||||
implement_atomic_builtin(func, nir_atomic_op_fmin,
|
||||
GLSL_TYPE_FLOAT, nir_var_mem_global);
|
||||
progress = true;
|
||||
} else if (strcmp(func->name, "_Z10atomic_maxPU3AS1Vff") == 0) {
|
||||
/* float atom_max(__global float volatile *p, float val) */
|
||||
implement_atomic_builtin(func, nir_atomic_op_fmax,
|
||||
GLSL_TYPE_FLOAT, nir_var_mem_global);
|
||||
progress = true;
|
||||
} else if (strcmp(func->name, "_Z10atomic_minPU3AS3Vff") == 0) {
|
||||
/* float atomic_min(__shared float volatile *, float) */
|
||||
implement_atomic_builtin(func, nir_atomic_op_fmin,
|
||||
GLSL_TYPE_FLOAT, nir_var_mem_shared);
|
||||
progress = true;
|
||||
} else if (strcmp(func->name, "_Z10atomic_maxPU3AS3Vff") == 0) {
|
||||
/* float atomic_max(__shared float volatile *, float) */
|
||||
implement_atomic_builtin(func, nir_atomic_op_fmax,
|
||||
GLSL_TYPE_FLOAT, nir_var_mem_shared);
|
||||
progress = true;
|
||||
} else if (strcmp(func->name, "intel_sub_group_ballot") == 0) {
|
||||
implement_sub_group_ballot_builtin(func);
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
nir_shader_preserve_all_metadata(nir);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_kernel_intrinsics(nir_shader *nir)
|
||||
{
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
|
||||
|
||||
bool progress = false;
|
||||
|
||||
unsigned kernel_sysvals_start = 0;
|
||||
unsigned kernel_arg_start = sizeof(struct brw_kernel_sysvals);
|
||||
nir->num_uniforms += kernel_arg_start;
|
||||
|
||||
nir_builder b = nir_builder_create(impl);
|
||||
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_kernel_input: {
|
||||
b.cursor = nir_instr_remove(&intrin->instr);
|
||||
|
||||
nir_intrinsic_instr *load =
|
||||
nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
|
||||
load->num_components = intrin->num_components;
|
||||
load->src[0] = nir_src_for_ssa(nir_u2u32(&b, intrin->src[0].ssa));
|
||||
nir_intrinsic_set_base(load, kernel_arg_start);
|
||||
nir_intrinsic_set_range(load, nir->num_uniforms);
|
||||
nir_def_init(&load->instr, &load->def,
|
||||
intrin->def.num_components,
|
||||
intrin->def.bit_size);
|
||||
nir_builder_instr_insert(&b, &load->instr);
|
||||
|
||||
nir_def_rewrite_uses(&intrin->def, &load->def);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_constant_base_ptr: {
|
||||
b.cursor = nir_instr_remove(&intrin->instr);
|
||||
nir_def *const_data_base_addr = nir_pack_64_2x32_split(&b,
|
||||
nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW),
|
||||
nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
|
||||
nir_def_rewrite_uses(&intrin->def, const_data_base_addr);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_num_workgroups: {
|
||||
b.cursor = nir_instr_remove(&intrin->instr);
|
||||
|
||||
nir_intrinsic_instr *load =
|
||||
nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
|
||||
load->num_components = 3;
|
||||
load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
|
||||
nir_intrinsic_set_base(load, kernel_sysvals_start +
|
||||
offsetof(struct brw_kernel_sysvals, num_work_groups));
|
||||
nir_intrinsic_set_range(load, 3 * 4);
|
||||
nir_def_init(&load->instr, &load->def, 3, 32);
|
||||
nir_builder_instr_insert(&b, &load->instr);
|
||||
nir_def_rewrite_uses(&intrin->def, &load->def);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (progress) {
|
||||
nir_metadata_preserve(impl, nir_metadata_block_index |
|
||||
nir_metadata_dominance);
|
||||
} else {
|
||||
nir_metadata_preserve(impl, nir_metadata_all);
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_kernel_from_spirv(struct brw_compiler *compiler,
|
||||
struct disk_cache *disk_cache,
|
||||
struct brw_kernel *kernel,
|
||||
void *log_data, void *mem_ctx,
|
||||
const uint32_t *spirv, size_t spirv_size,
|
||||
const char *entrypoint_name,
|
||||
char **error_str)
|
||||
{
|
||||
const struct intel_device_info *devinfo = compiler->devinfo;
|
||||
const nir_shader_compiler_options *nir_options =
|
||||
compiler->nir_options[MESA_SHADER_KERNEL];
|
||||
|
||||
struct spirv_to_nir_options spirv_options = {
|
||||
.environment = NIR_SPIRV_OPENCL,
|
||||
.caps = {
|
||||
.address = true,
|
||||
.float16 = devinfo->ver >= 8,
|
||||
.float64 = devinfo->ver >= 8,
|
||||
.groups = true,
|
||||
.image_write_without_format = true,
|
||||
.int8 = devinfo->ver >= 8,
|
||||
.int16 = devinfo->ver >= 8,
|
||||
.int64 = devinfo->ver >= 8,
|
||||
.int64_atomics = devinfo->ver >= 9,
|
||||
.kernel = true,
|
||||
.linkage = true, /* We receive linked kernel from clc */
|
||||
.float_controls = devinfo->ver >= 8,
|
||||
.generic_pointers = true,
|
||||
.storage_8bit = devinfo->ver >= 8,
|
||||
.storage_16bit = devinfo->ver >= 8,
|
||||
.subgroup_arithmetic = true,
|
||||
.subgroup_basic = true,
|
||||
.subgroup_ballot = true,
|
||||
.subgroup_dispatch = true,
|
||||
.subgroup_quad = true,
|
||||
.subgroup_shuffle = true,
|
||||
.subgroup_vote = true,
|
||||
|
||||
.intel_subgroup_shuffle = true,
|
||||
.intel_subgroup_buffer_block_io = true,
|
||||
},
|
||||
.shared_addr_format = nir_address_format_62bit_generic,
|
||||
.global_addr_format = nir_address_format_62bit_generic,
|
||||
.temp_addr_format = nir_address_format_62bit_generic,
|
||||
.constant_addr_format = nir_address_format_64bit_global,
|
||||
};
|
||||
|
||||
spirv_options.clc_shader = load_clc_shader(compiler, disk_cache,
|
||||
nir_options, &spirv_options);
|
||||
if (spirv_options.clc_shader == NULL) {
|
||||
fprintf(stderr, "ERROR: libclc shader missing."
|
||||
" Consider installing the libclc package\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
assert(spirv_size % 4 == 0);
|
||||
nir_shader *nir =
|
||||
spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
|
||||
entrypoint_name, &spirv_options, nir_options);
|
||||
nir_validate_shader(nir, "after spirv_to_nir");
|
||||
nir_validate_ssa_dominance(nir, "after spirv_to_nir");
|
||||
ralloc_steal(mem_ctx, nir);
|
||||
nir->info.name = ralloc_strdup(nir, entrypoint_name);
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_CS)) {
|
||||
/* Re-index SSA defs so we print more sensible numbers. */
|
||||
nir_foreach_function_impl(impl, nir) {
|
||||
nir_index_ssa_defs(impl);
|
||||
}
|
||||
|
||||
fprintf(stderr, "NIR (from SPIR-V) for kernel\n");
|
||||
nir_print_shader(nir, stderr);
|
||||
}
|
||||
|
||||
NIR_PASS_V(nir, implement_intel_builtins);
|
||||
NIR_PASS_V(nir, nir_link_shader_functions, spirv_options.clc_shader);
|
||||
|
||||
/* We have to lower away local constant initializers right before we
|
||||
* inline functions. That way they get properly initialized at the top
|
||||
* of the function and not at the top of its caller.
|
||||
*/
|
||||
NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
|
||||
NIR_PASS_V(nir, nir_lower_returns);
|
||||
NIR_PASS_V(nir, nir_inline_functions);
|
||||
NIR_PASS_V(nir, nir_copy_prop);
|
||||
NIR_PASS_V(nir, nir_opt_deref);
|
||||
|
||||
/* Pick off the single entrypoint that we want */
|
||||
nir_remove_non_entrypoints(nir);
|
||||
|
||||
/* Now that we've deleted all but the main function, we can go ahead and
|
||||
* lower the rest of the constant initializers. We do this here so that
|
||||
* nir_remove_dead_variables and split_per_member_structs below see the
|
||||
* corresponding stores.
|
||||
*/
|
||||
NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
|
||||
|
||||
/* LLVM loves take advantage of the fact that vec3s in OpenCL are 16B
|
||||
* aligned and so it can just read/write them as vec4s. This results in a
|
||||
* LOT of vec4->vec3 casts on loads and stores. One solution to this
|
||||
* problem is to get rid of all vec3 variables.
|
||||
*/
|
||||
NIR_PASS_V(nir, nir_lower_vec3_to_vec4,
|
||||
nir_var_shader_temp | nir_var_function_temp |
|
||||
nir_var_mem_shared | nir_var_mem_global|
|
||||
nir_var_mem_constant);
|
||||
|
||||
/* We assign explicit types early so that the optimizer can take advantage
|
||||
* of that information and hopefully get rid of some of our memcpys.
|
||||
*/
|
||||
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
|
||||
nir_var_uniform |
|
||||
nir_var_shader_temp | nir_var_function_temp |
|
||||
nir_var_mem_shared | nir_var_mem_global,
|
||||
glsl_get_cl_type_size_align);
|
||||
|
||||
struct brw_nir_compiler_opts opts = {};
|
||||
brw_preprocess_nir(compiler, nir, &opts);
|
||||
|
||||
int max_arg_idx = -1;
|
||||
nir_foreach_uniform_variable(var, nir) {
|
||||
assert(var->data.location < 256);
|
||||
max_arg_idx = MAX2(max_arg_idx, var->data.location);
|
||||
}
|
||||
|
||||
kernel->args_size = nir->num_uniforms;
|
||||
kernel->arg_count = max_arg_idx + 1;
|
||||
|
||||
/* No bindings */
|
||||
struct brw_kernel_arg_desc *args =
|
||||
rzalloc_array(mem_ctx, struct brw_kernel_arg_desc, kernel->arg_count);
|
||||
kernel->args = args;
|
||||
|
||||
nir_foreach_uniform_variable(var, nir) {
|
||||
struct brw_kernel_arg_desc arg_desc = {
|
||||
.offset = var->data.driver_location,
|
||||
.size = glsl_get_explicit_size(var->type, false),
|
||||
};
|
||||
assert(arg_desc.offset + arg_desc.size <= nir->num_uniforms);
|
||||
|
||||
assert(var->data.location >= 0);
|
||||
args[var->data.location] = arg_desc;
|
||||
}
|
||||
|
||||
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_all, NULL);
|
||||
|
||||
/* Lower again, this time after dead-variables to get more compact variable
|
||||
* layouts.
|
||||
*/
|
||||
nir->global_mem_size = 0;
|
||||
nir->scratch_size = 0;
|
||||
nir->info.shared_size = 0;
|
||||
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
|
||||
nir_var_shader_temp | nir_var_function_temp |
|
||||
nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant,
|
||||
glsl_get_cl_type_size_align);
|
||||
if (nir->constant_data_size > 0) {
|
||||
assert(nir->constant_data == NULL);
|
||||
nir->constant_data = rzalloc_size(nir, nir->constant_data_size);
|
||||
nir_gather_explicit_io_initializers(nir, nir->constant_data,
|
||||
nir->constant_data_size,
|
||||
nir_var_mem_constant);
|
||||
}
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_CS)) {
|
||||
/* Re-index SSA defs so we print more sensible numbers. */
|
||||
nir_foreach_function_impl(impl, nir) {
|
||||
nir_index_ssa_defs(impl);
|
||||
}
|
||||
|
||||
fprintf(stderr, "NIR (before I/O lowering) for kernel\n");
|
||||
nir_print_shader(nir, stderr);
|
||||
}
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_memcpy);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,
|
||||
nir_address_format_64bit_global);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform,
|
||||
nir_address_format_32bit_offset_as_64bit);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_explicit_io,
|
||||
nir_var_shader_temp | nir_var_function_temp |
|
||||
nir_var_mem_shared | nir_var_mem_global,
|
||||
nir_address_format_62bit_generic);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_convert_alu_types, NULL);
|
||||
|
||||
NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, devinfo, NULL);
|
||||
NIR_PASS_V(nir, lower_kernel_intrinsics);
|
||||
|
||||
struct brw_cs_prog_key key = { };
|
||||
|
||||
memset(&kernel->prog_data, 0, sizeof(kernel->prog_data));
|
||||
kernel->prog_data.base.nr_params = DIV_ROUND_UP(nir->num_uniforms, 4);
|
||||
|
||||
struct brw_compile_cs_params params = {
|
||||
.base = {
|
||||
.nir = nir,
|
||||
.stats = kernel->stats,
|
||||
.log_data = log_data,
|
||||
.mem_ctx = mem_ctx,
|
||||
},
|
||||
.key = &key,
|
||||
.prog_data = &kernel->prog_data,
|
||||
};
|
||||
|
||||
kernel->code = brw_compile_cs(compiler, ¶ms);
|
||||
|
||||
if (error_str)
|
||||
*error_str = params.base.error_str;
|
||||
|
||||
return kernel->code != NULL;
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
rebuild_value_from_store(struct util_dynarray *stores,
|
||||
nir_def *value, unsigned read_offset)
|
||||
{
|
||||
unsigned read_size = value->num_components * value->bit_size / 8;
|
||||
|
||||
util_dynarray_foreach(stores, nir_intrinsic_instr *, _store) {
|
||||
nir_intrinsic_instr *store = *_store;
|
||||
|
||||
unsigned write_offset = nir_src_as_uint(store->src[1]);
|
||||
unsigned write_size = nir_src_num_components(store->src[0]) *
|
||||
nir_src_bit_size(store->src[0]) / 8;
|
||||
if (write_offset <= read_offset &&
|
||||
(write_offset + write_size) >= (read_offset + read_size)) {
|
||||
assert(nir_block_dominates(store->instr.block, value->parent_instr->block));
|
||||
assert(write_size == read_size);
|
||||
return store->src[0].ssa;
|
||||
}
|
||||
}
|
||||
unreachable("Matching scratch store not found");
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove temporary variables stored to scratch to be then reloaded
|
||||
* immediately. Remap the load to the store SSA value.
|
||||
*
|
||||
* This workaround is only meant to be applied to shaders in src/intel/shaders
|
||||
* were we know there should be no issue. More complex cases might not work
|
||||
* with this approach.
|
||||
*/
|
||||
static bool
|
||||
nir_remove_llvm17_scratch(nir_shader *nir)
|
||||
{
|
||||
struct util_dynarray scratch_stores;
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
|
||||
util_dynarray_init(&scratch_stores, mem_ctx);
|
||||
|
||||
nir_foreach_function_impl(func, nir) {
|
||||
nir_foreach_block(block, func) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
|
||||
if (intrin->intrinsic != nir_intrinsic_store_scratch)
|
||||
continue;
|
||||
|
||||
nir_const_value *offset = nir_src_as_const_value(intrin->src[1]);
|
||||
if (offset != NULL) {
|
||||
util_dynarray_append(&scratch_stores, nir_intrinsic_instr *, intrin);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool progress = false;
|
||||
if (util_dynarray_num_elements(&scratch_stores, nir_intrinsic_instr *) > 0) {
|
||||
nir_foreach_function_impl(func, nir) {
|
||||
nir_foreach_block(block, func) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
|
||||
if (intrin->intrinsic != nir_intrinsic_load_scratch)
|
||||
continue;
|
||||
|
||||
nir_const_value *offset = nir_src_as_const_value(intrin->src[0]);
|
||||
if (offset == NULL)
|
||||
continue;
|
||||
|
||||
nir_def_rewrite_uses(&intrin->def,
|
||||
rebuild_value_from_store(
|
||||
&scratch_stores, &intrin->def,
|
||||
nir_src_as_uint(intrin->src[0])));
|
||||
nir_instr_remove(instr);
|
||||
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
util_dynarray_foreach(&scratch_stores, nir_intrinsic_instr *, _store) {
|
||||
nir_intrinsic_instr *store = *_store;
|
||||
nir_instr_remove(&store->instr);
|
||||
}
|
||||
|
||||
/* Quick sanity check */
|
||||
assert(util_dynarray_num_elements(&scratch_stores, nir_intrinsic_instr *) == 0 ||
|
||||
progress);
|
||||
|
||||
ralloc_free(mem_ctx);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
static void
|
||||
cleanup_llvm17_scratch(nir_shader *nir)
|
||||
{
|
||||
{
|
||||
bool progress;
|
||||
do {
|
||||
progress = false;
|
||||
NIR_PASS(progress, nir, nir_copy_prop);
|
||||
NIR_PASS(progress, nir, nir_opt_dce);
|
||||
NIR_PASS(progress, nir, nir_opt_constant_folding);
|
||||
NIR_PASS(progress, nir, nir_opt_cse);
|
||||
NIR_PASS(progress, nir, nir_opt_algebraic);
|
||||
} while (progress);
|
||||
}
|
||||
|
||||
nir_remove_llvm17_scratch(nir);
|
||||
|
||||
{
|
||||
bool progress;
|
||||
do {
|
||||
progress = false;
|
||||
NIR_PASS(progress, nir, nir_copy_prop);
|
||||
NIR_PASS(progress, nir, nir_opt_dce);
|
||||
NIR_PASS(progress, nir, nir_opt_constant_folding);
|
||||
NIR_PASS(progress, nir, nir_opt_cse);
|
||||
NIR_PASS(progress, nir, nir_opt_algebraic);
|
||||
} while (progress);
|
||||
}
|
||||
}
|
||||
|
||||
nir_shader *
|
||||
brw_nir_from_spirv(void *mem_ctx, const uint32_t *spirv, size_t spirv_size,
|
||||
bool llvm17_wa)
|
||||
{
|
||||
struct spirv_to_nir_options spirv_options = {
|
||||
.environment = NIR_SPIRV_OPENCL,
|
||||
.caps = {
|
||||
.address = true,
|
||||
.groups = true,
|
||||
.image_write_without_format = true,
|
||||
.int8 = true,
|
||||
.int16 = true,
|
||||
.int64 = true,
|
||||
.int64_atomics = true,
|
||||
.kernel = true,
|
||||
.linkage = true, /* We receive linked kernel from clc */
|
||||
.float_controls = true,
|
||||
.generic_pointers = true,
|
||||
.storage_8bit = true,
|
||||
.storage_16bit = true,
|
||||
.subgroup_arithmetic = true,
|
||||
.subgroup_basic = true,
|
||||
.subgroup_ballot = true,
|
||||
.subgroup_dispatch = true,
|
||||
.subgroup_quad = true,
|
||||
.subgroup_shuffle = true,
|
||||
.subgroup_vote = true,
|
||||
|
||||
.intel_subgroup_shuffle = true,
|
||||
.intel_subgroup_buffer_block_io = true,
|
||||
},
|
||||
.shared_addr_format = nir_address_format_62bit_generic,
|
||||
.global_addr_format = nir_address_format_62bit_generic,
|
||||
.temp_addr_format = nir_address_format_62bit_generic,
|
||||
.constant_addr_format = nir_address_format_64bit_global,
|
||||
.create_library = true,
|
||||
};
|
||||
|
||||
assert(spirv_size % 4 == 0);
|
||||
nir_shader *nir =
|
||||
spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
|
||||
"library", &spirv_options, &brw_scalar_nir_options);
|
||||
nir_validate_shader(nir, "after spirv_to_nir");
|
||||
nir_validate_ssa_dominance(nir, "after spirv_to_nir");
|
||||
ralloc_steal(mem_ctx, nir);
|
||||
nir->info.name = ralloc_strdup(nir, "library");
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_CS)) {
|
||||
/* Re-index SSA defs so we print more sensible numbers. */
|
||||
nir_foreach_function_impl(impl, nir) {
|
||||
nir_index_ssa_defs(impl);
|
||||
}
|
||||
|
||||
fprintf(stderr, "NIR (from SPIR-V) for kernel\n");
|
||||
nir_print_shader(nir, stderr);
|
||||
}
|
||||
|
||||
NIR_PASS_V(nir, implement_intel_builtins);
|
||||
NIR_PASS_V(nir, nir_link_shader_functions, spirv_options.clc_shader);
|
||||
|
||||
/* We have to lower away local constant initializers right before we
|
||||
* inline functions. That way they get properly initialized at the top
|
||||
* of the function and not at the top of its caller.
|
||||
*/
|
||||
NIR_PASS_V(nir, nir_lower_variable_initializers, ~(nir_var_shader_temp |
|
||||
nir_var_function_temp));
|
||||
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo |
|
||||
nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL);
|
||||
{
|
||||
bool progress;
|
||||
do
|
||||
{
|
||||
progress = false;
|
||||
NIR_PASS(progress, nir, nir_copy_prop);
|
||||
NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
|
||||
NIR_PASS(progress, nir, nir_opt_deref);
|
||||
NIR_PASS(progress, nir, nir_opt_dce);
|
||||
NIR_PASS(progress, nir, nir_opt_undef);
|
||||
NIR_PASS(progress, nir, nir_opt_constant_folding);
|
||||
NIR_PASS(progress, nir, nir_opt_cse);
|
||||
NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
|
||||
NIR_PASS(progress, nir, nir_opt_algebraic);
|
||||
} while (progress);
|
||||
}
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
|
||||
NIR_PASS_V(nir, nir_lower_returns);
|
||||
NIR_PASS_V(nir, nir_inline_functions);
|
||||
|
||||
assert(nir->scratch_size == 0);
|
||||
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_cl_type_size_align);
|
||||
|
||||
{
|
||||
bool progress;
|
||||
do
|
||||
{
|
||||
progress = false;
|
||||
NIR_PASS(progress, nir, nir_copy_prop);
|
||||
NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
|
||||
NIR_PASS(progress, nir, nir_opt_deref);
|
||||
NIR_PASS(progress, nir, nir_opt_dce);
|
||||
NIR_PASS(progress, nir, nir_opt_undef);
|
||||
NIR_PASS(progress, nir, nir_opt_constant_folding);
|
||||
NIR_PASS(progress, nir, nir_opt_cse);
|
||||
NIR_PASS(progress, nir, nir_split_var_copies);
|
||||
NIR_PASS(progress, nir, nir_lower_var_copies);
|
||||
NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
|
||||
NIR_PASS(progress, nir, nir_opt_algebraic);
|
||||
NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
|
||||
NIR_PASS(progress, nir, nir_opt_dead_cf);
|
||||
NIR_PASS(progress, nir, nir_opt_remove_phis);
|
||||
NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
|
||||
NIR_PASS(progress, nir, nir_lower_vec3_to_vec4, nir_var_mem_generic | nir_var_uniform);
|
||||
NIR_PASS(progress, nir, nir_opt_memcpy);
|
||||
} while (progress);
|
||||
}
|
||||
|
||||
NIR_PASS_V(nir, nir_scale_fdiv);
|
||||
|
||||
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo |
|
||||
nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL);
|
||||
|
||||
|
||||
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_shared | nir_var_function_temp, NULL);
|
||||
|
||||
nir->scratch_size = 0;
|
||||
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
|
||||
nir_var_mem_shared | nir_var_function_temp | nir_var_shader_temp |
|
||||
nir_var_mem_global | nir_var_mem_constant,
|
||||
glsl_get_cl_type_size_align);
|
||||
|
||||
// Lower memcpy - needs to wait until types are sized
|
||||
{
|
||||
bool progress;
|
||||
do {
|
||||
progress = false;
|
||||
NIR_PASS(progress, nir, nir_opt_memcpy);
|
||||
NIR_PASS(progress, nir, nir_copy_prop);
|
||||
NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
|
||||
NIR_PASS(progress, nir, nir_opt_deref);
|
||||
NIR_PASS(progress, nir, nir_opt_dce);
|
||||
NIR_PASS(progress, nir, nir_split_var_copies);
|
||||
NIR_PASS(progress, nir, nir_lower_var_copies);
|
||||
NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
|
||||
NIR_PASS(progress, nir, nir_opt_constant_folding);
|
||||
NIR_PASS(progress, nir, nir_opt_cse);
|
||||
} while (progress);
|
||||
}
|
||||
NIR_PASS_V(nir, nir_lower_memcpy);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_explicit_io,
|
||||
nir_var_mem_shared | nir_var_function_temp | nir_var_shader_temp | nir_var_uniform,
|
||||
nir_address_format_32bit_offset_as_64bit);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_system_values);
|
||||
|
||||
/* Hopefully we can drop this once lower_vars_to_ssa has improved to not
|
||||
* lower everything to scratch.
|
||||
*/
|
||||
if (llvm17_wa)
|
||||
cleanup_llvm17_scratch(nir);
|
||||
|
||||
/* Lower again, this time after dead-variables to get more compact variable
|
||||
* layouts.
|
||||
*/
|
||||
nir->global_mem_size = 0;
|
||||
nir->scratch_size = 0;
|
||||
nir->info.shared_size = 0;
|
||||
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
|
||||
nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant,
|
||||
glsl_get_cl_type_size_align);
|
||||
if (nir->constant_data_size > 0) {
|
||||
assert(nir->constant_data == NULL);
|
||||
nir->constant_data = rzalloc_size(nir, nir->constant_data_size);
|
||||
nir_gather_explicit_io_initializers(nir, nir->constant_data,
|
||||
nir->constant_data_size,
|
||||
nir_var_mem_constant);
|
||||
}
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,
|
||||
nir_address_format_64bit_global);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform,
|
||||
nir_address_format_32bit_offset_as_64bit);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_explicit_io,
|
||||
nir_var_shader_temp | nir_var_function_temp |
|
||||
nir_var_mem_shared | nir_var_mem_global,
|
||||
nir_address_format_62bit_generic);
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_CS)) {
|
||||
/* Re-index SSA defs so we print more sensible numbers. */
|
||||
nir_foreach_function_impl(impl, nir) {
|
||||
nir_index_ssa_defs(impl);
|
||||
}
|
||||
|
||||
fprintf(stderr, "NIR (before I/O lowering) for kernel\n");
|
||||
nir_print_shader(nir, stderr);
|
||||
}
|
||||
|
||||
return nir;
|
||||
}
|
||||
|
|
@ -1,78 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2020 Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BRW_KERNEL_H
|
||||
#define BRW_KERNEL_H
|
||||
|
||||
#include "brw_compiler.h"
|
||||
|
||||
struct disk_cache;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** Software interface for system values in kernels
|
||||
*
|
||||
* These are intended to go at the start of the kernel argument buffer.
|
||||
*/
|
||||
struct brw_kernel_sysvals {
|
||||
uint32_t num_work_groups[3];
|
||||
uint32_t pad[5];
|
||||
};
|
||||
|
||||
struct brw_kernel_arg_desc {
|
||||
uint16_t offset;
|
||||
uint16_t size;
|
||||
};
|
||||
|
||||
struct brw_kernel {
|
||||
struct brw_cs_prog_data prog_data;
|
||||
|
||||
struct brw_compile_stats stats[3];
|
||||
|
||||
uint16_t args_size;
|
||||
uint16_t arg_count;
|
||||
const struct brw_kernel_arg_desc *args;
|
||||
|
||||
const void *code;
|
||||
};
|
||||
|
||||
bool
|
||||
brw_kernel_from_spirv(struct brw_compiler *compiler,
|
||||
struct disk_cache *disk_cache,
|
||||
struct brw_kernel *kernel,
|
||||
void *log_data, void *mem_ctx,
|
||||
const uint32_t *spirv, size_t spirv_size,
|
||||
const char *entrypoint_name,
|
||||
char **error_str);
|
||||
|
||||
nir_shader *
|
||||
brw_nir_from_spirv(void *mem_ctx, const uint32_t *spirv, size_t spirv_size,
|
||||
bool llvm17_wa);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BRW_KERNEL_H */
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -23,7 +23,6 @@
|
|||
|
||||
#include "intel_nir.h"
|
||||
#include "brw_nir.h"
|
||||
#include "brw_nir_rt.h"
|
||||
#include "brw_shader.h"
|
||||
#include "dev/intel_debug.h"
|
||||
#include "compiler/glsl_types.h"
|
||||
|
|
@ -1770,15 +1769,6 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
|
|||
|
||||
OPT(nir_opt_dce);
|
||||
|
||||
/* The mesh stages require this pass to be called at the last minute,
|
||||
* but if anything is done by it, it will also constant fold, and that
|
||||
* undoes the work done by nir_trivialize_registers, so call it right
|
||||
* before that one instead.
|
||||
*/
|
||||
if (nir->info.stage == MESA_SHADER_MESH ||
|
||||
nir->info.stage == MESA_SHADER_TASK)
|
||||
brw_nir_adjust_payload(nir);
|
||||
|
||||
nir_trivialize_registers(nir);
|
||||
|
||||
/* This is the last pass we run before we start emitting stuff. It
|
||||
|
|
|
|||
|
|
@ -1,818 +0,0 @@
|
|||
/*
|
||||
* Copyright 2023 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file brw_nir_lower_cooperative_matrix.c
|
||||
* Lower cooperative matrix to subgroup operations.
|
||||
*
|
||||
* All supported matrix types are assumed to have either 8 rows or 8
|
||||
* columns. The other dimension of the matrix is typically 8 times the number
|
||||
* of data elements that can be stored in a 32-bit dword. Matrix data is
|
||||
* indexed by a combination of an array element and a subgroup invocation ID.
|
||||
*
|
||||
* Two layouts for matrix data are used. In the first layout,
|
||||
* subgroupShuffle(slice[N], ...) accesses row N of the matrix. This will be
|
||||
* called row-major hereafter. In the other layout,
|
||||
* subgroupShuffle(slice[...], M) accesses column M of the matrix. This will
|
||||
* be called column-major hereafter. In cases where a single 32-bit value is
|
||||
* stored in each entry, these layouts are identical.
|
||||
*
|
||||
* The subtle difference arises when multiple values are packed into a single
|
||||
* 32-bit dword. If two 16-bit values are packed in a single 32-bit value in
|
||||
* column-major, subgroupShuffle(slice[0], 1) holds matrix entries m[1][1] and
|
||||
* m[2][1] (in m[row][column] notation). In row-major, that same shuffle holds
|
||||
* m[0][2] and m[0][3].
|
||||
*
|
||||
* There is an alternate way to think about the matrix layouts. Every matrix
|
||||
* size supported by the Intel driver is either Sx8 (e.g., 16x8 for float16 B
|
||||
* matrix) or Sx8T (e.g., 8x32 for int8 A matrix). The A matrix and B matrix
|
||||
* layouts are such that a single 8 dword register hold an entire row of the
|
||||
* matrix.
|
||||
*
|
||||
* Consider a matrix stored starting in register g32. In an A matrix, the
|
||||
* packed dwords of g32 contain only the data for a single row of the
|
||||
* matrix. g32 is row 0, g33 is row 1, etc. In a B matrix, the packed dwords
|
||||
* of g(32+N).X contain only the data for a single column of the
|
||||
* matrix. g[32:40].0 is column 0, g[32:40].1 is column 1, etc.
|
||||
*
|
||||
* This leads to some shenanigans in \c lower_cmat_load_store.
|
||||
*
|
||||
* In the common case, A, C, and result matrices are stored row major while B
|
||||
* matrices are stored column major. This arrangement facilitates efficient
|
||||
* dot product operations using DPAS or DP4A instructions.
|
||||
*
|
||||
* Future optimizations are possible when row and column major are
|
||||
* flipped. That is, efficient dot products are also possible when A, C, and
|
||||
* result matrices are column major while B is row major.
|
||||
*/
|
||||
|
||||
#include "brw_nir.h"
|
||||
|
||||
struct lower_cmat_state {
|
||||
nir_shader *shader;
|
||||
|
||||
struct hash_table *slice_coop_types;
|
||||
|
||||
struct hash_table *vars_to_slice;
|
||||
|
||||
unsigned subgroup_size;
|
||||
};
|
||||
|
||||
static void
|
||||
print_coop_types(struct lower_cmat_state *state)
|
||||
{
|
||||
fprintf(stderr, "--- Slices to Cooperative Matrix type table\n");
|
||||
hash_table_foreach(state->slice_coop_types, e) {
|
||||
nir_variable *var = (void *)e->key;
|
||||
const struct glsl_type *t = e->data;
|
||||
fprintf(stderr, "%p: %s -> %s\n", var, var->name, glsl_get_type_name(t));
|
||||
}
|
||||
fprintf(stderr, "\n\n");
|
||||
}
|
||||
|
||||
static const struct glsl_type *
|
||||
get_coop_type_for_slice(struct lower_cmat_state *state, nir_deref_instr *deref)
|
||||
{
|
||||
nir_variable *var = nir_deref_instr_get_variable(deref);
|
||||
struct hash_entry *entry = _mesa_hash_table_search(state->slice_coop_types, var);
|
||||
|
||||
assert(entry != NULL);
|
||||
|
||||
return entry->data;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_cmat_filter(const nir_instr *instr, const void *_state)
|
||||
{
|
||||
if (instr->type == nir_instr_type_deref) {
|
||||
nir_deref_instr *deref = nir_instr_as_deref(instr);
|
||||
return glsl_type_is_cmat(deref->type);
|
||||
}
|
||||
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_cmat_construct:
|
||||
case nir_intrinsic_cmat_load:
|
||||
case nir_intrinsic_cmat_store:
|
||||
case nir_intrinsic_cmat_length:
|
||||
case nir_intrinsic_cmat_muladd:
|
||||
case nir_intrinsic_cmat_unary_op:
|
||||
case nir_intrinsic_cmat_binary_op:
|
||||
case nir_intrinsic_cmat_scalar_op:
|
||||
case nir_intrinsic_cmat_bitcast:
|
||||
case nir_intrinsic_cmat_insert:
|
||||
case nir_intrinsic_cmat_extract:
|
||||
case nir_intrinsic_cmat_copy:
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get number of matrix elements packed in each component of the slice.
|
||||
*/
|
||||
static unsigned
|
||||
get_packing_factor(const struct glsl_cmat_description desc,
|
||||
const struct glsl_type *slice_type)
|
||||
{
|
||||
const struct glsl_type *slice_element_type = glsl_without_array(slice_type);
|
||||
|
||||
assert(!glsl_type_is_cmat(slice_type));
|
||||
|
||||
assert(glsl_get_bit_size(slice_element_type) >= glsl_base_type_get_bit_size(desc.element_type));
|
||||
assert(glsl_get_bit_size(slice_element_type) % glsl_base_type_get_bit_size(desc.element_type) == 0);
|
||||
|
||||
return glsl_get_bit_size(slice_element_type) / glsl_base_type_get_bit_size(desc.element_type);
|
||||
}
|
||||
|
||||
static const struct glsl_type *
|
||||
get_slice_type_from_desc(const struct lower_cmat_state *state,
|
||||
const struct glsl_cmat_description desc)
|
||||
{
|
||||
enum glsl_base_type base_type;
|
||||
|
||||
/* Number of matrix elements stored by each subgroup invocation. If the
|
||||
* data is packed, the slice size will be less than this.
|
||||
*/
|
||||
const unsigned elements_per_invocation =
|
||||
(desc.rows * desc.cols) / state->subgroup_size;
|
||||
|
||||
assert(elements_per_invocation > 0);
|
||||
|
||||
const unsigned element_bits = 32;
|
||||
const unsigned bits = glsl_base_type_get_bit_size(desc.element_type);
|
||||
unsigned packing_factor = MIN2(elements_per_invocation,
|
||||
element_bits / bits);
|
||||
|
||||
/* Adjust the packing factor so that each row of the matrix fills and
|
||||
* entire GRF.
|
||||
*
|
||||
* The in-register layout of B matrices is different, so those are handled
|
||||
* more like column major (for row major matrices). See the file comment
|
||||
* for more details.
|
||||
*/
|
||||
const unsigned actual_cols = desc.use != GLSL_CMAT_USE_B ? desc.cols : desc.rows;
|
||||
while ((actual_cols / packing_factor) < 8) {
|
||||
assert(packing_factor > 1);
|
||||
packing_factor /= 2;
|
||||
}
|
||||
|
||||
switch (desc.element_type) {
|
||||
case GLSL_TYPE_FLOAT:
|
||||
base_type = GLSL_TYPE_FLOAT;
|
||||
break;
|
||||
case GLSL_TYPE_UINT:
|
||||
case GLSL_TYPE_FLOAT16:
|
||||
case GLSL_TYPE_UINT8:
|
||||
case GLSL_TYPE_UINT16:
|
||||
base_type = glsl_get_base_type(glsl_uintN_t_type(packing_factor * bits));
|
||||
break;
|
||||
case GLSL_TYPE_INT:
|
||||
case GLSL_TYPE_INT8:
|
||||
case GLSL_TYPE_INT16:
|
||||
base_type = glsl_get_base_type(glsl_intN_t_type(packing_factor * bits));
|
||||
break;
|
||||
default:
|
||||
unreachable("Invalid cooperative matrix element type.");
|
||||
}
|
||||
|
||||
unsigned len = elements_per_invocation / packing_factor;
|
||||
|
||||
/* Supported matrix sizes are designed to fill either 4 or 8 SIMD8
|
||||
* registers. That means:
|
||||
*
|
||||
* 4 regsiters 8 registers
|
||||
* SIMD32 len = 1 len = 2
|
||||
* SIMD16 len = 2 len = 4
|
||||
* SIMD8 len = 4 len = 8
|
||||
*
|
||||
* If configurations are added that result in other values of len, at the
|
||||
* very least this assertion will need to be updated. The only value of len
|
||||
* that makes sense to add would be 16, and that would be a lot of
|
||||
* registers.
|
||||
*/
|
||||
assert(len == 1 || len == 2 || len == 4 || len == 8);
|
||||
|
||||
const struct glsl_type *slice_type = glsl_vector_type(base_type, len);
|
||||
|
||||
assert(packing_factor == get_packing_factor(desc, slice_type));
|
||||
|
||||
return slice_type;
|
||||
}
|
||||
|
||||
static const struct glsl_type *
|
||||
get_slice_type(const struct lower_cmat_state *state,
|
||||
const struct glsl_type *type)
|
||||
{
|
||||
if (glsl_type_is_array(type)) {
|
||||
const struct glsl_type *slice_type =
|
||||
get_slice_type(state, glsl_get_array_element(type));
|
||||
|
||||
return glsl_array_type(slice_type, glsl_array_size(type), 0);
|
||||
}
|
||||
|
||||
assert(glsl_type_is_cmat(type));
|
||||
|
||||
return get_slice_type_from_desc(state,
|
||||
*glsl_get_cmat_description(type));
|
||||
}
|
||||
|
||||
static nir_deref_instr *
|
||||
create_local_slice(struct lower_cmat_state *state, nir_builder *b,
|
||||
const struct glsl_type *mat_type, const char *name)
|
||||
{
|
||||
const struct glsl_type *slice_type = get_slice_type(state, mat_type);
|
||||
nir_variable *slice_var = nir_local_variable_create(b->impl, slice_type, name);
|
||||
_mesa_hash_table_insert(state->slice_coop_types, slice_var, (void *)mat_type);
|
||||
return nir_build_deref_var(b, slice_var);
|
||||
}
|
||||
|
||||
static void
|
||||
lower_cmat_load_store(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
struct lower_cmat_state *state)
|
||||
{
|
||||
const bool load = intrin->intrinsic == nir_intrinsic_cmat_load;
|
||||
const unsigned mat_src = load ? 0 : 1;
|
||||
const unsigned ptr_src = load ? 1 : 0;
|
||||
|
||||
nir_deref_instr *slice = nir_src_as_deref(intrin->src[mat_src]);
|
||||
const struct glsl_type *mat_type = get_coop_type_for_slice(state, slice);
|
||||
const struct glsl_cmat_description *desc = glsl_get_cmat_description(mat_type);
|
||||
|
||||
nir_def *results[NIR_MAX_VEC_COMPONENTS];
|
||||
const unsigned num_components = glsl_get_vector_elements(slice->type);
|
||||
const unsigned packing_factor = get_packing_factor(*desc, slice->type);
|
||||
|
||||
nir_deref_instr *pointer = nir_src_as_deref(intrin->src[ptr_src]);
|
||||
|
||||
if ((nir_intrinsic_matrix_layout(intrin) == GLSL_MATRIX_LAYOUT_ROW_MAJOR) ==
|
||||
(desc->use != GLSL_CMAT_USE_B)) {
|
||||
nir_def *stride = nir_udiv_imm(b, intrin->src[2].ssa, packing_factor);
|
||||
|
||||
const struct glsl_type *element_type =
|
||||
glsl_scalar_type(glsl_get_base_type(slice->type));
|
||||
|
||||
pointer = nir_build_deref_cast(b, &pointer->def, pointer->modes,
|
||||
element_type,
|
||||
glsl_get_bit_size(element_type) / 8);
|
||||
|
||||
nir_def *invocation = nir_load_subgroup_invocation(b);
|
||||
nir_def *base_offset;
|
||||
nir_def *step;
|
||||
|
||||
if (desc->use != GLSL_CMAT_USE_B) {
|
||||
base_offset = nir_iadd(b,
|
||||
nir_imul(b,
|
||||
nir_udiv_imm(b, invocation, 8),
|
||||
stride),
|
||||
nir_umod_imm(b, invocation, 8));
|
||||
|
||||
step = nir_imul_imm(b, stride, state->subgroup_size / 8);
|
||||
} else {
|
||||
base_offset = nir_iadd(b,
|
||||
nir_imul(b,
|
||||
nir_umod_imm(b, invocation, 8),
|
||||
stride),
|
||||
nir_udiv_imm(b, invocation, 8));
|
||||
|
||||
step = nir_imm_int(b, state->subgroup_size / 8);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
nir_def *offset = nir_imul_imm(b, step, i);
|
||||
|
||||
nir_deref_instr *memory_deref =
|
||||
nir_build_deref_ptr_as_array(b, pointer,
|
||||
nir_i2iN(b,
|
||||
nir_iadd(b,
|
||||
base_offset,
|
||||
offset),
|
||||
pointer->def.bit_size));
|
||||
|
||||
if (load) {
|
||||
results[i] = nir_load_deref(b, memory_deref);
|
||||
} else {
|
||||
nir_def *src = nir_channel(b, nir_load_deref(b, slice), i);
|
||||
nir_store_deref(b, memory_deref, src, 0x1);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
nir_def *stride = intrin->src[2].ssa;
|
||||
|
||||
const struct glsl_type *element_type = glsl_scalar_type(desc->element_type);
|
||||
const unsigned element_bits = glsl_base_type_get_bit_size(desc->element_type);
|
||||
const unsigned element_stride = element_bits / 8;
|
||||
|
||||
pointer = nir_build_deref_cast(b, &pointer->def, pointer->modes, element_type,
|
||||
element_stride);
|
||||
|
||||
nir_def *invocation_div_8 = nir_udiv_imm(b, nir_load_subgroup_invocation(b), 8);
|
||||
nir_def *invocation_mod_8 = nir_umod_imm(b, nir_load_subgroup_invocation(b), 8);
|
||||
|
||||
nir_def *packed_stride = nir_imul_imm(b, stride, packing_factor);
|
||||
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
const unsigned i_offset = i * (state->subgroup_size / 8);
|
||||
nir_def *v[4];
|
||||
|
||||
for (unsigned j = 0; j < packing_factor; j++) {
|
||||
nir_def *j_offset = nir_imul_imm(b, stride, j);
|
||||
nir_def *offset;
|
||||
|
||||
if (desc->use != GLSL_CMAT_USE_B) {
|
||||
offset = nir_iadd(b,
|
||||
nir_iadd(b,
|
||||
nir_imul(b,
|
||||
invocation_mod_8,
|
||||
packed_stride),
|
||||
invocation_div_8),
|
||||
nir_iadd_imm(b, j_offset, i_offset));
|
||||
} else {
|
||||
offset = nir_iadd(b,
|
||||
nir_iadd(b,
|
||||
nir_imul(b,
|
||||
invocation_div_8,
|
||||
packed_stride),
|
||||
invocation_mod_8),
|
||||
nir_iadd(b,
|
||||
nir_imul_imm(b,
|
||||
packed_stride,
|
||||
i_offset),
|
||||
j_offset));
|
||||
}
|
||||
|
||||
nir_deref_instr *memory_deref =
|
||||
nir_build_deref_ptr_as_array(b, pointer,
|
||||
nir_i2iN(b,
|
||||
offset,
|
||||
pointer->def.bit_size));
|
||||
|
||||
if (load) {
|
||||
v[j] = nir_load_deref(b, memory_deref);
|
||||
} else {
|
||||
nir_def *src = nir_channel(b, nir_load_deref(b, slice), i);
|
||||
|
||||
nir_def *v =
|
||||
nir_channel(b, nir_unpack_bits(b, src, element_bits), j);
|
||||
|
||||
nir_store_deref(b, memory_deref, v, 0x1);
|
||||
}
|
||||
}
|
||||
|
||||
if (load) {
|
||||
results[i] = nir_pack_bits(b, nir_vec(b, v, packing_factor),
|
||||
packing_factor * element_bits);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (load)
|
||||
nir_store_deref(b, slice, nir_vec(b, results, num_components),
|
||||
nir_component_mask(num_components));
|
||||
}
|
||||
|
||||
static void
|
||||
lower_cmat_unary_op(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
struct lower_cmat_state *state)
|
||||
{
|
||||
nir_deref_instr *dst_slice = nir_src_as_deref(intrin->src[0]);
|
||||
nir_deref_instr *src_slice = nir_src_as_deref(intrin->src[1]);
|
||||
nir_def *results[NIR_MAX_VEC_COMPONENTS];
|
||||
const unsigned num_components = glsl_get_vector_elements(dst_slice->type);
|
||||
|
||||
const struct glsl_type *dst_mat_type =
|
||||
get_coop_type_for_slice(state, dst_slice);
|
||||
const struct glsl_type *src_mat_type =
|
||||
get_coop_type_for_slice(state, src_slice);
|
||||
|
||||
const struct glsl_cmat_description dst_desc =
|
||||
*glsl_get_cmat_description(dst_mat_type);
|
||||
|
||||
const struct glsl_cmat_description src_desc =
|
||||
*glsl_get_cmat_description(src_mat_type);
|
||||
|
||||
const unsigned dst_bits = glsl_base_type_bit_size(dst_desc.element_type);
|
||||
const unsigned src_bits = glsl_base_type_bit_size(src_desc.element_type);
|
||||
|
||||
/* The type of the returned slice may be different from the type of the
|
||||
* input slice.
|
||||
*/
|
||||
const unsigned dst_packing_factor =
|
||||
get_packing_factor(dst_desc, dst_slice->type);
|
||||
|
||||
const unsigned src_packing_factor =
|
||||
get_packing_factor(src_desc, src_slice->type);
|
||||
|
||||
const nir_op op = nir_intrinsic_alu_op(intrin);
|
||||
|
||||
/* There are three possible cases:
|
||||
*
|
||||
* 1. dst_packing_factor == src_packing_factor. This is the common case,
|
||||
* and handling it is straightforward.
|
||||
*
|
||||
* 2. dst_packing_factor > src_packing_factor. This occurs when converting a
|
||||
* float32_t matrix slice to a packed float16_t slice. Loop over the size
|
||||
* of the destination slice, but read multiple entries from the source
|
||||
* slice on each iteration.
|
||||
*
|
||||
* 3. dst_packing_factor < src_packing_factor. This occurs when converting a
|
||||
* packed int8_t matrix slice to an int32_t slice. Loop over the size of
|
||||
* the source slice, but write multiple entries to the destination slice
|
||||
* on each iteration.
|
||||
*
|
||||
* Handle all cases by iterating over the total (non-packed) number of
|
||||
* elements in the slice. When dst_packing_factor values have been
|
||||
* calculated, store them.
|
||||
*/
|
||||
assert((dst_packing_factor * glsl_get_vector_elements(dst_slice->type)) ==
|
||||
(src_packing_factor * glsl_get_vector_elements(src_slice->type)));
|
||||
|
||||
/* Stores at most dst_packing_factor partial results. */
|
||||
nir_def *v[4];
|
||||
assert(dst_packing_factor <= 4);
|
||||
|
||||
for (unsigned i = 0; i < num_components * dst_packing_factor; i++) {
|
||||
const unsigned dst_chan_index = i % dst_packing_factor;
|
||||
const unsigned src_chan_index = i % src_packing_factor;
|
||||
const unsigned dst_index = i / dst_packing_factor;
|
||||
const unsigned src_index = i / src_packing_factor;
|
||||
|
||||
nir_def *src =
|
||||
nir_channel(b,
|
||||
nir_unpack_bits(b,
|
||||
nir_channel(b,
|
||||
nir_load_deref(b, src_slice),
|
||||
src_index),
|
||||
src_bits),
|
||||
src_chan_index);
|
||||
|
||||
v[dst_chan_index] = nir_build_alu1(b, op, src);
|
||||
|
||||
if (dst_chan_index == (dst_packing_factor - 1)) {
|
||||
results[dst_index] =
|
||||
nir_pack_bits(b, nir_vec(b, v, dst_packing_factor),
|
||||
dst_packing_factor * dst_bits);
|
||||
}
|
||||
}
|
||||
|
||||
nir_store_deref(b, dst_slice, nir_vec(b, results, num_components),
|
||||
nir_component_mask(num_components));
|
||||
}
|
||||
|
||||
static void
|
||||
lower_cmat_binary_op(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
struct lower_cmat_state *state)
|
||||
{
|
||||
nir_deref_instr *dst_slice = nir_src_as_deref(intrin->src[0]);
|
||||
nir_deref_instr *src_a_slice = nir_src_as_deref(intrin->src[1]);
|
||||
nir_deref_instr *src_b_slice = nir_src_as_deref(intrin->src[2]);
|
||||
|
||||
nir_def *src_a = nir_load_deref(b, src_a_slice);
|
||||
nir_def *src_b = nir_load_deref(b, src_b_slice);
|
||||
nir_def *results[NIR_MAX_VEC_COMPONENTS];
|
||||
const unsigned num_components = glsl_get_vector_elements(dst_slice->type);
|
||||
|
||||
const struct glsl_type *dst_mat_type = get_coop_type_for_slice(state, dst_slice);
|
||||
ASSERTED const struct glsl_type *src_a_mat_type = get_coop_type_for_slice(state, src_a_slice);
|
||||
ASSERTED const struct glsl_type *src_b_mat_type = get_coop_type_for_slice(state, src_b_slice);
|
||||
|
||||
const struct glsl_cmat_description desc =
|
||||
*glsl_get_cmat_description(dst_mat_type);
|
||||
|
||||
assert(dst_mat_type == src_a_mat_type);
|
||||
assert(dst_mat_type == src_b_mat_type);
|
||||
|
||||
const unsigned bits = glsl_base_type_bit_size(desc.element_type);
|
||||
const unsigned packing_factor = get_packing_factor(desc, dst_slice->type);
|
||||
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
nir_def *val_a = nir_channel(b, src_a, i);
|
||||
nir_def *val_b = nir_channel(b, src_b, i);
|
||||
|
||||
results[i] =
|
||||
nir_pack_bits(b, nir_build_alu2(b, nir_intrinsic_alu_op(intrin),
|
||||
nir_unpack_bits(b, val_a, bits),
|
||||
nir_unpack_bits(b, val_b, bits)),
|
||||
packing_factor * bits);
|
||||
}
|
||||
|
||||
nir_store_deref(b, dst_slice, nir_vec(b, results, num_components),
|
||||
nir_component_mask(num_components));
|
||||
}
|
||||
|
||||
static void
|
||||
lower_cmat_scalar_op(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
struct lower_cmat_state *state)
|
||||
{
|
||||
nir_deref_instr *dst_slice = nir_src_as_deref(intrin->src[0]);
|
||||
nir_deref_instr *src_slice = nir_src_as_deref(intrin->src[1]);
|
||||
nir_def *scalar = intrin->src[2].ssa;
|
||||
|
||||
nir_def *src = nir_load_deref(b, src_slice);
|
||||
nir_def *results[NIR_MAX_VEC_COMPONENTS];
|
||||
const unsigned num_components = glsl_get_vector_elements(dst_slice->type);
|
||||
|
||||
ASSERTED const struct glsl_type *dst_mat_type = get_coop_type_for_slice(state, dst_slice);
|
||||
ASSERTED const struct glsl_type *src_mat_type = get_coop_type_for_slice(state, src_slice);
|
||||
assert(dst_mat_type == src_mat_type);
|
||||
|
||||
const struct glsl_cmat_description desc =
|
||||
*glsl_get_cmat_description(dst_mat_type);
|
||||
|
||||
const unsigned bits = glsl_base_type_bit_size(desc.element_type);
|
||||
const unsigned packing_factor = get_packing_factor(desc, dst_slice->type);
|
||||
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
nir_def *val = nir_channel(b, src, i);
|
||||
|
||||
results[i] =
|
||||
nir_pack_bits(b, nir_build_alu2(b, nir_intrinsic_alu_op(intrin),
|
||||
nir_unpack_bits(b, val, bits),
|
||||
scalar),
|
||||
packing_factor * bits);
|
||||
}
|
||||
|
||||
nir_store_deref(b, dst_slice, nir_vec(b, results, num_components),
|
||||
nir_component_mask(num_components));
|
||||
}
|
||||
|
||||
static nir_deref_instr *
|
||||
lower_cmat_deref(nir_builder *b, nir_deref_instr *deref,
|
||||
struct lower_cmat_state *state)
|
||||
{
|
||||
nir_deref_instr *parent = nir_deref_instr_parent(deref);
|
||||
if (parent) {
|
||||
assert(deref->deref_type == nir_deref_type_array);
|
||||
parent = lower_cmat_deref(b, parent, state);
|
||||
return nir_build_deref_array(b, parent, deref->arr.index.ssa);
|
||||
} else {
|
||||
assert(deref->deref_type == nir_deref_type_var);
|
||||
assert(deref->var);
|
||||
assert(glsl_type_is_cmat(glsl_without_array(deref->var->type)));
|
||||
|
||||
struct hash_entry *entry = _mesa_hash_table_search(state->vars_to_slice, deref->var);
|
||||
assert(entry);
|
||||
return nir_build_deref_var(b, (nir_variable *)entry->data);
|
||||
}
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
lower_cmat_instr(nir_builder *b, nir_instr *instr, void *_state)
|
||||
{
|
||||
struct lower_cmat_state *state = _state;
|
||||
|
||||
if (instr->type == nir_instr_type_deref) {
|
||||
nir_deref_instr *deref = lower_cmat_deref(b, nir_instr_as_deref(instr), state);
|
||||
return &deref->def;
|
||||
}
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_cmat_load:
|
||||
case nir_intrinsic_cmat_store:
|
||||
lower_cmat_load_store(b, intrin, state);
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
|
||||
case nir_intrinsic_cmat_construct: {
|
||||
nir_deref_instr *slice = nir_src_as_deref(intrin->src[0]);
|
||||
nir_def *src = intrin->src[1].ssa;
|
||||
|
||||
const struct glsl_type *mat_type = get_coop_type_for_slice(state, slice);
|
||||
const struct glsl_cmat_description desc =
|
||||
*glsl_get_cmat_description(mat_type);
|
||||
const unsigned packing_factor = get_packing_factor(desc, slice->type);
|
||||
|
||||
if (packing_factor > 1) {
|
||||
src = nir_pack_bits(b, nir_replicate(b, src, packing_factor),
|
||||
packing_factor * glsl_base_type_get_bit_size(desc.element_type));
|
||||
}
|
||||
|
||||
const unsigned num_components = glsl_get_vector_elements(slice->type);
|
||||
|
||||
nir_store_deref(b, slice, nir_replicate(b, src, num_components),
|
||||
nir_component_mask(num_components));
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
}
|
||||
|
||||
case nir_intrinsic_cmat_unary_op:
|
||||
lower_cmat_unary_op(b, intrin, state);
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
|
||||
case nir_intrinsic_cmat_binary_op:
|
||||
lower_cmat_binary_op(b, intrin, state);
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
|
||||
case nir_intrinsic_cmat_scalar_op:
|
||||
lower_cmat_scalar_op(b, intrin, state);
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
|
||||
case nir_intrinsic_cmat_length: {
|
||||
const struct glsl_cmat_description desc = nir_intrinsic_cmat_desc(intrin);
|
||||
const struct glsl_type *mat_type = glsl_cmat_type(&desc);
|
||||
const struct glsl_type *slice_type = get_slice_type(state, mat_type);
|
||||
return nir_imm_intN_t(b, (get_packing_factor(desc, slice_type) *
|
||||
glsl_get_vector_elements(slice_type)), 32);
|
||||
}
|
||||
|
||||
case nir_intrinsic_cmat_muladd: {
|
||||
nir_deref_instr *dst_slice = nir_src_as_deref(intrin->src[0]);
|
||||
nir_deref_instr *A_slice = nir_src_as_deref(intrin->src[1]);
|
||||
nir_deref_instr *B_slice = nir_src_as_deref(intrin->src[2]);
|
||||
nir_deref_instr *accum_slice = nir_src_as_deref(intrin->src[3]);
|
||||
|
||||
const struct glsl_type *dst_mat_type = get_coop_type_for_slice(state, dst_slice);
|
||||
const struct glsl_cmat_description dst_desc = *glsl_get_cmat_description(dst_mat_type);
|
||||
|
||||
const struct glsl_type *src_mat_type = get_coop_type_for_slice(state, A_slice);
|
||||
const struct glsl_cmat_description src_desc = *glsl_get_cmat_description(src_mat_type);
|
||||
|
||||
const unsigned packing_factor = get_packing_factor(dst_desc, dst_slice->type);
|
||||
const unsigned num_components = glsl_get_vector_elements(dst_slice->type);
|
||||
|
||||
nir_def *result =
|
||||
nir_dpas_intel(b,
|
||||
packing_factor * glsl_base_type_get_bit_size(dst_desc.element_type),
|
||||
nir_load_deref(b, A_slice),
|
||||
nir_load_deref(b, B_slice),
|
||||
nir_load_deref(b, accum_slice),
|
||||
.dest_type = nir_get_nir_type_for_glsl_base_type(dst_desc.element_type),
|
||||
.src_type = nir_get_nir_type_for_glsl_base_type(src_desc.element_type),
|
||||
.saturate = nir_intrinsic_saturate(intrin),
|
||||
.cmat_signed_mask = nir_intrinsic_cmat_signed_mask(intrin),
|
||||
.systolic_depth = 8,
|
||||
.repeat_count = 8);
|
||||
|
||||
nir_store_deref(b, dst_slice, result,
|
||||
nir_component_mask(num_components));
|
||||
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
}
|
||||
|
||||
case nir_intrinsic_cmat_bitcast: {
|
||||
nir_deref_instr *dst_slice = nir_src_as_deref(intrin->src[0]);
|
||||
nir_deref_instr *src_slice = nir_src_as_deref(intrin->src[1]);
|
||||
|
||||
const unsigned num_components = glsl_get_vector_elements(dst_slice->type);
|
||||
|
||||
assert(glsl_get_vector_elements(src_slice->type) == num_components);
|
||||
|
||||
nir_store_deref(b, dst_slice, nir_load_deref(b, src_slice),
|
||||
nir_component_mask(num_components));
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
}
|
||||
|
||||
case nir_intrinsic_cmat_copy:
|
||||
nir_copy_deref(b,
|
||||
nir_src_as_deref(intrin->src[0]),
|
||||
nir_src_as_deref(intrin->src[1]));
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
|
||||
case nir_intrinsic_cmat_insert: {
|
||||
nir_deref_instr *dst_slice = nir_src_as_deref(intrin->src[0]);
|
||||
nir_def *scalar = intrin->src[1].ssa;
|
||||
nir_deref_instr *src_slice = nir_src_as_deref(intrin->src[2]);
|
||||
const nir_src dst_index = intrin->src[3];
|
||||
|
||||
const struct glsl_type *dst_mat_type = get_coop_type_for_slice(state, dst_slice);
|
||||
ASSERTED const struct glsl_type *src_mat_type = get_coop_type_for_slice(state, src_slice);
|
||||
assert(dst_mat_type == src_mat_type);
|
||||
|
||||
const struct glsl_cmat_description desc =
|
||||
*glsl_get_cmat_description(dst_mat_type);
|
||||
|
||||
const unsigned bits = glsl_base_type_bit_size(desc.element_type);
|
||||
const unsigned packing_factor = get_packing_factor(desc, dst_slice->type);
|
||||
const unsigned num_components = glsl_get_vector_elements(dst_slice->type);
|
||||
|
||||
nir_def *slice_index = nir_udiv_imm(b, dst_index.ssa, packing_factor);
|
||||
nir_def *vector_index = nir_umod_imm(b, dst_index.ssa, packing_factor);
|
||||
nir_def *results[NIR_MAX_VEC_COMPONENTS];
|
||||
|
||||
const int slice_constant_index = nir_src_is_const(dst_index)
|
||||
? nir_src_as_uint(dst_index) / packing_factor
|
||||
: -1;
|
||||
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
nir_def *val = nir_channel(b, nir_load_deref(b, src_slice), i);
|
||||
nir_def *insert;
|
||||
|
||||
if (slice_constant_index < 0 || slice_constant_index == i) {
|
||||
if (packing_factor == 1) {
|
||||
insert = scalar;
|
||||
} else {
|
||||
nir_def *unpacked = nir_unpack_bits(b, val, bits);
|
||||
nir_def *v = nir_vector_insert(b, unpacked, scalar, vector_index);
|
||||
|
||||
insert = nir_pack_bits(b, v, bits * packing_factor);
|
||||
}
|
||||
} else {
|
||||
insert = val;
|
||||
}
|
||||
|
||||
results[i] = slice_constant_index < 0
|
||||
? nir_bcsel(b, nir_ieq_imm(b, slice_index, i), insert, val)
|
||||
: insert;
|
||||
}
|
||||
|
||||
nir_store_deref(b, dst_slice, nir_vec(b, results, num_components),
|
||||
nir_component_mask(num_components));
|
||||
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
}
|
||||
|
||||
case nir_intrinsic_cmat_extract: {
|
||||
nir_deref_instr *slice = nir_src_as_deref(intrin->src[0]);
|
||||
const struct glsl_type *mat_type = get_coop_type_for_slice(state, slice);
|
||||
nir_def *index = intrin->src[1].ssa;
|
||||
|
||||
const struct glsl_cmat_description desc =
|
||||
*glsl_get_cmat_description(mat_type);
|
||||
|
||||
const unsigned bits = glsl_base_type_bit_size(desc.element_type);
|
||||
const unsigned packing_factor = get_packing_factor(desc, slice->type);
|
||||
|
||||
nir_def *src =
|
||||
nir_vector_extract(b, nir_load_deref(b, slice),
|
||||
nir_udiv_imm(b, index, packing_factor));
|
||||
|
||||
if (packing_factor == 1) {
|
||||
return src;
|
||||
} else {
|
||||
return nir_vector_extract(b,
|
||||
nir_unpack_bits(b, src, bits),
|
||||
nir_umod_imm(b, index, packing_factor));
|
||||
}
|
||||
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("invalid cooperative matrix intrinsic");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
create_slice_var(struct lower_cmat_state *state, nir_variable *var,
|
||||
nir_function_impl *impl)
|
||||
{
|
||||
// TODO: without array
|
||||
const struct glsl_type *mat_type = glsl_without_array(var->type);
|
||||
|
||||
assert(glsl_type_is_cmat(mat_type));
|
||||
assert((!impl && var->data.mode == nir_var_shader_temp) ||
|
||||
( impl && var->data.mode == nir_var_function_temp));
|
||||
|
||||
const struct glsl_type *slice_type = get_slice_type(state, var->type);
|
||||
const char *slice_name = ralloc_asprintf(state->shader, "%s_slice", var->name);
|
||||
nir_variable *slice_var = impl ?
|
||||
nir_local_variable_create(impl, slice_type, slice_name) :
|
||||
nir_variable_create(state->shader, var->data.mode, slice_type, slice_name);
|
||||
|
||||
_mesa_hash_table_insert(state->vars_to_slice, var, slice_var);
|
||||
_mesa_hash_table_insert(state->slice_coop_types, slice_var, (void *)mat_type);
|
||||
}
|
||||
|
||||
bool
|
||||
brw_nir_lower_cmat(nir_shader *shader, unsigned subgroup_size)
|
||||
{
|
||||
void *temp_ctx = ralloc_context(NULL);
|
||||
|
||||
struct lower_cmat_state state = {
|
||||
.shader = shader,
|
||||
.slice_coop_types = _mesa_pointer_hash_table_create(temp_ctx),
|
||||
.vars_to_slice = _mesa_pointer_hash_table_create(temp_ctx),
|
||||
.subgroup_size = subgroup_size,
|
||||
};
|
||||
|
||||
/* Create a slice array for each variable and add a map from the original
|
||||
* variable back to it, so it can be reached during lowering.
|
||||
*
|
||||
* TODO: Cooperative matrix inside struct?
|
||||
*/
|
||||
nir_foreach_variable_in_shader(var, shader) {
|
||||
if (glsl_type_is_cmat(glsl_without_array(var->type)))
|
||||
create_slice_var(&state, var, NULL);
|
||||
}
|
||||
nir_foreach_function(func, shader) {
|
||||
nir_foreach_function_temp_variable(var, func->impl) {
|
||||
if (glsl_type_is_cmat(glsl_without_array(var->type)))
|
||||
create_slice_var(&state, var, func->impl);
|
||||
}
|
||||
}
|
||||
|
||||
bool progress = nir_shader_lower_instructions(shader,
|
||||
lower_cmat_filter,
|
||||
lower_cmat_instr,
|
||||
&state);
|
||||
|
||||
ralloc_free(temp_ctx);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
|
@ -1,273 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "brw_nir_rt.h"
|
||||
#include "brw_nir_rt_builder.h"
|
||||
|
||||
/* Rewrite an any-hit shader so it can be inlined into an intersection
 * shader as a regular function call.
 *
 * The entrypoint is given three 32-bit parameters: a pointer to the
 * commit boolean, the candidate hit T, and the hit kind.  Ray-query style
 * intrinsics inside the any-hit shader are rewritten in terms of those
 * parameters, and halts are turned into returns so control flow stays
 * inside the (future) inlined function.
 *
 * Returns the rewritten entrypoint impl (the shader is modified in place).
 */
static nir_function_impl *
lower_any_hit_for_intersection(nir_shader *any_hit)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(any_hit);

   /* Any-hit shaders need three parameters */
   assert(impl->function->num_params == 0);
   nir_parameter params[] = {
      {
         /* A pointer to a boolean value for whether or not the hit was
          * accepted.
          */
         .num_components = 1,
         .bit_size = 32,
      },
      {
         /* The hit T value */
         .num_components = 1,
         .bit_size = 32,
      },
      {
         /* The hit kind */
         .num_components = 1,
         .bit_size = 32,
      },
   };
   impl->function->num_params = ARRAY_SIZE(params);
   impl->function->params =
      ralloc_array(any_hit, nir_parameter, ARRAY_SIZE(params));
   memcpy(impl->function->params, params, sizeof(params));

   nir_builder build = nir_builder_at(nir_before_impl(impl));
   nir_builder *b = &build;

   nir_def *commit_ptr = nir_load_param(b, 0);
   nir_def *hit_t = nir_load_param(b, 1);
   nir_def *hit_kind = nir_load_param(b, 2);

   /* Treat the raw commit pointer as a deref to a bool so we can store
    * through it with regular deref intrinsics.
    */
   nir_deref_instr *commit =
      nir_build_deref_cast(b, commit_ptr, nir_var_function_temp,
                           glsl_bool_type(), 0);

   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_ignore_ray_intersection:
               b->cursor = nir_instr_remove(&intrin->instr);
               /* We put the newly emitted code inside a dummy if because it's
                * going to contain a jump instruction and we don't want to
                * deal with that mess here.  It'll get dealt with by our
                * control-flow optimization passes.
                */
               nir_store_deref(b, commit, nir_imm_false(b), 0x1);
               nir_push_if(b, nir_imm_true(b));
               nir_jump(b, nir_jump_return);
               nir_pop_if(b, NULL);
               break;

            case nir_intrinsic_terminate_ray:
               /* The "normal" handling of terminateRay works fine in
                * intersection shaders.
                */
               break;

            case nir_intrinsic_load_ray_t_max:
               /* The candidate hit T comes in as a parameter here, not from
                * memory.
                */
               nir_def_rewrite_uses(&intrin->def,
                                        hit_t);
               nir_instr_remove(&intrin->instr);
               break;

            case nir_intrinsic_load_ray_hit_kind:
               nir_def_rewrite_uses(&intrin->def,
                                        hit_kind);
               nir_instr_remove(&intrin->instr);
               break;

            default:
               break;
            }
            break;
         }

         case nir_instr_type_jump: {
            /* Stomp any halts to returns since they only return from the
             * any-hit shader and not necessarily from the intersection
             * shader.  This is safe to do because we've already asserted
             * that we only have the one function.
             */
            nir_jump_instr *jump = nir_instr_as_jump(instr);
            if (jump->type == nir_jump_halt)
               jump->type = nir_jump_return;
            break;
         }

         default:
            break;
         }
      }
   }

   nir_validate_shader(any_hit, "after initial any-hit lowering");

   /* Flatten returns so the impl can be inlined as straight-line code. */
   nir_lower_returns_impl(impl);

   nir_validate_shader(any_hit, "after lowering returns");

   return impl;
}
|
||||
|
||||
/* Lower a ray-tracing intersection shader, optionally fusing in the
 * matching any-hit shader.
 *
 * reportIntersectionEXT (nir_intrinsic_report_ray_intersection) is expanded
 * into: a T-range check against [t_min, t_far], an inlined call to the
 * any-hit shader for non-opaque leaves, and, on commit, an update of the
 * potential hit record in memory.  An epilogue at the end of the shader
 * accepts or ignores the intersection based on whether any report
 * committed.
 *
 * The intersection shader is modified in place; any_hit is cloned and left
 * untouched.
 */
void
brw_nir_lower_intersection_shader(nir_shader *intersection,
                                  const nir_shader *any_hit,
                                  const struct intel_device_info *devinfo)
{
   void *dead_ctx = ralloc_context(intersection);

   nir_function_impl *any_hit_impl = NULL;
   struct hash_table *any_hit_var_remap = NULL;
   if (any_hit) {
      /* Clone so the caller's shader is not mutated, then prepare it for
       * inlining (parameters + returns instead of halts).
       */
      nir_shader *any_hit_tmp = nir_shader_clone(dead_ctx, any_hit);
      NIR_PASS_V(any_hit_tmp, nir_opt_dce);
      any_hit_impl = lower_any_hit_for_intersection(any_hit_tmp);
      any_hit_var_remap = _mesa_pointer_hash_table_create(dead_ctx);
   }

   nir_function_impl *impl = nir_shader_get_entrypoint(intersection);

   nir_builder build = nir_builder_at(nir_before_impl(impl));
   nir_builder *b = &build;

   nir_def *t_addr = brw_nir_rt_mem_hit_addr(b, false /* committed */);
   /* Tracks whether any reportIntersection in this invocation committed. */
   nir_variable *commit =
      nir_local_variable_create(impl, glsl_bool_type(), "ray_commit");
   nir_store_var(b, commit, nir_imm_false(b), 0x1);

   /* Emit the accept/ignore epilogue just before the single exit jump. */
   assert(impl->end_block->predecessors->entries == 1);
   set_foreach(impl->end_block->predecessors, block_entry) {
      struct nir_block *block = (void *)block_entry->key;
      b->cursor = nir_after_block_before_jump(block);
      nir_push_if(b, nir_load_var(b, commit));
      {
         /* Set the "valid" bit in mem_hit */
         nir_def *ray_addr = brw_nir_rt_mem_hit_addr(b, false /* committed */);
         nir_def *flags_dw_addr = nir_iadd_imm(b, ray_addr, 12);
         nir_store_global(b, flags_dw_addr, 4,
                          nir_ior(b, nir_load_global(b, flags_dw_addr, 4, 1, 32),
                                  nir_imm_int(b, 1 << 16)), 0x1 /* write_mask */);

         nir_accept_ray_intersection(b);
      }
      nir_push_else(b, NULL);
      {
         nir_ignore_ray_intersection(b);
      }
      nir_pop_if(b, NULL);
      break;
   }

   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_report_ray_intersection: {
               b->cursor = nir_instr_remove(&intrin->instr);
               nir_def *hit_t = intrin->src[0].ssa;
               nir_def *hit_kind = intrin->src[1].ssa;
               nir_def *min_t = nir_load_ray_t_min(b);

               struct brw_nir_rt_mem_ray_defs ray_def;
               brw_nir_rt_load_mem_ray(b, &ray_def, BRW_RT_BVH_LEVEL_WORLD);

               struct brw_nir_rt_mem_hit_defs hit_in = {};
               brw_nir_rt_load_mem_hit(b, &hit_in, false);

               nir_def *max_t = ray_def.t_far;

               /* bool commit_tmp = false; */
               nir_variable *commit_tmp =
                  nir_local_variable_create(impl, glsl_bool_type(),
                                            "commit_tmp");
               nir_store_var(b, commit_tmp, nir_imm_false(b), 0x1);

               /* Only consider hits with min_t <= hit_t <= max_t. */
               nir_push_if(b, nir_iand(b, nir_fge(b, hit_t, min_t),
                                       nir_fge(b, max_t, hit_t)));
               {
                  /* Any-hit defaults to commit */
                  nir_store_var(b, commit_tmp, nir_imm_true(b), 0x1);

                  if (any_hit_impl != NULL) {
                     /* Opaque leaves skip the any-hit shader entirely. */
                     nir_push_if(b, nir_inot(b, nir_load_leaf_opaque_intel(b)));
                     {
                        nir_def *params[] = {
                           &nir_build_deref_var(b, commit_tmp)->def,
                           hit_t,
                           hit_kind,
                        };
                        nir_inline_function_impl(b, any_hit_impl, params,
                                                 any_hit_var_remap);
                     }
                     nir_pop_if(b, NULL);
                  }

                  nir_push_if(b, nir_load_var(b, commit_tmp));
                  {
                     nir_store_var(b, commit, nir_imm_true(b), 0x1);

                     nir_def *ray_addr =
                        brw_nir_rt_mem_ray_addr(b, brw_nir_rt_stack_addr(b), BRW_RT_BVH_LEVEL_WORLD);

                     /* Shrink the ray's t_far to the committed hit T, then
                      * record T and kind in the potential hit record.
                      */
                     nir_store_global(b, nir_iadd_imm(b, ray_addr, 16 + 12), 4, hit_t, 0x1);
                     nir_store_global(b, t_addr, 4,
                                      nir_vec2(b, nir_fmin(b, hit_t, hit_in.t), hit_kind),
                                      0x3);
                  }
                  nir_pop_if(b, NULL);
               }
               nir_pop_if(b, NULL);

               /* reportIntersectionEXT returns whether the hit was accepted. */
               nir_def *accepted = nir_load_var(b, commit_tmp);
               nir_def_rewrite_uses(&intrin->def,
                                        accepted);
               break;
            }

            default:
               break;
            }
            break;
         }

         default:
            break;
         }
      }
   }
   nir_metadata_preserve(impl, nir_metadata_none);

   /* We did some inlining; have to re-index SSA defs */
   nir_index_ssa_defs(impl);

   ralloc_free(dead_ctx);
}
|
||||
|
|
@ -1,567 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2021 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "brw_nir_rt.h"
|
||||
#include "brw_nir_rt_builder.h"
|
||||
|
||||
#include "nir_deref.h"
|
||||
|
||||
#include "util/macros.h"
|
||||
|
||||
/* Per-shader state for the ray-query lowering pass. */
struct lowering_state {
   const struct intel_device_info *devinfo;

   /* Entrypoint impl being lowered (the pass asserts a single function). */
   nir_function_impl *impl;

   /* Maps opaque ray-query nir_variable -> struct brw_ray_query. */
   struct hash_table *queries;
   /* Total number of query slots registered (arrays count once per element). */
   uint32_t n_queries;

   /* RT globals loaded once at the top of the impl. */
   struct brw_nir_rt_globals_defs globals;
   nir_def *rq_globals;
};
|
||||
|
||||
/* Bookkeeping for one opaque ray-query variable (possibly an array). */
struct brw_ray_query {
   /* The original opaque RayQuery variable from the shader. */
   nir_variable *opaque_var;
   /* Replacement 16-bit variable holding the packed trace ctrl/level state
    * (see update_trace_ctrl_level); same array shape as opaque_var.
    */
   nir_variable *internal_var;
   /* First query slot index assigned to this variable. */
   uint32_t id;
};

/* NOTE(review): not referenced in the visible code — presumably leftover;
 * confirm before relying on it.
 */
#define SIZEOF_QUERY_STATE (sizeof(uint32_t))
|
||||
|
||||
static bool
|
||||
need_spill_fill(struct lowering_state *state)
|
||||
{
|
||||
return state->n_queries > 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* This pass converts opaque RayQuery structures from SPIRV into a vec3 where
|
||||
* the first 2 elements store a global address for the query and the third
|
||||
* element is an incremented counter on the number of executed
|
||||
* nir_intrinsic_rq_proceed.
|
||||
*/
|
||||
|
||||
static void
|
||||
register_opaque_var(nir_variable *opaque_var, struct lowering_state *state)
|
||||
{
|
||||
struct hash_entry *entry = _mesa_hash_table_search(state->queries, opaque_var);
|
||||
assert(entry == NULL);
|
||||
|
||||
struct brw_ray_query *rq = rzalloc(state->queries, struct brw_ray_query);
|
||||
rq->opaque_var = opaque_var;
|
||||
rq->id = state->n_queries;
|
||||
|
||||
unsigned aoa_size = glsl_get_aoa_size(opaque_var->type);
|
||||
state->n_queries += MAX2(1, aoa_size);
|
||||
|
||||
_mesa_hash_table_insert(state->queries, opaque_var, rq);
|
||||
}
|
||||
|
||||
static void
|
||||
create_internal_var(struct brw_ray_query *rq, struct lowering_state *state)
|
||||
{
|
||||
const struct glsl_type *opaque_type = rq->opaque_var->type;
|
||||
const struct glsl_type *internal_type = glsl_uint16_t_type();
|
||||
|
||||
while (glsl_type_is_array(opaque_type)) {
|
||||
assert(!glsl_type_is_unsized_array(opaque_type));
|
||||
internal_type = glsl_array_type(internal_type,
|
||||
glsl_array_size(opaque_type),
|
||||
0);
|
||||
opaque_type = glsl_get_array_element(opaque_type);
|
||||
}
|
||||
|
||||
rq->internal_var = nir_local_variable_create(state->impl,
|
||||
internal_type,
|
||||
NULL);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static nir_def *
|
||||
get_ray_query_shadow_addr(nir_builder *b,
|
||||
nir_deref_instr *deref,
|
||||
struct lowering_state *state,
|
||||
nir_deref_instr **out_state_deref)
|
||||
{
|
||||
nir_deref_path path;
|
||||
nir_deref_path_init(&path, deref, NULL);
|
||||
assert(path.path[0]->deref_type == nir_deref_type_var);
|
||||
|
||||
nir_variable *opaque_var = nir_deref_instr_get_variable(path.path[0]);
|
||||
struct hash_entry *entry = _mesa_hash_table_search(state->queries, opaque_var);
|
||||
assert(entry);
|
||||
|
||||
struct brw_ray_query *rq = entry->data;
|
||||
|
||||
/* Base address in the shadow memory of the variable associated with this
|
||||
* ray query variable.
|
||||
*/
|
||||
nir_def *base_addr =
|
||||
nir_iadd_imm(b, state->globals.resume_sbt_addr,
|
||||
brw_rt_ray_queries_shadow_stack_size(state->devinfo) * rq->id);
|
||||
|
||||
bool spill_fill = need_spill_fill(state);
|
||||
*out_state_deref = nir_build_deref_var(b, rq->internal_var);
|
||||
|
||||
if (!spill_fill)
|
||||
return NULL;
|
||||
|
||||
/* Just emit code and let constant-folding go to town */
|
||||
nir_deref_instr **p = &path.path[1];
|
||||
for (; *p; p++) {
|
||||
if ((*p)->deref_type == nir_deref_type_array) {
|
||||
nir_def *index = (*p)->arr.index.ssa;
|
||||
|
||||
/**/
|
||||
*out_state_deref = nir_build_deref_array(b, *out_state_deref, index);
|
||||
|
||||
/**/
|
||||
uint64_t size = MAX2(1, glsl_get_aoa_size((*p)->type)) *
|
||||
brw_rt_ray_queries_shadow_stack_size(state->devinfo);
|
||||
|
||||
nir_def *mul = nir_amul_imm(b, nir_i2i64(b, index), size);
|
||||
|
||||
base_addr = nir_iadd(b, base_addr, mul);
|
||||
} else {
|
||||
unreachable("Unsupported deref type");
|
||||
}
|
||||
}
|
||||
|
||||
nir_deref_path_finish(&path);
|
||||
|
||||
/* Add the lane offset to the shadow memory address */
|
||||
nir_def *lane_offset =
|
||||
nir_imul_imm(
|
||||
b,
|
||||
nir_iadd(
|
||||
b,
|
||||
nir_imul(
|
||||
b,
|
||||
brw_load_btd_dss_id(b),
|
||||
brw_nir_rt_load_num_simd_lanes_per_dss(b, state->devinfo)),
|
||||
brw_nir_rt_sync_stack_id(b)),
|
||||
BRW_RT_SIZEOF_SHADOW_RAY_QUERY);
|
||||
|
||||
return nir_iadd(b, base_addr, nir_i2i64(b, lane_offset));
|
||||
}
|
||||
|
||||
/* Read and/or update the packed trace state held in the internal query
 * variable: bits [1:0] are the BVH level, bits [15:2] (after the shift) are
 * the trace-ray control value.
 *
 * Pass non-NULL out_old_ctrl/out_old_level to receive the current values;
 * pass non-NULL new_ctrl/new_level to overwrite either field (a NULL new
 * value keeps the old one).  Nothing is stored when both new values are
 * NULL.
 */
static void
update_trace_ctrl_level(nir_builder *b,
                        nir_deref_instr *state_deref,
                        nir_def **out_old_ctrl,
                        nir_def **out_old_level,
                        nir_def *new_ctrl,
                        nir_def *new_level)
{
   nir_def *old_value = nir_load_deref(b, state_deref);
   nir_def *old_ctrl = nir_ishr_imm(b, old_value, 2);
   nir_def *old_level = nir_iand_imm(b, old_value, 0x3);

   if (out_old_ctrl)
      *out_old_ctrl = old_ctrl;
   if (out_old_level)
      *out_old_level = old_level;

   if (new_ctrl)
      new_ctrl = nir_i2i16(b, new_ctrl);   /* state var is 16-bit */
   if (new_level)
      new_level = nir_i2i16(b, new_level);

   if (new_ctrl || new_level) {
      if (!new_ctrl)
         new_ctrl = old_ctrl;
      if (!new_level)
         new_level = old_level;

      /* Repack: ctrl in the high bits, level in the low two bits. */
      nir_def *new_value = nir_ior(b, nir_ishl_imm(b, new_ctrl, 2), new_level);
      nir_store_deref(b, state_deref, new_value, 0x1);
   }
}
|
||||
|
||||
/* Copy a query's saved state from shadow memory into the HW stack slot
 * before handing it to the HW.
 *
 * NOTE(review): the ctrl parameter is currently unused — kept for interface
 * stability; confirm whether it was meant to gate a partial copy.
 */
static void
fill_query(nir_builder *b,
           nir_def *hw_stack_addr,
           nir_def *shadow_stack_addr,
           nir_def *ctrl)
{
   brw_nir_memcpy_global(b, hw_stack_addr, 64, shadow_stack_addr, 64,
                         BRW_RT_SIZEOF_RAY_QUERY);
}
|
||||
|
||||
/* Copy a query's state from the HW stack slot back into its shadow memory
 * after the HW has processed it (inverse of fill_query).
 */
static void
spill_query(nir_builder *b,
            nir_def *hw_stack_addr,
            nir_def *shadow_stack_addr)
{
   brw_nir_memcpy_global(b, shadow_stack_addr, 64, hw_stack_addr, 64,
                         BRW_RT_SIZEOF_RAY_QUERY);
}
|
||||
|
||||
|
||||
/* Lower one rq_* intrinsic into loads/stores on the query's stack memory
 * (HW slot or shadow copy) plus updates of the packed ctrl/level state.
 * The intrinsic is removed; any SSA result is rewritten to the computed
 * value.
 */
static void
lower_ray_query_intrinsic(nir_builder *b,
                          nir_intrinsic_instr *intrin,
                          struct lowering_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

   b->cursor = nir_instr_remove(&intrin->instr);

   /* shadow_stack_addr is NULL when the single HW slot is used directly;
    * otherwise all bookkeeping happens in the shadow copy.
    */
   nir_deref_instr *ctrl_level_deref;
   nir_def *shadow_stack_addr =
      get_ray_query_shadow_addr(b, deref, state, &ctrl_level_deref);
   nir_def *hw_stack_addr =
      brw_nir_rt_sync_stack_addr(b, state->globals.base_mem_addr, state->devinfo);
   nir_def *stack_addr = shadow_stack_addr ? shadow_stack_addr : hw_stack_addr;

   switch (intrin->intrinsic) {
   case nir_intrinsic_rq_initialize: {
      nir_def *as_addr = intrin->src[1].ssa;
      nir_def *ray_flags = intrin->src[2].ssa;
      /* From the SPIR-V spec:
       *
       *    "Only the 8 least-significant bits of Cull Mask are used by
       *    this instruction - other bits are ignored.
       *
       *    Only the 16 least-significant bits of Miss Index are used by
       *    this instruction - other bits are ignored."
       */
      nir_def *cull_mask = nir_iand_imm(b, intrin->src[3].ssa, 0xff);
      nir_def *ray_orig = intrin->src[4].ssa;
      nir_def *ray_t_min = intrin->src[5].ssa;
      nir_def *ray_dir = intrin->src[6].ssa;
      nir_def *ray_t_max = intrin->src[7].ssa;

      nir_def *root_node_ptr =
         brw_nir_rt_acceleration_structure_to_root_node(b, as_addr);

      struct brw_nir_rt_mem_ray_defs ray_defs = {
         .root_node_ptr = root_node_ptr,
         .ray_flags = nir_u2u16(b, ray_flags),
         .ray_mask = cull_mask,
         .orig = ray_orig,
         .t_near = ray_t_min,
         .dir = ray_dir,
         .t_far = ray_t_max,
      };

      nir_def *ray_addr =
         brw_nir_rt_mem_ray_addr(b, stack_addr, BRW_RT_BVH_LEVEL_WORLD);

      /* Reset the query record, then write the initial ray. */
      brw_nir_rt_query_mark_init(b, stack_addr);
      brw_nir_rt_store_mem_ray_query_at_addr(b, ray_addr, &ray_defs);

      update_trace_ctrl_level(b, ctrl_level_deref,
                              NULL, NULL,
                              nir_imm_int(b, GEN_RT_TRACE_RAY_INITAL),
                              nir_imm_int(b, BRW_RT_BVH_LEVEL_WORLD));
      break;
   }

   case nir_intrinsic_rq_proceed: {
      nir_def *not_done =
         nir_inot(b, brw_nir_rt_query_done(b, stack_addr));
      nir_def *not_done_then, *not_done_else;

      nir_push_if(b, not_done);
      {
         nir_def *ctrl, *level;
         update_trace_ctrl_level(b, ctrl_level_deref,
                                 &ctrl, &level,
                                 NULL,
                                 NULL);

         /* Mark the query as done because handing it over to the HW for
          * processing. If the HW make any progress, it will write back some
          * data and as a side effect, clear the "done" bit. If no progress is
          * made, HW does not write anything back and we can use this bit to
          * detect that.
          */
         brw_nir_rt_query_mark_done(b, stack_addr);

         if (shadow_stack_addr)
            fill_query(b, hw_stack_addr, shadow_stack_addr, ctrl);

         /* Synchronous trace: the HW continues the traversal in place. */
         nir_trace_ray_intel(b, state->rq_globals, level, ctrl, .synchronous = true);

         struct brw_nir_rt_mem_hit_defs hit_in = {};
         brw_nir_rt_load_mem_hit_from_addr(b, &hit_in, hw_stack_addr, false);

         if (shadow_stack_addr)
            spill_query(b, hw_stack_addr, shadow_stack_addr);

         /* Next proceed resumes the traversal at the level the HW stopped at. */
         update_trace_ctrl_level(b, ctrl_level_deref,
                                 NULL, NULL,
                                 nir_imm_int(b, GEN_RT_TRACE_RAY_CONTINUE),
                                 hit_in.bvh_level);

         not_done_then = nir_inot(b, hit_in.done);
      }
      nir_push_else(b, NULL);
      {
         not_done_else = nir_imm_false(b);
      }
      nir_pop_if(b, NULL);
      not_done = nir_if_phi(b, not_done_then, not_done_else);
      nir_def_rewrite_uses(&intrin->def, not_done);
      break;
   }

   case nir_intrinsic_rq_confirm_intersection: {
      /* Promote the candidate (potential) hit to the committed hit. */
      brw_nir_memcpy_global(b,
                            brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, true), 16,
                            brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, false), 16,
                            BRW_RT_SIZEOF_HIT_INFO);
      update_trace_ctrl_level(b, ctrl_level_deref,
                              NULL, NULL,
                              nir_imm_int(b, GEN_RT_TRACE_RAY_COMMIT),
                              nir_imm_int(b, BRW_RT_BVH_LEVEL_OBJECT));
      break;
   }

   case nir_intrinsic_rq_generate_intersection: {
      /* src[1] is the generated hit T. */
      brw_nir_rt_generate_hit_addr(b, stack_addr, intrin->src[1].ssa);
      update_trace_ctrl_level(b, ctrl_level_deref,
                              NULL, NULL,
                              nir_imm_int(b, GEN_RT_TRACE_RAY_COMMIT),
                              nir_imm_int(b, BRW_RT_BVH_LEVEL_OBJECT));
      break;
   }

   case nir_intrinsic_rq_terminate: {
      brw_nir_rt_query_mark_done(b, stack_addr);
      break;
   }

   case nir_intrinsic_rq_load: {
      const bool committed = nir_intrinsic_committed(intrin);

      struct brw_nir_rt_mem_ray_defs world_ray_in = {};
      struct brw_nir_rt_mem_ray_defs object_ray_in = {};
      struct brw_nir_rt_mem_hit_defs hit_in = {};
      brw_nir_rt_load_mem_ray_from_addr(b, &world_ray_in, stack_addr,
                                        BRW_RT_BVH_LEVEL_WORLD);
      brw_nir_rt_load_mem_ray_from_addr(b, &object_ray_in, stack_addr,
                                        BRW_RT_BVH_LEVEL_OBJECT);
      brw_nir_rt_load_mem_hit_from_addr(b, &hit_in, stack_addr, committed);

      nir_def *sysval = NULL;
      switch (nir_intrinsic_ray_query_value(intrin)) {
      case nir_ray_query_value_intersection_type:
         if (committed) {
            /* Values we want to generate :
             *
             * RayQueryCommittedIntersectionNoneEXT = 0U <= hit_in.valid == false
             * RayQueryCommittedIntersectionTriangleEXT = 1U <= hit_in.leaf_type == BRW_RT_BVH_NODE_TYPE_QUAD (4)
             * RayQueryCommittedIntersectionGeneratedEXT = 2U <= hit_in.leaf_type == BRW_RT_BVH_NODE_TYPE_PROCEDURAL (3)
             */
            sysval =
               nir_bcsel(b, nir_ieq_imm(b, hit_in.leaf_type, 4),
                         nir_imm_int(b, 1), nir_imm_int(b, 2));
            sysval =
               nir_bcsel(b, hit_in.valid,
                         sysval, nir_imm_int(b, 0));
         } else {
            /* 0 -> triangle, 1 -> AABB */
            sysval =
               nir_b2i32(b,
                         nir_ieq_imm(b, hit_in.leaf_type,
                                     BRW_RT_BVH_NODE_TYPE_PROCEDURAL));
         }
         break;

      case nir_ray_query_value_intersection_t:
         sysval = hit_in.t;
         break;

      case nir_ray_query_value_intersection_instance_custom_index: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.instance_id;
         break;
      }

      case nir_ray_query_value_intersection_instance_id: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.instance_index;
         break;
      }

      case nir_ray_query_value_intersection_instance_sbt_index: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.contribution_to_hit_group_index;
         break;
      }

      case nir_ray_query_value_intersection_geometry_index: {
         /* Geometry index lives in the low 29 bits of the second dword of
          * the primitive leaf.
          */
         nir_def *geometry_index_dw =
            nir_load_global(b, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4), 4,
                            1, 32);
         sysval = nir_iand_imm(b, geometry_index_dw, BITFIELD_MASK(29));
         break;
      }

      case nir_ray_query_value_intersection_primitive_index:
         sysval = brw_nir_rt_load_primitive_id_from_hit(b, NULL /* is_procedural */, &hit_in);
         break;

      case nir_ray_query_value_intersection_barycentrics:
         sysval = hit_in.tri_bary;
         break;

      case nir_ray_query_value_intersection_front_face:
         sysval = hit_in.front_face;
         break;

      case nir_ray_query_value_intersection_object_ray_direction:
         /* NOTE(review): returns the world-space ray here — confirm this is
          * intended for the object-space query.
          */
         sysval = world_ray_in.dir;
         break;

      case nir_ray_query_value_intersection_object_ray_origin:
         /* NOTE(review): world-space ray used here as well — confirm. */
         sysval = world_ray_in.orig;
         break;

      case nir_ray_query_value_intersection_object_to_world: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.object_to_world[nir_intrinsic_column(intrin)];
         break;
      }

      case nir_ray_query_value_intersection_world_to_object: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.world_to_object[nir_intrinsic_column(intrin)];
         break;
      }

      case nir_ray_query_value_intersection_candidate_aabb_opaque:
         sysval = hit_in.front_face;
         break;

      case nir_ray_query_value_tmin:
         sysval = world_ray_in.t_near;
         break;

      case nir_ray_query_value_flags:
         sysval = nir_u2u32(b, world_ray_in.ray_flags);
         break;

      case nir_ray_query_value_world_ray_direction:
         sysval = world_ray_in.dir;
         break;

      case nir_ray_query_value_world_ray_origin:
         sysval = world_ray_in.orig;
         break;

      case nir_ray_query_value_intersection_triangle_vertex_positions: {
         struct brw_nir_rt_bvh_primitive_leaf_positions_defs pos;
         brw_nir_rt_load_bvh_primitive_leaf_positions(b, &pos, hit_in.prim_leaf_ptr);
         sysval = pos.positions[nir_intrinsic_column(intrin)];
         break;
      }

      default:
         unreachable("Invalid ray query");
      }

      assert(sysval);
      nir_def_rewrite_uses(&intrin->def, sysval);
      break;
   }

   default:
      unreachable("Invalid intrinsic");
   }
}
|
||||
|
||||
static void
|
||||
lower_ray_query_impl(nir_function_impl *impl, struct lowering_state *state)
|
||||
{
|
||||
nir_builder _b, *b = &_b;
|
||||
_b = nir_builder_at(nir_before_impl(impl));
|
||||
|
||||
state->rq_globals = nir_load_ray_query_global_intel(b);
|
||||
|
||||
brw_nir_rt_load_globals_addr(b, &state->globals, state->rq_globals);
|
||||
|
||||
nir_foreach_block_safe(block, impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
if (intrin->intrinsic != nir_intrinsic_rq_initialize &&
|
||||
intrin->intrinsic != nir_intrinsic_rq_terminate &&
|
||||
intrin->intrinsic != nir_intrinsic_rq_proceed &&
|
||||
intrin->intrinsic != nir_intrinsic_rq_generate_intersection &&
|
||||
intrin->intrinsic != nir_intrinsic_rq_confirm_intersection &&
|
||||
intrin->intrinsic != nir_intrinsic_rq_load)
|
||||
continue;
|
||||
|
||||
lower_ray_query_intrinsic(b, intrin, state);
|
||||
}
|
||||
}
|
||||
|
||||
nir_metadata_preserve(impl, nir_metadata_none);
|
||||
}
|
||||
|
||||
bool
|
||||
brw_nir_lower_ray_queries(nir_shader *shader,
|
||||
const struct intel_device_info *devinfo)
|
||||
{
|
||||
assert(exec_list_length(&shader->functions) == 1);
|
||||
|
||||
struct lowering_state state = {
|
||||
.devinfo = devinfo,
|
||||
.impl = nir_shader_get_entrypoint(shader),
|
||||
.queries = _mesa_pointer_hash_table_create(NULL),
|
||||
};
|
||||
|
||||
/* Map all query variable to internal type variables */
|
||||
nir_foreach_function_temp_variable(var, state.impl)
|
||||
register_opaque_var(var, &state);
|
||||
hash_table_foreach(state.queries, entry)
|
||||
create_internal_var(entry->data, &state);
|
||||
|
||||
bool progress = state.n_queries > 0;
|
||||
|
||||
if (progress) {
|
||||
lower_ray_query_impl(state.impl, &state);
|
||||
|
||||
nir_remove_dead_derefs(shader);
|
||||
nir_remove_dead_variables(shader,
|
||||
nir_var_shader_temp | nir_var_function_temp,
|
||||
NULL);
|
||||
|
||||
nir_metadata_preserve(state.impl, nir_metadata_none);
|
||||
}
|
||||
|
||||
ralloc_free(state.queries);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
|
@ -1,386 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "brw_nir_rt.h"
|
||||
#include "brw_nir_rt_builder.h"
|
||||
|
||||
static nir_def *
|
||||
build_leaf_is_procedural(nir_builder *b, struct brw_nir_rt_mem_hit_defs *hit)
|
||||
{
|
||||
switch (b->shader->info.stage) {
|
||||
case MESA_SHADER_ANY_HIT:
|
||||
/* Any-hit shaders are always compiled into intersection shaders for
|
||||
* procedural geometry. If we got here in an any-hit shader, it's for
|
||||
* triangles.
|
||||
*/
|
||||
return nir_imm_false(b);
|
||||
|
||||
case MESA_SHADER_INTERSECTION:
|
||||
return nir_imm_true(b);
|
||||
|
||||
default:
|
||||
return nir_ieq_imm(b, hit->leaf_type,
|
||||
BRW_RT_BVH_NODE_TYPE_PROCEDURAL);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
lower_rt_intrinsics_impl(nir_function_impl *impl,
|
||||
const struct intel_device_info *devinfo)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
nir_builder build = nir_builder_at(nir_before_impl(impl));
|
||||
nir_builder *b = &build;
|
||||
|
||||
struct brw_nir_rt_globals_defs globals;
|
||||
brw_nir_rt_load_globals(b, &globals);
|
||||
|
||||
nir_def *hotzone_addr = brw_nir_rt_sw_hotzone_addr(b, devinfo);
|
||||
nir_def *hotzone = nir_load_global(b, hotzone_addr, 16, 4, 32);
|
||||
|
||||
gl_shader_stage stage = b->shader->info.stage;
|
||||
struct brw_nir_rt_mem_ray_defs world_ray_in = {};
|
||||
struct brw_nir_rt_mem_ray_defs object_ray_in = {};
|
||||
struct brw_nir_rt_mem_hit_defs hit_in = {};
|
||||
switch (stage) {
|
||||
case MESA_SHADER_ANY_HIT:
|
||||
case MESA_SHADER_CLOSEST_HIT:
|
||||
case MESA_SHADER_INTERSECTION:
|
||||
brw_nir_rt_load_mem_hit(b, &hit_in,
|
||||
stage == MESA_SHADER_CLOSEST_HIT);
|
||||
brw_nir_rt_load_mem_ray(b, &object_ray_in,
|
||||
BRW_RT_BVH_LEVEL_OBJECT);
|
||||
FALLTHROUGH;
|
||||
|
||||
case MESA_SHADER_MISS:
|
||||
brw_nir_rt_load_mem_ray(b, &world_ray_in,
|
||||
BRW_RT_BVH_LEVEL_WORLD);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
nir_def *thread_stack_base_addr = brw_nir_rt_sw_stack_addr(b, devinfo);
|
||||
nir_def *stack_base_offset = nir_channel(b, hotzone, 0);
|
||||
nir_def *stack_base_addr =
|
||||
nir_iadd(b, thread_stack_base_addr, nir_u2u64(b, stack_base_offset));
|
||||
ASSERTED bool seen_scratch_base_ptr_load = false;
|
||||
ASSERTED bool found_resume = false;
|
||||
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
|
||||
b->cursor = nir_after_instr(&intrin->instr);
|
||||
|
||||
nir_def *sysval = NULL;
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_scratch_base_ptr:
|
||||
assert(nir_intrinsic_base(intrin) == 1);
|
||||
seen_scratch_base_ptr_load = true;
|
||||
sysval = stack_base_addr;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_btd_stack_push_intel: {
|
||||
int32_t stack_size = nir_intrinsic_stack_size(intrin);
|
||||
if (stack_size > 0) {
|
||||
nir_def *child_stack_offset =
|
||||
nir_iadd_imm(b, stack_base_offset, stack_size);
|
||||
nir_store_global(b, hotzone_addr, 16, child_stack_offset, 0x1);
|
||||
}
|
||||
nir_instr_remove(instr);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_rt_resume:
|
||||
/* This is the first "interesting" instruction */
|
||||
assert(block == nir_start_block(impl));
|
||||
assert(!seen_scratch_base_ptr_load);
|
||||
found_resume = true;
|
||||
|
||||
int32_t stack_size = nir_intrinsic_stack_size(intrin);
|
||||
if (stack_size > 0) {
|
||||
stack_base_offset =
|
||||
nir_iadd_imm(b, stack_base_offset, -stack_size);
|
||||
nir_store_global(b, hotzone_addr, 16, stack_base_offset, 0x1);
|
||||
stack_base_addr = nir_iadd(b, thread_stack_base_addr,
|
||||
nir_u2u64(b, stack_base_offset));
|
||||
}
|
||||
nir_instr_remove(instr);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_uniform: {
|
||||
/* We don't want to lower this in the launch trampoline. */
|
||||
if (stage == MESA_SHADER_COMPUTE)
|
||||
break;
|
||||
|
||||
sysval = brw_nir_load_global_const(b, intrin,
|
||||
nir_load_btd_global_arg_addr_intel(b),
|
||||
BRW_RT_PUSH_CONST_OFFSET);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_ray_launch_id:
|
||||
sysval = nir_channels(b, hotzone, 0xe);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_launch_size:
|
||||
sysval = globals.launch_size;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_world_origin:
|
||||
sysval = world_ray_in.orig;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_world_direction:
|
||||
sysval = world_ray_in.dir;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_object_origin:
|
||||
sysval = object_ray_in.orig;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_object_direction:
|
||||
sysval = object_ray_in.dir;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_t_min:
|
||||
/* It shouldn't matter which we pull this from */
|
||||
sysval = world_ray_in.t_near;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_t_max:
|
||||
if (stage == MESA_SHADER_MISS)
|
||||
sysval = world_ray_in.t_far;
|
||||
else
|
||||
sysval = hit_in.t;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_primitive_id:
|
||||
sysval = brw_nir_rt_load_primitive_id_from_hit(b,
|
||||
build_leaf_is_procedural(b, &hit_in),
|
||||
&hit_in);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_instance_id: {
|
||||
struct brw_nir_rt_bvh_instance_leaf_defs leaf;
|
||||
brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
|
||||
sysval = leaf.instance_index;
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_ray_object_to_world: {
|
||||
struct brw_nir_rt_bvh_instance_leaf_defs leaf;
|
||||
brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
|
||||
sysval = leaf.object_to_world[nir_intrinsic_column(intrin)];
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_ray_world_to_object: {
|
||||
struct brw_nir_rt_bvh_instance_leaf_defs leaf;
|
||||
brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
|
||||
sysval = leaf.world_to_object[nir_intrinsic_column(intrin)];
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_ray_hit_kind: {
|
||||
nir_def *tri_hit_kind =
|
||||
nir_bcsel(b, hit_in.front_face,
|
||||
nir_imm_int(b, BRW_RT_HIT_KIND_FRONT_FACE),
|
||||
nir_imm_int(b, BRW_RT_HIT_KIND_BACK_FACE));
|
||||
sysval = nir_bcsel(b, build_leaf_is_procedural(b, &hit_in),
|
||||
hit_in.aabb_hit_kind, tri_hit_kind);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_ray_flags:
|
||||
/* We need to fetch the original ray flags we stored in the
|
||||
* leaf pointer, because the actual ray flags we get here
|
||||
* will include any flags passed on the pipeline at creation
|
||||
* time, and the spec for IncomingRayFlagsKHR says:
|
||||
* Setting pipeline flags on the raytracing pipeline must not
|
||||
* cause any corresponding flags to be set in variables with
|
||||
* this decoration.
|
||||
*/
|
||||
sysval = nir_u2u32(b, world_ray_in.inst_leaf_ptr);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_cull_mask:
|
||||
sysval = nir_u2u32(b, world_ray_in.ray_mask);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_geometry_index: {
|
||||
nir_def *geometry_index_dw =
|
||||
nir_load_global(b, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4), 4,
|
||||
1, 32);
|
||||
sysval = nir_iand_imm(b, geometry_index_dw, BITFIELD_MASK(29));
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_ray_instance_custom_index: {
|
||||
struct brw_nir_rt_bvh_instance_leaf_defs leaf;
|
||||
brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
|
||||
sysval = leaf.instance_id;
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_shader_record_ptr:
|
||||
/* We can't handle this intrinsic in resume shaders because the
|
||||
* handle we get there won't be from the original SBT. The shader
|
||||
* call lowering/splitting pass should have ensured that this
|
||||
* value was spilled from the initial shader and unspilled in any
|
||||
* resume shaders that need it.
|
||||
*/
|
||||
assert(!found_resume);
|
||||
sysval = nir_load_btd_local_arg_addr_intel(b);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_base_mem_addr_intel:
|
||||
sysval = globals.base_mem_addr;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_hw_stack_size_intel:
|
||||
sysval = nir_imul_imm(b, globals.hw_stack_size, 64);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_sw_stack_size_intel:
|
||||
sysval = nir_imul_imm(b, globals.sw_stack_size, 64);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_num_dss_rt_stacks_intel:
|
||||
sysval = globals.num_dss_rt_stacks;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_hit_sbt_addr_intel:
|
||||
sysval = globals.hit_sbt_addr;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_hit_sbt_stride_intel:
|
||||
sysval = globals.hit_sbt_stride;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_miss_sbt_addr_intel:
|
||||
sysval = globals.miss_sbt_addr;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_miss_sbt_stride_intel:
|
||||
sysval = globals.miss_sbt_stride;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_callable_sbt_addr_intel:
|
||||
sysval = globals.call_sbt_addr;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_callable_sbt_stride_intel:
|
||||
sysval = globals.call_sbt_stride;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_btd_resume_sbt_addr_intel:
|
||||
sysval = nir_pack_64_2x32_split(b,
|
||||
nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW),
|
||||
nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_leaf_procedural_intel:
|
||||
sysval = build_leaf_is_procedural(b, &hit_in);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_triangle_vertex_positions: {
|
||||
struct brw_nir_rt_bvh_primitive_leaf_positions_defs pos;
|
||||
brw_nir_rt_load_bvh_primitive_leaf_positions(b, &pos, hit_in.prim_leaf_ptr);
|
||||
sysval = pos.positions[nir_intrinsic_column(intrin)];
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_leaf_opaque_intel: {
|
||||
if (stage == MESA_SHADER_INTERSECTION) {
|
||||
/* In intersection shaders, the opaque bit is passed to us in
|
||||
* the front_face bit.
|
||||
*/
|
||||
sysval = hit_in.front_face;
|
||||
} else {
|
||||
nir_def *flags_dw =
|
||||
nir_load_global(b, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4), 4,
|
||||
1, 32);
|
||||
sysval = nir_i2b(b, nir_iand_imm(b, flags_dw, 1u << 30));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
|
||||
progress = true;
|
||||
|
||||
if (sysval) {
|
||||
nir_def_rewrite_uses(&intrin->def,
|
||||
sysval);
|
||||
nir_instr_remove(&intrin->instr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
nir_metadata_preserve(impl,
|
||||
progress ?
|
||||
nir_metadata_none :
|
||||
(nir_metadata_block_index |
|
||||
nir_metadata_dominance));
|
||||
}
|
||||
|
||||
/** Lower ray-tracing system values and intrinsics
|
||||
*
|
||||
* In most 3D shader stages, intrinsics are a fairly thin wrapper around
|
||||
* hardware functionality and system values represent magic bits that come
|
||||
* into the shader from FF hardware. Ray-tracing, however, looks a bit more
|
||||
* like the OpenGL 1.0 world where the underlying hardware is simple and most
|
||||
* of the API implementation is software.
|
||||
*
|
||||
* In particular, most things that are treated as system values (or built-ins
|
||||
* in SPIR-V) don't get magically dropped into registers for us. Instead, we
|
||||
* have to fetch them from the relevant data structures shared with the
|
||||
* ray-tracing hardware. Most come from either the RT_DISPATCH_GLOBALS or
|
||||
* from one of the MemHit data structures. Some, such as primitive_id require
|
||||
* us to fetch the leaf address from the MemHit struct and then manually read
|
||||
* the data out of the BVH. Instead of trying to emit all this code deep in
|
||||
* the back-end where we can't effectively optimize it, we lower it all to
|
||||
* global memory access in NIR.
|
||||
*
|
||||
* Once this pass is complete, the only real system values left are the two
|
||||
* argument pointer system values for BTD dispatch: btd_local_arg_addr and
|
||||
* btd_global_arg_addr.
|
||||
*/
|
||||
void
|
||||
brw_nir_lower_rt_intrinsics(nir_shader *nir,
|
||||
const struct intel_device_info *devinfo)
|
||||
{
|
||||
nir_foreach_function_impl(impl, nir) {
|
||||
lower_rt_intrinsics_impl(impl, devinfo);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,329 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2020 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "brw_nir_rt.h"
|
||||
#include "brw_nir_rt_builder.h"
|
||||
#include "nir_phi_builder.h"
|
||||
|
||||
UNUSED static bool
|
||||
no_load_scratch_base_ptr_intrinsic(nir_shader *shader)
|
||||
{
|
||||
nir_foreach_function_impl(impl, shader) {
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
if (intrin->intrinsic == nir_intrinsic_load_scratch_base_ptr)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Insert the appropriate return instruction at the end of the shader */
|
||||
void
|
||||
brw_nir_lower_shader_returns(nir_shader *shader)
|
||||
{
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
||||
|
||||
/* Reserve scratch space at the start of the shader's per-thread scratch
|
||||
* space for the return BINDLESS_SHADER_RECORD address and data payload.
|
||||
* When a shader is called, the calling shader will write the return BSR
|
||||
* address in this region of the callee's scratch space.
|
||||
*
|
||||
* We could also put it at the end of the caller's scratch space. However,
|
||||
* doing this way means that a shader never accesses its caller's scratch
|
||||
* space unless given an explicit pointer (such as for ray payloads). It
|
||||
* also makes computing the address easier given that we want to apply an
|
||||
* alignment to the scratch offset to ensure we can make alignment
|
||||
* assumptions in the called shader.
|
||||
*
|
||||
* This isn't needed for ray-gen shaders because they end the thread and
|
||||
* never return to the calling trampoline shader.
|
||||
*/
|
||||
assert(no_load_scratch_base_ptr_intrinsic(shader));
|
||||
if (shader->info.stage != MESA_SHADER_RAYGEN)
|
||||
shader->scratch_size += BRW_BTD_STACK_CALLEE_DATA_SIZE;
|
||||
|
||||
nir_builder b = nir_builder_create(impl);
|
||||
|
||||
set_foreach(impl->end_block->predecessors, block_entry) {
|
||||
struct nir_block *block = (void *)block_entry->key;
|
||||
b.cursor = nir_after_block_before_jump(block);
|
||||
|
||||
switch (shader->info.stage) {
|
||||
case MESA_SHADER_RAYGEN:
|
||||
/* A raygen shader is always the root of the shader call tree. When
|
||||
* it ends, we retire the bindless stack ID and no further shaders
|
||||
* will be executed.
|
||||
*/
|
||||
assert(impl->end_block->predecessors->entries == 1);
|
||||
brw_nir_btd_retire(&b);
|
||||
break;
|
||||
|
||||
case MESA_SHADER_ANY_HIT:
|
||||
/* The default action of an any-hit shader is to accept the ray
|
||||
* intersection. Any-hit shaders may have more than one exit. Only
|
||||
* the final "normal" exit will actually need to accept the
|
||||
* intersection as any others should come from nir_jump_halt
|
||||
* instructions inserted after ignore_ray_intersection or
|
||||
* terminate_ray or the like. However, inserting an accept after
|
||||
* the ignore or terminate is safe because it'll get deleted later.
|
||||
*/
|
||||
nir_accept_ray_intersection(&b);
|
||||
break;
|
||||
|
||||
case MESA_SHADER_CALLABLE:
|
||||
case MESA_SHADER_MISS:
|
||||
case MESA_SHADER_CLOSEST_HIT:
|
||||
/* Callable, miss, and closest-hit shaders don't take any special
|
||||
* action at the end. They simply return back to the previous shader
|
||||
* in the call stack.
|
||||
*/
|
||||
assert(impl->end_block->predecessors->entries == 1);
|
||||
brw_nir_btd_return(&b);
|
||||
break;
|
||||
|
||||
case MESA_SHADER_INTERSECTION:
|
||||
/* This will be handled by brw_nir_lower_intersection_shader */
|
||||
break;
|
||||
|
||||
default:
|
||||
unreachable("Invalid callable shader stage");
|
||||
}
|
||||
}
|
||||
|
||||
nir_metadata_preserve(impl, nir_metadata_block_index |
|
||||
nir_metadata_dominance);
|
||||
}
|
||||
|
||||
static void
|
||||
store_resume_addr(nir_builder *b, nir_intrinsic_instr *call)
|
||||
{
|
||||
uint32_t call_idx = nir_intrinsic_call_idx(call);
|
||||
uint32_t offset = nir_intrinsic_stack_size(call);
|
||||
|
||||
/* First thing on the called shader's stack is the resume address
|
||||
* followed by a pointer to the payload.
|
||||
*/
|
||||
nir_def *resume_record_addr =
|
||||
nir_iadd_imm(b, nir_load_btd_resume_sbt_addr_intel(b),
|
||||
call_idx * BRW_BTD_RESUME_SBT_STRIDE);
|
||||
/* By the time we get here, any remaining shader/function memory
|
||||
* pointers have been lowered to SSA values.
|
||||
*/
|
||||
nir_def *payload_addr =
|
||||
nir_get_shader_call_payload_src(call)->ssa;
|
||||
brw_nir_rt_store_scratch(b, offset, BRW_BTD_STACK_ALIGN,
|
||||
nir_vec2(b, resume_record_addr, payload_addr),
|
||||
0xf /* write_mask */);
|
||||
|
||||
nir_btd_stack_push_intel(b, offset);
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_shader_trace_ray_instr(struct nir_builder *b, nir_instr *instr, void *data)
|
||||
{
|
||||
struct brw_bs_prog_key *key = data;
|
||||
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
/* Leave nir_intrinsic_rt_resume to be lowered by
|
||||
* brw_nir_lower_rt_intrinsics()
|
||||
*/
|
||||
nir_intrinsic_instr *call = nir_instr_as_intrinsic(instr);
|
||||
if (call->intrinsic != nir_intrinsic_rt_trace_ray)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_instr_remove(instr);
|
||||
|
||||
store_resume_addr(b, call);
|
||||
|
||||
nir_def *as_addr = call->src[0].ssa;
|
||||
nir_def *ray_flags = call->src[1].ssa;
|
||||
/* From the SPIR-V spec:
|
||||
*
|
||||
* "Only the 8 least-significant bits of Cull Mask are used by this
|
||||
* instruction - other bits are ignored.
|
||||
*
|
||||
* Only the 4 least-significant bits of SBT Offset and SBT Stride are
|
||||
* used by this instruction - other bits are ignored.
|
||||
*
|
||||
* Only the 16 least-significant bits of Miss Index are used by this
|
||||
* instruction - other bits are ignored."
|
||||
*/
|
||||
nir_def *cull_mask = nir_iand_imm(b, call->src[2].ssa, 0xff);
|
||||
nir_def *sbt_offset = nir_iand_imm(b, call->src[3].ssa, 0xf);
|
||||
nir_def *sbt_stride = nir_iand_imm(b, call->src[4].ssa, 0xf);
|
||||
nir_def *miss_index = nir_iand_imm(b, call->src[5].ssa, 0xffff);
|
||||
nir_def *ray_orig = call->src[6].ssa;
|
||||
nir_def *ray_t_min = call->src[7].ssa;
|
||||
nir_def *ray_dir = call->src[8].ssa;
|
||||
nir_def *ray_t_max = call->src[9].ssa;
|
||||
|
||||
nir_def *root_node_ptr =
|
||||
brw_nir_rt_acceleration_structure_to_root_node(b, as_addr);
|
||||
|
||||
/* The hardware packet requires an address to the first element of the
|
||||
* hit SBT.
|
||||
*
|
||||
* In order to calculate this, we must multiply the "SBT Offset"
|
||||
* provided to OpTraceRay by the SBT stride provided for the hit SBT in
|
||||
* the call to vkCmdTraceRay() and add that to the base address of the
|
||||
* hit SBT. This stride is not to be confused with the "SBT Stride"
|
||||
* provided to OpTraceRay which is in units of this stride. It's a
|
||||
* rather terrible overload of the word "stride". The hardware docs
|
||||
* calls the SPIR-V stride value the "shader index multiplier" which is
|
||||
* a much more sane name.
|
||||
*/
|
||||
nir_def *hit_sbt_stride_B =
|
||||
nir_load_ray_hit_sbt_stride_intel(b);
|
||||
nir_def *hit_sbt_offset_B =
|
||||
nir_imul(b, sbt_offset, nir_u2u32(b, hit_sbt_stride_B));
|
||||
nir_def *hit_sbt_addr =
|
||||
nir_iadd(b, nir_load_ray_hit_sbt_addr_intel(b),
|
||||
nir_u2u64(b, hit_sbt_offset_B));
|
||||
|
||||
/* The hardware packet takes an address to the miss BSR. */
|
||||
nir_def *miss_sbt_stride_B =
|
||||
nir_load_ray_miss_sbt_stride_intel(b);
|
||||
nir_def *miss_sbt_offset_B =
|
||||
nir_imul(b, miss_index, nir_u2u32(b, miss_sbt_stride_B));
|
||||
nir_def *miss_sbt_addr =
|
||||
nir_iadd(b, nir_load_ray_miss_sbt_addr_intel(b),
|
||||
nir_u2u64(b, miss_sbt_offset_B));
|
||||
|
||||
struct brw_nir_rt_mem_ray_defs ray_defs = {
|
||||
.root_node_ptr = root_node_ptr,
|
||||
/* Combine the shader value given to traceRayEXT() with the pipeline
|
||||
* creation value VkPipelineCreateFlags.
|
||||
*/
|
||||
.ray_flags = nir_ior_imm(b, nir_u2u16(b, ray_flags), key->pipeline_ray_flags),
|
||||
.ray_mask = cull_mask,
|
||||
.hit_group_sr_base_ptr = hit_sbt_addr,
|
||||
.hit_group_sr_stride = nir_u2u16(b, hit_sbt_stride_B),
|
||||
.miss_sr_ptr = miss_sbt_addr,
|
||||
.orig = ray_orig,
|
||||
.t_near = ray_t_min,
|
||||
.dir = ray_dir,
|
||||
.t_far = ray_t_max,
|
||||
.shader_index_multiplier = sbt_stride,
|
||||
/* The instance leaf pointer is unused in the top level BVH traversal
|
||||
* since we always start from the root node. We can reuse that field to
|
||||
* store the ray_flags handed to traceRayEXT(). This will be reloaded
|
||||
* when the shader accesses gl_IncomingRayFlagsEXT (see
|
||||
* nir_intrinsic_load_ray_flags brw_nir_lower_rt_intrinsic.c)
|
||||
*/
|
||||
.inst_leaf_ptr = nir_u2u64(b, ray_flags),
|
||||
};
|
||||
brw_nir_rt_store_mem_ray(b, &ray_defs, BRW_RT_BVH_LEVEL_WORLD);
|
||||
|
||||
nir_trace_ray_intel(b,
|
||||
nir_load_btd_global_arg_addr_intel(b),
|
||||
nir_imm_int(b, BRW_RT_BVH_LEVEL_WORLD),
|
||||
nir_imm_int(b, GEN_RT_TRACE_RAY_INITAL),
|
||||
.synchronous = false);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_shader_call_instr(struct nir_builder *b, nir_intrinsic_instr *call,
|
||||
void *data)
|
||||
{
|
||||
if (call->intrinsic != nir_intrinsic_rt_execute_callable)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_instr_remove(&call->instr);
|
||||
|
||||
store_resume_addr(b, call);
|
||||
|
||||
nir_def *sbt_offset32 =
|
||||
nir_imul(b, call->src[0].ssa,
|
||||
nir_u2u32(b, nir_load_callable_sbt_stride_intel(b)));
|
||||
nir_def *sbt_addr =
|
||||
nir_iadd(b, nir_load_callable_sbt_addr_intel(b),
|
||||
nir_u2u64(b, sbt_offset32));
|
||||
brw_nir_btd_spawn(b, sbt_addr);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_nir_lower_shader_calls(nir_shader *shader, struct brw_bs_prog_key *key)
|
||||
{
|
||||
bool a = nir_shader_instructions_pass(shader,
|
||||
lower_shader_trace_ray_instr,
|
||||
nir_metadata_none,
|
||||
key);
|
||||
bool b = nir_shader_intrinsics_pass(shader, lower_shader_call_instr,
|
||||
nir_metadata_block_index |
|
||||
nir_metadata_dominance,
|
||||
NULL);
|
||||
return a || b;
|
||||
}
|
||||
|
||||
/** Creates a trivial return shader
|
||||
*
|
||||
* In most cases this shader doesn't actually do anything. It just needs to
|
||||
* return to the caller.
|
||||
*
|
||||
* By default, our HW has the ability to handle the fact that a shader is not
|
||||
* available and will execute the next following shader in the tracing call.
|
||||
* For instance, a RAYGEN shader traces a ray, the tracing generates a hit,
|
||||
* but there is no ANYHIT shader available. The HW should follow up by
|
||||
* execution the CLOSESTHIT shader.
|
||||
*
|
||||
* This default behavior can be changed through the RT_CTRL register
|
||||
* (privileged access) and when NULL shader checks are disabled, the HW will
|
||||
* instead call the call stack handler (this shader). This is what i915 is
|
||||
* doing as part of Wa_14013202645.
|
||||
*
|
||||
* In order to ensure the call to the CLOSESTHIT shader, this shader needs to
|
||||
* commit the ray and will not proceed with the BTD return. Similarly when the
|
||||
* same thing happen with the INTERSECTION shader, we should just carry on the
|
||||
* ray traversal with the continue operation.
|
||||
*
|
||||
*/
|
||||
nir_shader *
|
||||
brw_nir_create_trivial_return_shader(const struct brw_compiler *compiler,
|
||||
void *mem_ctx)
|
||||
{
|
||||
const nir_shader_compiler_options *nir_options =
|
||||
compiler->nir_options[MESA_SHADER_CALLABLE];
|
||||
|
||||
nir_builder _b = nir_builder_init_simple_shader(MESA_SHADER_CALLABLE,
|
||||
nir_options,
|
||||
"RT Trivial Return");
|
||||
nir_builder *b = &_b;
|
||||
|
||||
ralloc_steal(mem_ctx, b->shader);
|
||||
nir_shader *nir = b->shader;
|
||||
|
||||
NIR_PASS_V(nir, brw_nir_lower_shader_returns);
|
||||
|
||||
return nir;
|
||||
}
|
||||
|
|
@ -1,536 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2020 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "intel_nir.h"
|
||||
#include "brw_nir_rt.h"
|
||||
#include "brw_nir_rt_builder.h"
|
||||
#include "intel_nir.h"
|
||||
|
||||
static bool
|
||||
resize_deref(nir_builder *b, nir_deref_instr *deref,
|
||||
unsigned num_components, unsigned bit_size)
|
||||
{
|
||||
if (deref->def.num_components == num_components &&
|
||||
deref->def.bit_size == bit_size)
|
||||
return false;
|
||||
|
||||
/* NIR requires array indices have to match the deref bit size */
|
||||
if (deref->def.bit_size != bit_size &&
|
||||
(deref->deref_type == nir_deref_type_array ||
|
||||
deref->deref_type == nir_deref_type_ptr_as_array)) {
|
||||
b->cursor = nir_before_instr(&deref->instr);
|
||||
nir_def *idx;
|
||||
if (nir_src_is_const(deref->arr.index)) {
|
||||
idx = nir_imm_intN_t(b, nir_src_as_int(deref->arr.index), bit_size);
|
||||
} else {
|
||||
idx = nir_i2iN(b, deref->arr.index.ssa, bit_size);
|
||||
}
|
||||
nir_src_rewrite(&deref->arr.index, idx);
|
||||
}
|
||||
|
||||
deref->def.num_components = num_components;
|
||||
deref->def.bit_size = bit_size;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_rt_io_derefs(nir_shader *shader)
|
||||
{
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
||||
|
||||
bool progress = false;
|
||||
|
||||
unsigned num_shader_call_vars = 0;
|
||||
nir_foreach_variable_with_modes(var, shader, nir_var_shader_call_data)
|
||||
num_shader_call_vars++;
|
||||
|
||||
unsigned num_ray_hit_attrib_vars = 0;
|
||||
nir_foreach_variable_with_modes(var, shader, nir_var_ray_hit_attrib)
|
||||
num_ray_hit_attrib_vars++;
|
||||
|
||||
/* At most one payload is allowed because it's an input. Technically, this
|
||||
* is also true for hit attribute variables. However, after we inline an
|
||||
* any-hit shader into an intersection shader, we can end up with multiple
|
||||
* hit attribute variables. They'll end up mapping to a cast from the same
|
||||
* base pointer so this is fine.
|
||||
*/
|
||||
assert(num_shader_call_vars <= 1);
|
||||
|
||||
nir_builder b = nir_builder_at(nir_before_impl(impl));
|
||||
|
||||
nir_def *call_data_addr = NULL;
|
||||
if (num_shader_call_vars > 0) {
|
||||
assert(shader->scratch_size >= BRW_BTD_STACK_CALLEE_DATA_SIZE);
|
||||
call_data_addr =
|
||||
brw_nir_rt_load_scratch(&b, BRW_BTD_STACK_CALL_DATA_PTR_OFFSET, 8,
|
||||
1, 64);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
gl_shader_stage stage = shader->info.stage;
|
||||
nir_def *hit_attrib_addr = NULL;
|
||||
if (num_ray_hit_attrib_vars > 0) {
|
||||
assert(stage == MESA_SHADER_ANY_HIT ||
|
||||
stage == MESA_SHADER_CLOSEST_HIT ||
|
||||
stage == MESA_SHADER_INTERSECTION);
|
||||
nir_def *hit_addr =
|
||||
brw_nir_rt_mem_hit_addr(&b, stage == MESA_SHADER_CLOSEST_HIT);
|
||||
/* The vec2 barycentrics are in 2nd and 3rd dwords of MemHit */
|
||||
nir_def *bary_addr = nir_iadd_imm(&b, hit_addr, 4);
|
||||
hit_attrib_addr = nir_bcsel(&b, nir_load_leaf_procedural_intel(&b),
|
||||
brw_nir_rt_hit_attrib_data_addr(&b),
|
||||
bary_addr);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_deref)
|
||||
continue;
|
||||
|
||||
nir_deref_instr *deref = nir_instr_as_deref(instr);
|
||||
if (nir_deref_mode_is(deref, nir_var_shader_call_data)) {
|
||||
deref->modes = nir_var_function_temp;
|
||||
if (deref->deref_type == nir_deref_type_var) {
|
||||
b.cursor = nir_before_instr(&deref->instr);
|
||||
nir_deref_instr *cast =
|
||||
nir_build_deref_cast(&b, call_data_addr,
|
||||
nir_var_function_temp,
|
||||
deref->var->type, 0);
|
||||
nir_def_rewrite_uses(&deref->def,
|
||||
&cast->def);
|
||||
nir_instr_remove(&deref->instr);
|
||||
progress = true;
|
||||
}
|
||||
} else if (nir_deref_mode_is(deref, nir_var_ray_hit_attrib)) {
|
||||
deref->modes = nir_var_function_temp;
|
||||
if (deref->deref_type == nir_deref_type_var) {
|
||||
b.cursor = nir_before_instr(&deref->instr);
|
||||
nir_deref_instr *cast =
|
||||
nir_build_deref_cast(&b, hit_attrib_addr,
|
||||
nir_var_function_temp,
|
||||
deref->type, 0);
|
||||
nir_def_rewrite_uses(&deref->def,
|
||||
&cast->def);
|
||||
nir_instr_remove(&deref->instr);
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* We're going to lower all function_temp memory to scratch using
|
||||
* 64-bit addresses. We need to resize all our derefs first or else
|
||||
* nir_lower_explicit_io will have a fit.
|
||||
*/
|
||||
if (nir_deref_mode_is(deref, nir_var_function_temp) &&
|
||||
resize_deref(&b, deref, 1, 64))
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (progress) {
|
||||
nir_metadata_preserve(impl, nir_metadata_block_index |
|
||||
nir_metadata_dominance);
|
||||
} else {
|
||||
nir_metadata_preserve(impl, nir_metadata_all);
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
/** Lowers ray-tracing shader I/O and scratch access
|
||||
*
|
||||
* SPV_KHR_ray_tracing adds three new types of I/O, each of which need their
|
||||
* own bit of special care:
|
||||
*
|
||||
* - Shader payload data: This is represented by the IncomingCallableData
|
||||
* and IncomingRayPayload storage classes which are both represented by
|
||||
* nir_var_call_data in NIR. There is at most one of these per-shader and
|
||||
* they contain payload data passed down the stack from the parent shader
|
||||
* when it calls executeCallable() or traceRay(). In our implementation,
|
||||
* the actual storage lives in the calling shader's scratch space and we're
|
||||
* passed a pointer to it.
|
||||
*
|
||||
* - Hit attribute data: This is represented by the HitAttribute storage
|
||||
* class in SPIR-V and nir_var_ray_hit_attrib in NIR. For triangle
|
||||
* geometry, it's supposed to contain two floats which are the barycentric
|
||||
* coordinates. For AABS/procedural geometry, it contains the hit data
|
||||
* written out by the intersection shader. In our implementation, it's a
|
||||
* 64-bit pointer which points either to the u/v area of the relevant
|
||||
* MemHit data structure or the space right after the HW ray stack entry.
|
||||
*
|
||||
* - Shader record buffer data: This allows read-only access to the data
|
||||
* stored in the SBT right after the bindless shader handles. It's
|
||||
* effectively a UBO with a magic address. Coming out of spirv_to_nir,
|
||||
* we get a nir_intrinsic_load_shader_record_ptr which is cast to a
|
||||
* nir_var_mem_global deref and all access happens through that. The
|
||||
* shader_record_ptr system value is handled in brw_nir_lower_rt_intrinsics
|
||||
* and we assume nir_lower_explicit_io is called elsewhere thanks to
|
||||
* VK_KHR_buffer_device_address so there's really nothing to do here.
|
||||
*
|
||||
* We also handle lowering any remaining function_temp variables to scratch at
|
||||
* this point. This gets rid of any remaining arrays and also takes care of
|
||||
* the sending side of ray payloads where we pass pointers to a function_temp
|
||||
* variable down the call stack.
|
||||
*/
|
||||
/* Lower the ray-tracing I/O storage classes (shader call data, hit
 * attributes) and any remaining function_temp / mem_constant access down to
 * 64-bit global memory access, as described in the comment above.
 */
static void
lower_rt_io_and_scratch(nir_shader *nir)
{
   /* First, we need to ensure all the I/O variables have explicit types.
    * Because these are shader-internal and don't come in from outside, they
    * don't have an explicit memory layout and we have to assign them one.
    */
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_function_temp |
              nir_var_shader_call_data |
              nir_var_ray_hit_attrib,
              glsl_get_natural_size_align_bytes);

   /* Now patch any derefs to I/O vars */
   NIR_PASS_V(nir, lower_rt_io_derefs);

   /* Finally, lower any remaining function_temp, mem_constant, or
    * ray_hit_attrib access to 64-bit global memory access.
    */
   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_function_temp |
              nir_var_mem_constant |
              nir_var_ray_hit_attrib,
              nir_address_format_64bit_global);
}
|
||||
|
||||
/* Emit code implementing terminateRayEXT: commit the current hit and either
 * return directly (when closest-hit is skipped via ray flags) or spawn the
 * closest-hit shader.  Both paths end in a halt.
 */
static void
build_terminate_ray(nir_builder *b)
{
   nir_def *skip_closest_hit = nir_test_mask(b, nir_load_ray_flags(b),
      BRW_RT_RAY_FLAG_SKIP_CLOSEST_HIT_SHADER);
   nir_push_if(b, skip_closest_hit);
   {
      /* The shader that calls traceRay() is unable to access any ray hit
       * information except for that which is explicitly written into the ray
       * payload by shaders invoked during the trace.  If there's no closest-
       * hit shader, then accepting the hit has no observable effect; it's
       * just extra memory traffic for no reason.
       */
      brw_nir_btd_return(b);
      nir_jump(b, nir_jump_halt);
   }
   nir_push_else(b, NULL);
   {
      /* The closest hit shader is in the same shader group as the any-hit
       * shader that we're currently in.  We can get the address for its SBT
       * handle by looking at the shader record pointer and subtracting the
       * size of a SBT handle.  The BINDLESS_SHADER_RECORD for a closest hit
       * shader is the first one in the SBT handle.
       */
      nir_def *closest_hit =
         nir_iadd_imm(b, nir_load_shader_record_ptr(b),
                         -BRW_RT_SBT_HANDLE_SIZE);

      brw_nir_rt_commit_hit(b);
      brw_nir_btd_spawn(b, closest_hit);
      nir_jump(b, nir_jump_halt);
   }
   nir_pop_if(b, NULL);
}
|
||||
|
||||
/** Lowers away ray walk intrinsics
|
||||
*
|
||||
* This lowers terminate_ray, ignore_ray_intersection, and the NIR-specific
|
||||
* accept_ray_intersection intrinsics to the appropriate Intel-specific
|
||||
* intrinsics.
|
||||
*/
|
||||
/* Lower ignore_ray_intersection, accept_ray_intersection, and terminate_ray
 * in any-hit/intersection shaders to Intel trace_ray/BTD intrinsics.
 *
 * Returns true if any instruction was rewritten.  The devinfo parameter is
 * currently unused; presumably kept for pass-signature consistency — TODO
 * confirm.
 */
static bool
lower_ray_walk_intrinsics(nir_shader *shader,
                          const struct intel_device_info *devinfo)
{
   assert(shader->info.stage == MESA_SHADER_ANY_HIT ||
          shader->info.stage == MESA_SHADER_INTERSECTION);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b = nir_builder_create(impl);

   bool progress = false;
   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         switch (intrin->intrinsic) {
         case nir_intrinsic_ignore_ray_intersection: {
            b.cursor = nir_instr_remove(&intrin->instr);

            /* We put the newly emitted code inside a dummy if because it's
             * going to contain a jump instruction and we don't want to deal
             * with that mess here.  It'll get dealt with by our control-flow
             * optimization passes.
             */
            nir_push_if(&b, nir_imm_true(&b));
            /* Ignoring the hit: resume traversal without committing. */
            nir_trace_ray_intel(&b,
                                nir_load_btd_global_arg_addr_intel(&b),
                                nir_imm_int(&b, BRW_RT_BVH_LEVEL_OBJECT),
                                nir_imm_int(&b, GEN_RT_TRACE_RAY_CONTINUE),
                                .synchronous = false);
            nir_jump(&b, nir_jump_halt);
            nir_pop_if(&b, NULL);
            progress = true;
            break;
         }

         case nir_intrinsic_accept_ray_intersection: {
            b.cursor = nir_instr_remove(&intrin->instr);

            /* With RayFlagsTerminateOnFirstHit, accepting the hit also
             * terminates the ray walk; otherwise commit and continue.
             */
            nir_def *terminate = nir_test_mask(&b, nir_load_ray_flags(&b),
                                               BRW_RT_RAY_FLAG_TERMINATE_ON_FIRST_HIT);
            nir_push_if(&b, terminate);
            {
               build_terminate_ray(&b);
            }
            nir_push_else(&b, NULL);
            {
               nir_trace_ray_intel(&b,
                                   nir_load_btd_global_arg_addr_intel(&b),
                                   nir_imm_int(&b, BRW_RT_BVH_LEVEL_OBJECT),
                                   nir_imm_int(&b, GEN_RT_TRACE_RAY_COMMIT),
                                   .synchronous = false);
               nir_jump(&b, nir_jump_halt);
            }
            nir_pop_if(&b, NULL);
            progress = true;
            break;
         }

         case nir_intrinsic_terminate_ray: {
            b.cursor = nir_instr_remove(&intrin->instr);
            build_terminate_ray(&b);
            progress = true;
            break;
         }

         default:
            break;
         }
      }
   }

   /* Inserted control flow invalidates all metadata when we made progress. */
   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_none);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}
|
||||
|
||||
/* Backend lowering for ray-generation shaders: lower shader returns, then
 * RT I/O and scratch.
 */
void
brw_nir_lower_raygen(nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_RAYGEN);
   NIR_PASS_V(nir, brw_nir_lower_shader_returns);
   lower_rt_io_and_scratch(nir);
}
|
||||
|
||||
/* Backend lowering for stand-alone any-hit shaders: additionally lowers the
 * ray-walk intrinsics (ignore/accept/terminate) to Intel intrinsics.
 */
void
brw_nir_lower_any_hit(nir_shader *nir, const struct intel_device_info *devinfo)
{
   assert(nir->info.stage == MESA_SHADER_ANY_HIT);
   NIR_PASS_V(nir, brw_nir_lower_shader_returns);
   NIR_PASS_V(nir, lower_ray_walk_intrinsics, devinfo);
   lower_rt_io_and_scratch(nir);
}
|
||||
|
||||
/* Backend lowering for closest-hit shaders. */
void
brw_nir_lower_closest_hit(nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_CLOSEST_HIT);
   NIR_PASS_V(nir, brw_nir_lower_shader_returns);
   lower_rt_io_and_scratch(nir);
}
|
||||
|
||||
/* Backend lowering for miss shaders. */
void
brw_nir_lower_miss(nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_MISS);
   NIR_PASS_V(nir, brw_nir_lower_shader_returns);
   lower_rt_io_and_scratch(nir);
}
|
||||
|
||||
/* Backend lowering for callable shaders. */
void
brw_nir_lower_callable(nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_CALLABLE);
   NIR_PASS_V(nir, brw_nir_lower_shader_returns);
   lower_rt_io_and_scratch(nir);
}
|
||||
|
||||
/* Backend lowering for intersection shaders: inlines the (optional) any-hit
 * shader into the intersection shader, then lowers ray-walk intrinsics and
 * RT I/O.  any_hit may be NULL.
 */
void
brw_nir_lower_combined_intersection_any_hit(nir_shader *intersection,
                                            const nir_shader *any_hit,
                                            const struct intel_device_info *devinfo)
{
   assert(intersection->info.stage == MESA_SHADER_INTERSECTION);
   assert(any_hit == NULL || any_hit->info.stage == MESA_SHADER_ANY_HIT);
   NIR_PASS_V(intersection, brw_nir_lower_shader_returns);
   NIR_PASS_V(intersection, brw_nir_lower_intersection_shader,
              any_hit, devinfo);
   NIR_PASS_V(intersection, lower_ray_walk_intrinsics, devinfo);
   lower_rt_io_and_scratch(intersection);
}
|
||||
|
||||
/* Load num_components x bit_size of push-constant (uniform) data starting at
 * byte offset `offset`.
 */
static nir_def *
build_load_uniform(nir_builder *b, unsigned offset,
                   unsigned num_components, unsigned bit_size)
{
   return nir_load_uniform(b, num_components, bit_size, nir_imm_int(b, 0),
                           .base = offset,
                           .range = num_components * bit_size / 8);
}
|
||||
|
||||
/* Load field `name` of struct brw_rt_raygen_trampoline_params from the
 * trampoline's push constants.
 */
#define load_trampoline_param(b, name, num_components, bit_size) \
   build_load_uniform((b), offsetof(struct brw_rt_raygen_trampoline_params, name), \
                      (num_components), (bit_size))
|
||||
|
||||
/* Build the compute shader used to launch a ray-tracing dispatch.
 *
 * The trampoline computes a 3-D launch ID from the workgroup ID and the
 * SIMD channel, initializes the software hotzone for in-bounds invocations,
 * and BTD-spawns the application's ray-generation shader.  Parameters come
 * in as push constants laid out as struct brw_rt_raygen_trampoline_params.
 * The returned shader is allocated out of mem_ctx.
 */
nir_shader *
brw_nir_create_raygen_trampoline(const struct brw_compiler *compiler,
                                 void *mem_ctx)
{
   const struct intel_device_info *devinfo = compiler->devinfo;
   const nir_shader_compiler_options *nir_options =
      compiler->nir_options[MESA_SHADER_COMPUTE];

   STATIC_ASSERT(sizeof(struct brw_rt_raygen_trampoline_params) == 32);

   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                                  nir_options,
                                                  "RT Ray-Gen Trampoline");
   ralloc_steal(mem_ctx, b.shader);

   /* Workgroup size is chosen at dispatch time, not compile time. */
   b.shader->info.workgroup_size_variable = true;

   /* The RT global data and raygen BINDLESS_SHADER_RECORD addresses are
    * passed in as push constants in the first register.  We deal with the
    * raygen BSR address here; the global data we'll deal with later.
    */
   b.shader->num_uniforms = 32;
   nir_def *raygen_param_bsr_addr =
      load_trampoline_param(&b, raygen_bsr_addr, 1, 64);
   nir_def *is_indirect =
      nir_i2b(&b, load_trampoline_param(&b, is_indirect, 1, 8));
   nir_def *local_shift =
      nir_u2u32(&b, load_trampoline_param(&b, local_group_size_log2, 3, 8));

   /* For indirect dispatch, the push constant holds a pointer to the BSR
    * address rather than the address itself, so dereference it.
    */
   nir_def *raygen_indirect_bsr_addr;
   nir_push_if(&b, is_indirect);
   {
      raygen_indirect_bsr_addr =
         nir_load_global_constant(&b, raygen_param_bsr_addr,
                                  8 /* align */,
                                  1 /* components */,
                                  64 /* bit_size */);
   }
   nir_pop_if(&b, NULL);

   nir_def *raygen_bsr_addr =
      nir_if_phi(&b, raygen_indirect_bsr_addr, raygen_param_bsr_addr);

   /* Reconstruct the 3-D local invocation index from the SIMD channel using
    * the per-axis log2 group sizes, then combine with the workgroup ID.
    */
   nir_def *global_id = nir_load_workgroup_id_zero_base(&b);
   nir_def *simd_channel = nir_load_subgroup_invocation(&b);
   nir_def *local_x =
      nir_ubfe(&b, simd_channel, nir_imm_int(&b, 0),
               nir_channel(&b, local_shift, 0));
   nir_def *local_y =
      nir_ubfe(&b, simd_channel, nir_channel(&b, local_shift, 0),
               nir_channel(&b, local_shift, 1));
   nir_def *local_z =
      nir_ubfe(&b, simd_channel,
               nir_iadd(&b, nir_channel(&b, local_shift, 0),
                        nir_channel(&b, local_shift, 1)),
               nir_channel(&b, local_shift, 2));
   nir_def *launch_id =
      nir_iadd(&b, nir_ishl(&b, global_id, local_shift),
               nir_vec3(&b, local_x, local_y, local_z));

   nir_def *launch_size = nir_load_ray_launch_size(&b);
   nir_push_if(&b, nir_ball(&b, nir_ult(&b, launch_id, launch_size)));
   {
      /* Initialize this invocation's hotzone: zero stack pointer plus the
       * 3-D launch ID.
       */
      nir_store_global(&b, brw_nir_rt_sw_hotzone_addr(&b, devinfo), 16,
                       nir_vec4(&b, nir_imm_int(&b, 0), /* Stack ptr */
                                nir_channel(&b, launch_id, 0),
                                nir_channel(&b, launch_id, 1),
                                nir_channel(&b, launch_id, 2)),
                       0xf /* write mask */);

      brw_nir_btd_spawn(&b, raygen_bsr_addr);
   }
   nir_push_else(&b, NULL);
   {
      /* Even though these invocations aren't being used for anything, the
       * hardware allocated stack IDs for them.  They need to retire them.
       */
      brw_nir_btd_retire(&b);
   }
   nir_pop_if(&b, NULL);

   nir_shader *nir = b.shader;
   nir->info.name = ralloc_strdup(nir, "RT: TraceRay trampoline");
   nir_validate_shader(nir, "in brw_nir_create_raygen_trampoline");

   struct brw_nir_compiler_opts opts = {};
   brw_preprocess_nir(compiler, nir, &opts);

   NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo);

   b = nir_builder_create(nir_shader_get_entrypoint(b.shader));
   /* brw_nir_lower_rt_intrinsics will leave us with a btd_global_arg_addr
    * intrinsic which doesn't exist in compute shaders.  We also created one
    * above when we generated the BTD spawn intrinsic.  Now we go through and
    * replace them with a uniform load.
    */
   nir_foreach_block(block, b.impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic != nir_intrinsic_load_btd_global_arg_addr_intel)
            continue;

         b.cursor = nir_before_instr(&intrin->instr);
         nir_def *global_arg_addr =
            load_trampoline_param(&b, rt_disp_globals_addr, 1, 64);
         nir_def_rewrite_uses(&intrin->def,
                              global_arg_addr);
         nir_instr_remove(instr);
      }
   }

   NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, devinfo, NULL);

   const bool is_scalar = true;
   brw_nir_optimize(nir, is_scalar, devinfo);

   return nir;
}
|
||||
|
|
@ -1,76 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2020 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BRW_NIR_RT_H
#define BRW_NIR_RT_H

#include "brw_nir.h"
#include "brw_rt.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Per-stage backend lowering entry points for ray-tracing shaders. */
void brw_nir_lower_raygen(nir_shader *nir);
void brw_nir_lower_any_hit(nir_shader *nir,
                           const struct intel_device_info *devinfo);
void brw_nir_lower_closest_hit(nir_shader *nir);
void brw_nir_lower_miss(nir_shader *nir);
void brw_nir_lower_callable(nir_shader *nir);
void brw_nir_lower_combined_intersection_any_hit(nir_shader *intersection,
                                                 const nir_shader *any_hit,
                                                 const struct intel_device_info *devinfo);

/* We reserve the first 16B of the stack for callee data pointers */
#define BRW_BTD_STACK_RESUME_BSR_ADDR_OFFSET 0
#define BRW_BTD_STACK_CALL_DATA_PTR_OFFSET 8
#define BRW_BTD_STACK_CALLEE_DATA_SIZE 16

/* We require the stack to be 8B aligned at the start of a shader */
#define BRW_BTD_STACK_ALIGN 8

bool brw_nir_lower_ray_queries(nir_shader *shader,
                               const struct intel_device_info *devinfo);

void brw_nir_lower_shader_returns(nir_shader *shader);

bool brw_nir_lower_shader_calls(nir_shader *shader, struct brw_bs_prog_key *key);

void brw_nir_lower_rt_intrinsics(nir_shader *shader,
                                 const struct intel_device_info *devinfo);
void brw_nir_lower_intersection_shader(nir_shader *intersection,
                                       const nir_shader *any_hit,
                                       const struct intel_device_info *devinfo);

/* Helper-shader generators; results are allocated out of mem_ctx. */
nir_shader *
brw_nir_create_raygen_trampoline(const struct brw_compiler *compiler,
                                 void *mem_ctx);
nir_shader *
brw_nir_create_trivial_return_shader(const struct brw_compiler *compiler,
                                     void *mem_ctx);

#ifdef __cplusplus
}
#endif

#endif /* BRW_NIR_RT_H */
|
||||
|
|
@ -1,990 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2020 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BRW_NIR_RT_BUILDER_H
|
||||
#define BRW_NIR_RT_BUILDER_H
|
||||
|
||||
/* This file provides helpers to access memory based data structures that the
|
||||
* RT hardware reads/writes and their locations.
|
||||
*
|
||||
* See also "Memory Based Data Structures for Ray Tracing" (BSpec 47547) and
|
||||
* "Ray Tracing Address Computation for Memory Resident Structures" (BSpec
|
||||
* 47550).
|
||||
*/
|
||||
|
||||
#include "brw_rt.h"
|
||||
#include "nir_builder.h"
|
||||
|
||||
/* Access qualifier for global memory built by this builder: in fragment
 * shaders, include helper invocations in the access.
 */
#define is_access_for_builder(b) \
   ((b)->shader->info.stage == MESA_SHADER_FRAGMENT ? \
    ACCESS_INCLUDE_HELPERS : 0)
|
||||
|
||||
/* Load `components` x `bit_size` from global memory at addr with the given
 * alignment.
 */
static inline nir_def *
brw_nir_rt_load(nir_builder *b, nir_def *addr, unsigned align,
                unsigned components, unsigned bit_size)
{
   return nir_build_load_global(b, components, bit_size, addr,
                                .align_mul = align,
                                .access = is_access_for_builder(b));
}
|
||||
|
||||
/* Store `value` (masked by write_mask) to global memory at addr. */
static inline void
brw_nir_rt_store(nir_builder *b, nir_def *addr, unsigned align,
                 nir_def *value, unsigned write_mask)
{
   nir_build_store_global(b, value, addr,
                          .align_mul = align,
                          /* Clamp the mask to the actual component count. */
                          .write_mask = (write_mask) &
                                        BITFIELD_MASK(value->num_components),
                          .access = is_access_for_builder(b));
}
|
||||
|
||||
/* Block-load uniform (constant across the subgroup) data, predicated on
 * `pred`.
 */
static inline nir_def *
brw_nir_rt_load_const(nir_builder *b, unsigned components,
                      nir_def *addr, nir_def *pred)
{
   return nir_load_global_const_block_intel(b, components, addr, pred);
}
|
||||
|
||||
/* Load the dual-subslice (DSS) ID of the executing thread. */
static inline nir_def *
brw_load_btd_dss_id(nir_builder *b)
{
   return nir_load_topology_id_intel(b, .base = BRW_TOPOLOGY_ID_DSS);
}
|
||||
|
||||
/* Total number of SIMD lanes per DSS: threads/EU * EUs/subslice * 16. */
static inline nir_def *
brw_nir_rt_load_num_simd_lanes_per_dss(nir_builder *b,
                                       const struct intel_device_info *devinfo)
{
   return nir_imm_int(b, devinfo->num_thread_per_eu *
                         devinfo->max_eus_per_subslice *
                         16 /* The RT computation is based off SIMD16 */);
}
|
||||
|
||||
/* Load the combined EU/thread/SIMD-lane topology ID of this invocation. */
static inline nir_def *
brw_load_eu_thread_simd(nir_builder *b)
{
   return nir_load_topology_id_intel(b, .base = BRW_TOPOLOGY_ID_EU_THREAD_SIMD);
}
|
||||
|
||||
/* Global async RT stack ID: DSSID * numDSSRTStacks + per-DSS stack ID. */
static inline nir_def *
brw_nir_rt_async_stack_id(nir_builder *b)
{
   return nir_iadd(b, nir_umul_32x16(b, nir_load_ray_num_dss_rt_stacks_intel(b),
                                     brw_load_btd_dss_id(b)),
                   nir_load_btd_stack_id_intel(b));
}
|
||||
|
||||
/* Synchronous (ray query) stack ID: the EU/thread/SIMD topology ID. */
static inline nir_def *
brw_nir_rt_sync_stack_id(nir_builder *b)
{
   return brw_load_eu_thread_simd(b);
}
|
||||
|
||||
/* We have our own load/store scratch helpers because they emit a global
 * memory read or write based on the scratch_base_ptr system value rather
 * than a load/store_scratch intrinsic.
 */
static inline nir_def *
brw_nir_rt_load_scratch(nir_builder *b, uint32_t offset, unsigned align,
                        unsigned num_components, unsigned bit_size)
{
   nir_def *addr =
      nir_iadd_imm(b, nir_load_scratch_base_ptr(b, 1, 64, 1), offset);
   /* Alignment can't exceed the guaranteed stack alignment. */
   return brw_nir_rt_load(b, addr, MIN2(align, BRW_BTD_STACK_ALIGN),
                          num_components, bit_size);
}
|
||||
|
||||
/* Store to this shader's scratch space at `offset`; see
 * brw_nir_rt_load_scratch for why this is a global store.
 */
static inline void
brw_nir_rt_store_scratch(nir_builder *b, uint32_t offset, unsigned align,
                         nir_def *value, nir_component_mask_t write_mask)
{
   nir_def *addr =
      nir_iadd_imm(b, nir_load_scratch_base_ptr(b, 1, 64, 1), offset);
   /* Alignment can't exceed the guaranteed stack alignment. */
   brw_nir_rt_store(b, addr, MIN2(align, BRW_BTD_STACK_ALIGN),
                    value, write_mask);
}
|
||||
|
||||
/* Spawn the bindless shader whose BINDLESS_SHADER_RECORD is at record_addr. */
static inline void
brw_nir_btd_spawn(nir_builder *b, nir_def *record_addr)
{
   nir_btd_spawn_intel(b, nir_load_btd_global_arg_addr_intel(b), record_addr);
}
|
||||
|
||||
/* Retire this invocation's BTD stack ID. */
static inline void
brw_nir_btd_retire(nir_builder *b)
{
   nir_btd_retire_intel(b);
}
|
||||
|
||||
/** This is a pseudo-op which does a bindless return
 *
 * It loads the return address from the stack and calls btd_spawn to spawn
 * the resume shader.
 */
static inline void
brw_nir_btd_return(struct nir_builder *b)
{
   nir_def *resume_addr =
      brw_nir_rt_load_scratch(b, BRW_BTD_STACK_RESUME_BSR_ADDR_OFFSET,
                              8 /* align */, 1, 64);
   brw_nir_btd_spawn(b, resume_addr);
}
|
||||
|
||||
/* Debug helper: assert a def has the expected component count and bit size. */
static inline void
assert_def_size(nir_def *def, unsigned num_components, unsigned bit_size)
{
   assert(def->num_components == num_components);
   assert(def->bit_size == bit_size);
}
|
||||
|
||||
/* Total number of HW RT stacks on the device: stacks-per-DSS times the
 * upper bound on DSS IDs.
 */
static inline nir_def *
brw_nir_num_rt_stacks(nir_builder *b,
                      const struct intel_device_info *devinfo)
{
   return nir_imul_imm(b, nir_load_ray_num_dss_rt_stacks_intel(b),
                          intel_device_info_dual_subslice_id_bound(devinfo));
}
|
||||
|
||||
/* Address of this invocation's software "hotzone".
 *
 * Hotzones are laid out growing downwards from the RT memory base address:
 * one BRW_RT_SIZEOF_HOTZONE slot per stack, indexed by async stack ID.
 */
static inline nir_def *
brw_nir_rt_sw_hotzone_addr(nir_builder *b,
                           const struct intel_device_info *devinfo)
{
   nir_def *offset32 =
      nir_imul_imm(b, brw_nir_rt_async_stack_id(b),
                      BRW_RT_SIZEOF_HOTZONE);

   /* Subtract the full hotzone array size so offsets are negative relative
    * to the base address.
    */
   offset32 = nir_iadd(b, offset32, nir_ineg(b,
      nir_imul_imm(b, brw_nir_num_rt_stacks(b, devinfo),
                      BRW_RT_SIZEOF_HOTZONE)));

   /* Sign-extend: the offset is negative. */
   return nir_iadd(b, nir_load_ray_base_mem_addr_intel(b),
                      nir_i2i64(b, offset32));
}
|
||||
|
||||
/* Address of this invocation's synchronous (ray query) RT stack. */
static inline nir_def *
brw_nir_rt_sync_stack_addr(nir_builder *b,
                           nir_def *base_mem_addr,
                           const struct intel_device_info *devinfo)
{
   /* For Ray queries (Synchronous Ray Tracing), the formula is similar but
    * goes down from rtMemBasePtr :
    *
    *    syncBase  = RTDispatchGlobals.rtMemBasePtr
    *              - (DSSID * NUM_SIMD_LANES_PER_DSS + SyncStackID + 1)
    *              * syncStackSize
    *
    * We assume that we can calculate a 32-bit offset first and then add it
    * to the 64-bit base address at the end.
    */
   nir_def *offset32 =
      nir_imul(b,
               nir_iadd(b,
                        nir_imul(b, brw_load_btd_dss_id(b),
                                    brw_nir_rt_load_num_simd_lanes_per_dss(b, devinfo)),
                        nir_iadd_imm(b, brw_nir_rt_sync_stack_id(b), 1)),
               nir_imm_int(b, BRW_RT_SIZEOF_RAY_QUERY));
   return nir_isub(b, base_mem_addr, nir_u2u64(b, offset32));
}
|
||||
|
||||
/* Address of this invocation's asynchronous HW RT stack. */
static inline nir_def *
brw_nir_rt_stack_addr(nir_builder *b)
{
   /* From the BSpec "Address Computation for Memory Based Data Structures:
    * Ray and TraversalStack (Async Ray Tracing)":
    *
    *    stackBase = RTDispatchGlobals.rtMemBasePtr
    *              + (DSSID * RTDispatchGlobals.numDSSRTStacks + stackID)
    *              * RTDispatchGlobals.stackSizePerRay // 64B aligned
    *
    * We assume that we can calculate a 32-bit offset first and then add it
    * to the 64-bit base address at the end.
    */
   nir_def *offset32 =
      nir_imul(b, brw_nir_rt_async_stack_id(b),
                  nir_load_ray_hw_stack_size_intel(b));
   return nir_iadd(b, nir_load_ray_base_mem_addr_intel(b),
                      nir_u2u64(b, offset32));
}
|
||||
|
||||
static inline nir_def *
|
||||
brw_nir_rt_mem_hit_addr_from_addr(nir_builder *b,
|
||||
nir_def *stack_addr,
|
||||
bool committed)
|
||||
{
|
||||
return nir_iadd_imm(b, stack_addr, committed ? 0 : BRW_RT_SIZEOF_HIT_INFO);
|
||||
}
|
||||
|
||||
static inline nir_def *
|
||||
brw_nir_rt_mem_hit_addr(nir_builder *b, bool committed)
|
||||
{
|
||||
return nir_iadd_imm(b, brw_nir_rt_stack_addr(b),
|
||||
committed ? 0 : BRW_RT_SIZEOF_HIT_INFO);
|
||||
}
|
||||
|
||||
static inline nir_def *
|
||||
brw_nir_rt_hit_attrib_data_addr(nir_builder *b)
|
||||
{
|
||||
return nir_iadd_imm(b, brw_nir_rt_stack_addr(b),
|
||||
BRW_RT_OFFSETOF_HIT_ATTRIB_DATA);
|
||||
}
|
||||
|
||||
/* Address of the MemRay structure for the given BVH level within the HW
 * stack entry at stack_addr.
 */
static inline nir_def *
brw_nir_rt_mem_ray_addr(nir_builder *b,
                        nir_def *stack_addr,
                        enum brw_rt_bvh_level bvh_level)
{
   /* From the BSpec "Address Computation for Memory Based Data Structures:
    * Ray and TraversalStack (Async Ray Tracing)":
    *
    *    rayBase = stackBase + sizeof(HitInfo) * 2 // 64B aligned
    *    rayPtr  = rayBase + bvhLevel * sizeof(Ray); // 64B aligned
    *
    * In Vulkan, we always have exactly two levels of BVH: World and Object.
    */
   uint32_t offset = BRW_RT_SIZEOF_HIT_INFO * 2 +
                     bvh_level * BRW_RT_SIZEOF_RAY;
   return nir_iadd_imm(b, stack_addr, offset);
}
|
||||
|
||||
/* Address of this invocation's software RT stack.
 *
 * The SW stacks live above the HW stack region: base + (total HW stack
 * size) + asyncStackID * swStackSize.
 */
static inline nir_def *
brw_nir_rt_sw_stack_addr(nir_builder *b,
                         const struct intel_device_info *devinfo)
{
   nir_def *addr = nir_load_ray_base_mem_addr_intel(b);

   nir_def *offset32 = nir_imul(b, brw_nir_num_rt_stacks(b, devinfo),
                                   nir_load_ray_hw_stack_size_intel(b));
   addr = nir_iadd(b, addr, nir_u2u64(b, offset32));

   /* Computed in 64 bits since the SW stack region can exceed 4GB. */
   nir_def *offset_in_stack =
      nir_imul(b, nir_u2u64(b, brw_nir_rt_async_stack_id(b)),
                  nir_u2u64(b, nir_load_ray_sw_stack_size_intel(b)));

   return nir_iadd(b, addr, offset_in_stack);
}
|
||||
|
||||
static inline nir_def *
|
||||
nir_unpack_64_4x16_split_z(nir_builder *b, nir_def *val)
|
||||
{
|
||||
return nir_unpack_32_2x16_split_x(b, nir_unpack_64_2x32_split_y(b, val));
|
||||
}
|
||||
|
||||
/* SSA defs for the fields of the RT dispatch globals structure, unpacked by
 * brw_nir_rt_load_globals_addr().
 */
struct brw_nir_rt_globals_defs {
   nir_def *base_mem_addr;
   nir_def *call_stack_handler_addr;
   nir_def *hw_stack_size;
   nir_def *num_dss_rt_stacks;
   nir_def *hit_sbt_addr;
   nir_def *hit_sbt_stride;
   nir_def *miss_sbt_addr;
   nir_def *miss_sbt_stride;
   nir_def *sw_stack_size;
   nir_def *launch_size;      /* 3-component launch dimensions */
   nir_def *call_sbt_addr;
   nir_def *call_sbt_stride;
   nir_def *resume_sbt_addr;
};
|
||||
|
||||
/* Load the RT dispatch globals at `addr` and unpack the fields into *defs.
 *
 * Reads two uniform blocks (16 dwords at addr, 8 dwords at addr + 64) and
 * splits out the packed address/stride pairs.  SBT addresses are stored as
 * 48-bit pointers with a 16-bit stride packed in the same dword; the
 * sign-extending nir_extract_i16 keeps canonical-form upper address bits.
 */
static inline void
brw_nir_rt_load_globals_addr(nir_builder *b,
                             struct brw_nir_rt_globals_defs *defs,
                             nir_def *addr)
{
   nir_def *data;
   data = brw_nir_rt_load_const(b, 16, addr, nir_imm_true(b));
   defs->base_mem_addr = nir_pack_64_2x32(b, nir_trim_vector(b, data, 2));

   defs->call_stack_handler_addr =
      nir_pack_64_2x32(b, nir_channels(b, data, 0x3 << 2));

   defs->hw_stack_size = nir_channel(b, data, 4);
   defs->num_dss_rt_stacks = nir_iand_imm(b, nir_channel(b, data, 5), 0xffff);
   defs->hit_sbt_addr =
      nir_pack_64_2x32_split(b, nir_channel(b, data, 8),
                                nir_extract_i16(b, nir_channel(b, data, 9),
                                                   nir_imm_int(b, 0)));
   defs->hit_sbt_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data, 9));
   defs->miss_sbt_addr =
      nir_pack_64_2x32_split(b, nir_channel(b, data, 10),
                                nir_extract_i16(b, nir_channel(b, data, 11),
                                                   nir_imm_int(b, 0)));
   defs->miss_sbt_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data, 11));
   defs->sw_stack_size = nir_channel(b, data, 12);
   defs->launch_size = nir_channels(b, data, 0x7u << 13);

   data = brw_nir_rt_load_const(b, 8, nir_iadd_imm(b, addr, 64), nir_imm_true(b));
   defs->call_sbt_addr =
      nir_pack_64_2x32_split(b, nir_channel(b, data, 0),
                                nir_extract_i16(b, nir_channel(b, data, 1),
                                                   nir_imm_int(b, 0)));
   defs->call_sbt_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data, 1));

   defs->resume_sbt_addr =
      nir_pack_64_2x32(b, nir_channels(b, data, 0x3 << 2));
}
|
||||
|
||||
/* Load the RT dispatch globals from the BTD global argument pointer. */
static inline void
brw_nir_rt_load_globals(nir_builder *b,
                        struct brw_nir_rt_globals_defs *defs)
{
   brw_nir_rt_load_globals_addr(b, defs, nir_load_btd_global_arg_addr_intel(b));
}
|
||||
|
||||
/* Turn a packed 42-bit leaf pointer (a 2x32 vector, in units of 64 bytes)
 * into a canonical 64-bit address.
 */
static inline nir_def *
brw_nir_rt_unpack_leaf_ptr(nir_builder *b, nir_def *vec2)
{
   /* Hit record leaf pointers are 42-bit and assumed to be in 64B chunks.
    * This leaves 22 bits at the top for other stuff.
    */
   nir_def *ptr64 = nir_imul_imm(b, nir_pack_64_2x32(b, vec2), 64);

   /* The top 16 bits (remember, we shifted by 6 already) contain garbage
    * that we need to get rid of.  Sign-extend from bit 48 to keep the
    * address canonical.
    */
   nir_def *ptr_lo = nir_unpack_64_2x32_split_x(b, ptr64);
   nir_def *ptr_hi = nir_unpack_64_2x32_split_y(b, ptr64);
   ptr_hi = nir_extract_i16(b, ptr_hi, nir_imm_int(b, 0));
   return nir_pack_64_2x32_split(b, ptr_lo, ptr_hi);
}
|
||||
|
||||
/**
|
||||
* MemHit memory layout (BSpec 47547) :
|
||||
*
|
||||
* name bits description
|
||||
* - t 32 hit distance of current hit (or initial traversal distance)
|
||||
* - u 32 barycentric hit coordinates
|
||||
* - v 32 barycentric hit coordinates
|
||||
* - primIndexDelta 16 prim index delta for compressed meshlets and quads
|
||||
* - valid 1 set if there is a hit
|
||||
* - leafType 3 type of node primLeafPtr is pointing to
|
||||
* - primLeafIndex 4 index of the hit primitive inside the leaf
|
||||
 * - bvhLevel 3 the instancing level at which the hit occurred
|
||||
* - frontFace 1 whether we hit the front-facing side of a triangle (also used to pass opaque flag when calling intersection shaders)
|
||||
* - pad0 4 unused bits
|
||||
* - primLeafPtr 42 pointer to BVH leaf node (multiple of 64 bytes)
|
||||
* - hitGroupRecPtr0 22 LSB of hit group record of the hit triangle (multiple of 16 bytes)
|
||||
* - instLeafPtr 42 pointer to BVH instance leaf node (in multiple of 64 bytes)
|
||||
* - hitGroupRecPtr1 22 MSB of hit group record of the hit triangle (multiple of 32 bytes)
|
||||
*/
|
||||
/* Unpacked SSA defs for a MemHit structure; see the layout comment above
 * and brw_nir_rt_load_mem_hit_from_addr().
 */
struct brw_nir_rt_mem_hit_defs {
   nir_def *t;
   nir_def *tri_bary; /**< Only valid for triangle geometry */
   nir_def *aabb_hit_kind; /**< Only valid for AABB geometry */
   nir_def *valid;
   nir_def *leaf_type;
   nir_def *prim_index_delta;
   nir_def *prim_leaf_index;
   nir_def *bvh_level;
   nir_def *front_face;
   nir_def *done; /**< Only for ray queries */
   nir_def *prim_leaf_ptr;
   nir_def *inst_leaf_ptr;
};
|
||||
|
||||
/* Load and unpack the committed or potential MemHit of the HW stack entry
 * at stack_addr into *defs.  Bit positions follow the MemHit layout
 * documented above (BSpec 47547).
 */
static inline void
brw_nir_rt_load_mem_hit_from_addr(nir_builder *b,
                                  struct brw_nir_rt_mem_hit_defs *defs,
                                  nir_def *stack_addr,
                                  bool committed)
{
   nir_def *hit_addr =
      brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, committed);

   /* First 16B: t, u, v, and the packed bitfield dword. */
   nir_def *data = brw_nir_rt_load(b, hit_addr, 16, 4, 32);
   defs->t = nir_channel(b, data, 0);
   defs->aabb_hit_kind = nir_channel(b, data, 1);
   defs->tri_bary = nir_channels(b, data, 0x6);
   nir_def *bitfield = nir_channel(b, data, 3);
   defs->prim_index_delta =
      nir_ubitfield_extract(b, bitfield, nir_imm_int(b, 0), nir_imm_int(b, 16));
   defs->valid = nir_i2b(b, nir_iand_imm(b, bitfield, 1u << 16));
   defs->leaf_type =
      nir_ubitfield_extract(b, bitfield, nir_imm_int(b, 17), nir_imm_int(b, 3));
   defs->prim_leaf_index =
      nir_ubitfield_extract(b, bitfield, nir_imm_int(b, 20), nir_imm_int(b, 4));
   defs->bvh_level =
      nir_ubitfield_extract(b, bitfield, nir_imm_int(b, 24), nir_imm_int(b, 3));
   defs->front_face = nir_i2b(b, nir_iand_imm(b, bitfield, 1 << 27));
   defs->done = nir_i2b(b, nir_iand_imm(b, bitfield, 1 << 28));

   /* Second 16B: packed 42-bit primitive and instance leaf pointers. */
   data = brw_nir_rt_load(b, nir_iadd_imm(b, hit_addr, 16), 16, 4, 32);
   defs->prim_leaf_ptr =
      brw_nir_rt_unpack_leaf_ptr(b, nir_channels(b, data, 0x3 << 0));
   defs->inst_leaf_ptr =
      brw_nir_rt_unpack_leaf_ptr(b, nir_channels(b, data, 0x3 << 2));
}
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_load_mem_hit(nir_builder *b,
|
||||
struct brw_nir_rt_mem_hit_defs *defs,
|
||||
bool committed)
|
||||
{
|
||||
brw_nir_rt_load_mem_hit_from_addr(b, defs, brw_nir_rt_stack_addr(b),
|
||||
committed);
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_nir_memcpy_global(nir_builder *b,
|
||||
nir_def *dst_addr, uint32_t dst_align,
|
||||
nir_def *src_addr, uint32_t src_align,
|
||||
uint32_t size)
|
||||
{
|
||||
/* We're going to copy in 16B chunks */
|
||||
assert(size % 16 == 0);
|
||||
dst_align = MIN2(dst_align, 16);
|
||||
src_align = MIN2(src_align, 16);
|
||||
|
||||
for (unsigned offset = 0; offset < size; offset += 16) {
|
||||
nir_def *data =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, src_addr, offset), 16,
|
||||
4, 32);
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, dst_addr, offset), 16,
|
||||
data, 0xf /* write_mask */);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_nir_memclear_global(nir_builder *b,
|
||||
nir_def *dst_addr, uint32_t dst_align,
|
||||
uint32_t size)
|
||||
{
|
||||
/* We're going to copy in 16B chunks */
|
||||
assert(size % 16 == 0);
|
||||
dst_align = MIN2(dst_align, 16);
|
||||
|
||||
nir_def *zero = nir_imm_ivec4(b, 0, 0, 0, 0);
|
||||
for (unsigned offset = 0; offset < size; offset += 16) {
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, dst_addr, offset), dst_align,
|
||||
zero, 0xf /* write_mask */);
|
||||
}
|
||||
}
|
||||
|
||||
static inline nir_def *
|
||||
brw_nir_rt_query_done(nir_builder *b, nir_def *stack_addr)
|
||||
{
|
||||
struct brw_nir_rt_mem_hit_defs hit_in = {};
|
||||
brw_nir_rt_load_mem_hit_from_addr(b, &hit_in, stack_addr,
|
||||
false /* committed */);
|
||||
|
||||
return hit_in.done;
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_set_dword_bit_at(nir_builder *b,
|
||||
nir_def *addr,
|
||||
uint32_t addr_offset,
|
||||
uint32_t bit)
|
||||
{
|
||||
nir_def *dword_addr = nir_iadd_imm(b, addr, addr_offset);
|
||||
nir_def *dword = brw_nir_rt_load(b, dword_addr, 4, 1, 32);
|
||||
brw_nir_rt_store(b, dword_addr, 4, nir_ior_imm(b, dword, 1u << bit), 0x1);
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_query_mark_done(nir_builder *b, nir_def *stack_addr)
|
||||
{
|
||||
brw_nir_rt_set_dword_bit_at(b,
|
||||
brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr,
|
||||
false /* committed */),
|
||||
4 * 3 /* dword offset */, 28 /* bit */);
|
||||
}
|
||||
|
||||
/* This helper clears the 3rd dword of the MemHit structure where the valid
|
||||
* bit is located.
|
||||
*/
|
||||
static inline void
|
||||
brw_nir_rt_query_mark_init(nir_builder *b, nir_def *stack_addr)
|
||||
{
|
||||
nir_def *dword_addr;
|
||||
|
||||
for (uint32_t i = 0; i < 2; i++) {
|
||||
dword_addr =
|
||||
nir_iadd_imm(b,
|
||||
brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr,
|
||||
i == 0 /* committed */),
|
||||
4 * 3 /* dword offset */);
|
||||
brw_nir_rt_store(b, dword_addr, 4, nir_imm_int(b, 0), 0x1);
|
||||
}
|
||||
}
|
||||
|
||||
/* This helper is pretty much a memcpy of uncommitted into committed hit
|
||||
* structure, just adding the valid bit.
|
||||
*/
|
||||
static inline void
|
||||
brw_nir_rt_commit_hit_addr(nir_builder *b, nir_def *stack_addr)
|
||||
{
|
||||
nir_def *dst_addr =
|
||||
brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, true /* committed */);
|
||||
nir_def *src_addr =
|
||||
brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, false /* committed */);
|
||||
|
||||
for (unsigned offset = 0; offset < BRW_RT_SIZEOF_HIT_INFO; offset += 16) {
|
||||
nir_def *data =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, src_addr, offset), 16, 4, 32);
|
||||
|
||||
if (offset == 0) {
|
||||
data = nir_vec4(b,
|
||||
nir_channel(b, data, 0),
|
||||
nir_channel(b, data, 1),
|
||||
nir_channel(b, data, 2),
|
||||
nir_ior_imm(b,
|
||||
nir_channel(b, data, 3),
|
||||
0x1 << 16 /* valid */));
|
||||
|
||||
/* Also write the potential hit as we change it. */
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, src_addr, offset), 16,
|
||||
data, 0xf /* write_mask */);
|
||||
}
|
||||
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, dst_addr, offset), 16,
|
||||
data, 0xf /* write_mask */);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_commit_hit(nir_builder *b)
|
||||
{
|
||||
nir_def *stack_addr = brw_nir_rt_stack_addr(b);
|
||||
brw_nir_rt_commit_hit_addr(b, stack_addr);
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_generate_hit_addr(nir_builder *b, nir_def *stack_addr, nir_def *t_val)
|
||||
{
|
||||
nir_def *committed_addr =
|
||||
brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, true /* committed */);
|
||||
nir_def *potential_addr =
|
||||
brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, false /* committed */);
|
||||
|
||||
/* Set:
|
||||
*
|
||||
* potential.t = t_val;
|
||||
* potential.valid = true;
|
||||
*/
|
||||
nir_def *potential_hit_dwords_0_3 =
|
||||
brw_nir_rt_load(b, potential_addr, 16, 4, 32);
|
||||
potential_hit_dwords_0_3 =
|
||||
nir_vec4(b,
|
||||
t_val,
|
||||
nir_channel(b, potential_hit_dwords_0_3, 1),
|
||||
nir_channel(b, potential_hit_dwords_0_3, 2),
|
||||
nir_ior_imm(b, nir_channel(b, potential_hit_dwords_0_3, 3),
|
||||
(0x1 << 16) /* valid */));
|
||||
brw_nir_rt_store(b, potential_addr, 16, potential_hit_dwords_0_3, 0xf /* write_mask */);
|
||||
|
||||
/* Set:
|
||||
*
|
||||
* committed.t = t_val;
|
||||
* committed.u = 0.0f;
|
||||
* committed.v = 0.0f;
|
||||
* committed.valid = true;
|
||||
* committed.leaf_type = potential.leaf_type;
|
||||
* committed.bvh_level = BRW_RT_BVH_LEVEL_OBJECT;
|
||||
* committed.front_face = false;
|
||||
* committed.prim_leaf_index = 0;
|
||||
* committed.done = false;
|
||||
*/
|
||||
nir_def *committed_hit_dwords_0_3 =
|
||||
brw_nir_rt_load(b, committed_addr, 16, 4, 32);
|
||||
committed_hit_dwords_0_3 =
|
||||
nir_vec4(b,
|
||||
t_val,
|
||||
nir_imm_float(b, 0.0f),
|
||||
nir_imm_float(b, 0.0f),
|
||||
nir_ior_imm(b,
|
||||
nir_ior_imm(b, nir_channel(b, potential_hit_dwords_0_3, 3), 0x000e0000),
|
||||
(0x1 << 16) /* valid */ |
|
||||
(BRW_RT_BVH_LEVEL_OBJECT << 24) /* leaf_type */));
|
||||
brw_nir_rt_store(b, committed_addr, 16, committed_hit_dwords_0_3, 0xf /* write_mask */);
|
||||
|
||||
/* Set:
|
||||
*
|
||||
* committed.prim_leaf_ptr = potential.prim_leaf_ptr;
|
||||
* committed.inst_leaf_ptr = potential.inst_leaf_ptr;
|
||||
*/
|
||||
brw_nir_memcpy_global(b,
|
||||
nir_iadd_imm(b, committed_addr, 16), 16,
|
||||
nir_iadd_imm(b, potential_addr, 16), 16,
|
||||
16);
|
||||
}
|
||||
|
||||
/* Decomposed view of the hardware MemRay structure, one nir_def per field.
 * Consumed by the store helpers and produced by the load helpers below.
 */
struct brw_nir_rt_mem_ray_defs {
   nir_def *orig;                    /* Ray origin, 3 x 32-bit */
   nir_def *dir;                     /* Ray direction, 3 x 32-bit */
   nir_def *t_near;                  /* 32-bit */
   nir_def *t_far;                   /* 32-bit */
   nir_def *root_node_ptr;           /* 64-bit BVH root node address */
   nir_def *ray_flags;               /* 16-bit */
   nir_def *hit_group_sr_base_ptr;   /* 64-bit shader record base */
   nir_def *hit_group_sr_stride;     /* 16-bit */
   nir_def *miss_sr_ptr;             /* 64-bit miss shader record */
   nir_def *shader_index_multiplier; /* Stored shifted left by 8 */
   nir_def *inst_leaf_ptr;           /* 64-bit; optional (may be NULL) when storing */
   nir_def *ray_mask;                /* Low 16 bits are written to memory */
};
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_store_mem_ray_query_at_addr(nir_builder *b,
|
||||
nir_def *ray_addr,
|
||||
const struct brw_nir_rt_mem_ray_defs *defs)
|
||||
{
|
||||
assert_def_size(defs->orig, 3, 32);
|
||||
assert_def_size(defs->dir, 3, 32);
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 0), 16,
|
||||
nir_vec4(b, nir_channel(b, defs->orig, 0),
|
||||
nir_channel(b, defs->orig, 1),
|
||||
nir_channel(b, defs->orig, 2),
|
||||
nir_channel(b, defs->dir, 0)),
|
||||
~0 /* write mask */);
|
||||
|
||||
assert_def_size(defs->t_near, 1, 32);
|
||||
assert_def_size(defs->t_far, 1, 32);
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 16), 16,
|
||||
nir_vec4(b, nir_channel(b, defs->dir, 1),
|
||||
nir_channel(b, defs->dir, 2),
|
||||
defs->t_near,
|
||||
defs->t_far),
|
||||
~0 /* write mask */);
|
||||
|
||||
assert_def_size(defs->root_node_ptr, 1, 64);
|
||||
assert_def_size(defs->ray_flags, 1, 16);
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 32), 16,
|
||||
nir_vec2(b, nir_unpack_64_2x32_split_x(b, defs->root_node_ptr),
|
||||
nir_pack_32_2x16_split(b,
|
||||
nir_unpack_64_4x16_split_z(b, defs->root_node_ptr),
|
||||
defs->ray_flags)),
|
||||
0x3 /* write mask */);
|
||||
|
||||
/* leaf_ptr is optional */
|
||||
nir_def *inst_leaf_ptr;
|
||||
if (defs->inst_leaf_ptr) {
|
||||
inst_leaf_ptr = defs->inst_leaf_ptr;
|
||||
} else {
|
||||
inst_leaf_ptr = nir_imm_int64(b, 0);
|
||||
}
|
||||
|
||||
assert_def_size(inst_leaf_ptr, 1, 64);
|
||||
assert_def_size(defs->ray_mask, 1, 32);
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 56), 8,
|
||||
nir_vec2(b, nir_unpack_64_2x32_split_x(b, inst_leaf_ptr),
|
||||
nir_pack_32_2x16_split(b,
|
||||
nir_unpack_64_4x16_split_z(b, inst_leaf_ptr),
|
||||
nir_unpack_32_2x16_split_x(b, defs->ray_mask))),
|
||||
~0 /* write mask */);
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_store_mem_ray(nir_builder *b,
|
||||
const struct brw_nir_rt_mem_ray_defs *defs,
|
||||
enum brw_rt_bvh_level bvh_level)
|
||||
{
|
||||
nir_def *ray_addr =
|
||||
brw_nir_rt_mem_ray_addr(b, brw_nir_rt_stack_addr(b), bvh_level);
|
||||
|
||||
assert_def_size(defs->orig, 3, 32);
|
||||
assert_def_size(defs->dir, 3, 32);
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 0), 16,
|
||||
nir_vec4(b, nir_channel(b, defs->orig, 0),
|
||||
nir_channel(b, defs->orig, 1),
|
||||
nir_channel(b, defs->orig, 2),
|
||||
nir_channel(b, defs->dir, 0)),
|
||||
~0 /* write mask */);
|
||||
|
||||
assert_def_size(defs->t_near, 1, 32);
|
||||
assert_def_size(defs->t_far, 1, 32);
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 16), 16,
|
||||
nir_vec4(b, nir_channel(b, defs->dir, 1),
|
||||
nir_channel(b, defs->dir, 2),
|
||||
defs->t_near,
|
||||
defs->t_far),
|
||||
~0 /* write mask */);
|
||||
|
||||
assert_def_size(defs->root_node_ptr, 1, 64);
|
||||
assert_def_size(defs->ray_flags, 1, 16);
|
||||
assert_def_size(defs->hit_group_sr_base_ptr, 1, 64);
|
||||
assert_def_size(defs->hit_group_sr_stride, 1, 16);
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 32), 16,
|
||||
nir_vec4(b, nir_unpack_64_2x32_split_x(b, defs->root_node_ptr),
|
||||
nir_pack_32_2x16_split(b,
|
||||
nir_unpack_64_4x16_split_z(b, defs->root_node_ptr),
|
||||
defs->ray_flags),
|
||||
nir_unpack_64_2x32_split_x(b, defs->hit_group_sr_base_ptr),
|
||||
nir_pack_32_2x16_split(b,
|
||||
nir_unpack_64_4x16_split_z(b, defs->hit_group_sr_base_ptr),
|
||||
defs->hit_group_sr_stride)),
|
||||
~0 /* write mask */);
|
||||
|
||||
/* leaf_ptr is optional */
|
||||
nir_def *inst_leaf_ptr;
|
||||
if (defs->inst_leaf_ptr) {
|
||||
inst_leaf_ptr = defs->inst_leaf_ptr;
|
||||
} else {
|
||||
inst_leaf_ptr = nir_imm_int64(b, 0);
|
||||
}
|
||||
|
||||
assert_def_size(defs->miss_sr_ptr, 1, 64);
|
||||
assert_def_size(defs->shader_index_multiplier, 1, 32);
|
||||
assert_def_size(inst_leaf_ptr, 1, 64);
|
||||
assert_def_size(defs->ray_mask, 1, 32);
|
||||
brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 48), 16,
|
||||
nir_vec4(b, nir_unpack_64_2x32_split_x(b, defs->miss_sr_ptr),
|
||||
nir_pack_32_2x16_split(b,
|
||||
nir_unpack_64_4x16_split_z(b, defs->miss_sr_ptr),
|
||||
nir_unpack_32_2x16_split_x(b,
|
||||
nir_ishl(b, defs->shader_index_multiplier,
|
||||
nir_imm_int(b, 8)))),
|
||||
nir_unpack_64_2x32_split_x(b, inst_leaf_ptr),
|
||||
nir_pack_32_2x16_split(b,
|
||||
nir_unpack_64_4x16_split_z(b, inst_leaf_ptr),
|
||||
nir_unpack_32_2x16_split_x(b, defs->ray_mask))),
|
||||
~0 /* write mask */);
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_load_mem_ray_from_addr(nir_builder *b,
|
||||
struct brw_nir_rt_mem_ray_defs *defs,
|
||||
nir_def *ray_base_addr,
|
||||
enum brw_rt_bvh_level bvh_level)
|
||||
{
|
||||
nir_def *ray_addr = brw_nir_rt_mem_ray_addr(b,
|
||||
ray_base_addr,
|
||||
bvh_level);
|
||||
|
||||
nir_def *data[4] = {
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, ray_addr, 0), 16, 4, 32),
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, ray_addr, 16), 16, 4, 32),
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, ray_addr, 32), 16, 4, 32),
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, ray_addr, 48), 16, 4, 32),
|
||||
};
|
||||
|
||||
defs->orig = nir_trim_vector(b, data[0], 3);
|
||||
defs->dir = nir_vec3(b, nir_channel(b, data[0], 3),
|
||||
nir_channel(b, data[1], 0),
|
||||
nir_channel(b, data[1], 1));
|
||||
defs->t_near = nir_channel(b, data[1], 2);
|
||||
defs->t_far = nir_channel(b, data[1], 3);
|
||||
defs->root_node_ptr =
|
||||
nir_pack_64_2x32_split(b, nir_channel(b, data[2], 0),
|
||||
nir_extract_i16(b, nir_channel(b, data[2], 1),
|
||||
nir_imm_int(b, 0)));
|
||||
defs->ray_flags =
|
||||
nir_unpack_32_2x16_split_y(b, nir_channel(b, data[2], 1));
|
||||
defs->hit_group_sr_base_ptr =
|
||||
nir_pack_64_2x32_split(b, nir_channel(b, data[2], 2),
|
||||
nir_extract_i16(b, nir_channel(b, data[2], 3),
|
||||
nir_imm_int(b, 0)));
|
||||
defs->hit_group_sr_stride =
|
||||
nir_unpack_32_2x16_split_y(b, nir_channel(b, data[2], 3));
|
||||
defs->miss_sr_ptr =
|
||||
nir_pack_64_2x32_split(b, nir_channel(b, data[3], 0),
|
||||
nir_extract_i16(b, nir_channel(b, data[3], 1),
|
||||
nir_imm_int(b, 0)));
|
||||
defs->shader_index_multiplier =
|
||||
nir_ushr(b, nir_unpack_32_2x16_split_y(b, nir_channel(b, data[3], 1)),
|
||||
nir_imm_int(b, 8));
|
||||
defs->inst_leaf_ptr =
|
||||
nir_pack_64_2x32_split(b, nir_channel(b, data[3], 2),
|
||||
nir_extract_i16(b, nir_channel(b, data[3], 3),
|
||||
nir_imm_int(b, 0)));
|
||||
defs->ray_mask =
|
||||
nir_unpack_32_2x16_split_y(b, nir_channel(b, data[3], 3));
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_load_mem_ray(nir_builder *b,
|
||||
struct brw_nir_rt_mem_ray_defs *defs,
|
||||
enum brw_rt_bvh_level bvh_level)
|
||||
{
|
||||
brw_nir_rt_load_mem_ray_from_addr(b, defs, brw_nir_rt_stack_addr(b),
|
||||
bvh_level);
|
||||
}
|
||||
|
||||
/* Fields of a BVH instance leaf, as loaded by
 * brw_nir_rt_load_bvh_instance_leaf().
 */
struct brw_nir_rt_bvh_instance_leaf_defs {
   nir_def *shader_index;                    /* Low 24 bits of the descriptor */
   nir_def *contribution_to_hit_group_index; /* Low 24 bits of descriptor dword 1 */
   /* [0..2] are 3-component vectors from the leaf matrix; [3] is the last
    * column, which the leaf stores swapped between the two matrices (see
    * the loader).
    */
   nir_def *world_to_object[4];
   nir_def *instance_id;
   nir_def *instance_index;
   nir_def *object_to_world[4];
};
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_load_bvh_instance_leaf(nir_builder *b,
|
||||
struct brw_nir_rt_bvh_instance_leaf_defs *defs,
|
||||
nir_def *leaf_addr)
|
||||
{
|
||||
nir_def *leaf_desc = brw_nir_rt_load(b, leaf_addr, 4, 2, 32);
|
||||
|
||||
defs->shader_index =
|
||||
nir_iand_imm(b, nir_channel(b, leaf_desc, 0), (1 << 24) - 1);
|
||||
defs->contribution_to_hit_group_index =
|
||||
nir_iand_imm(b, nir_channel(b, leaf_desc, 1), (1 << 24) - 1);
|
||||
|
||||
defs->world_to_object[0] =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 16), 4, 3, 32);
|
||||
defs->world_to_object[1] =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 28), 4, 3, 32);
|
||||
defs->world_to_object[2] =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 40), 4, 3, 32);
|
||||
/* The last column of the matrices is swapped between the two probably
|
||||
* because it makes it easier/faster for hardware somehow.
|
||||
*/
|
||||
defs->object_to_world[3] =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 52), 4, 3, 32);
|
||||
|
||||
nir_def *data =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 64), 4, 4, 32);
|
||||
defs->instance_id = nir_channel(b, data, 2);
|
||||
defs->instance_index = nir_channel(b, data, 3);
|
||||
|
||||
defs->object_to_world[0] =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 80), 4, 3, 32);
|
||||
defs->object_to_world[1] =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 92), 4, 3, 32);
|
||||
defs->object_to_world[2] =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 104), 4, 3, 32);
|
||||
defs->world_to_object[3] =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 116), 4, 3, 32);
|
||||
}
|
||||
|
||||
/* Descriptor fields of a BVH primitive leaf, as loaded by
 * brw_nir_rt_load_bvh_primitive_leaf().
 */
struct brw_nir_rt_bvh_primitive_leaf_defs {
   nir_def *shader_index; /* From descriptor dword 0 */
   nir_def *geom_mask;    /* From descriptor dword 0 */
   nir_def *geom_index;   /* From descriptor dword 1 */
   nir_def *type;         /* From descriptor dword 1 */
   nir_def *geom_flags;   /* From descriptor dword 1 */
};
|
||||
|
||||
/* Load the descriptor fields of a BVH primitive leaf at leaf_addr. */
static inline void
brw_nir_rt_load_bvh_primitive_leaf(nir_builder *b,
                                   struct brw_nir_rt_bvh_primitive_leaf_defs *defs,
                                   nir_def *leaf_addr)
{
   nir_def *desc = brw_nir_rt_load(b, leaf_addr, 4, 2, 32);

   /* NOTE(review): the (offset, bits) operands below look swapped relative
    * to the nir_ubitfield_extract() calls in
    * brw_nir_rt_load_mem_hit_from_addr(), which pass (low bit, width) --
    * e.g. (23, 0) would extract a zero-width field.  Preserved verbatim;
    * confirm against the BVH primitive leaf layout before changing.
    */
   defs->shader_index =
      nir_ubitfield_extract(b, nir_channel(b, desc, 0),
                            nir_imm_int(b, 23), nir_imm_int(b, 0));
   defs->geom_mask =
      nir_ubitfield_extract(b, nir_channel(b, desc, 0),
                            nir_imm_int(b, 31), nir_imm_int(b, 24));

   defs->geom_index =
      nir_ubitfield_extract(b, nir_channel(b, desc, 1),
                            nir_imm_int(b, 28), nir_imm_int(b, 0));
   defs->type =
      nir_ubitfield_extract(b, nir_channel(b, desc, 1),
                            nir_imm_int(b, 29), nir_imm_int(b, 29));
   defs->geom_flags =
      nir_ubitfield_extract(b, nir_channel(b, desc, 1),
                            nir_imm_int(b, 31), nir_imm_int(b, 30));
}
|
||||
|
||||
/* Vertex positions of a BVH primitive leaf. */
struct brw_nir_rt_bvh_primitive_leaf_positions_defs {
   nir_def *positions[3]; /* One 3-component vector per vertex */
};
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_load_bvh_primitive_leaf_positions(nir_builder *b,
|
||||
struct brw_nir_rt_bvh_primitive_leaf_positions_defs *defs,
|
||||
nir_def *leaf_addr)
|
||||
{
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(defs->positions); i++) {
|
||||
defs->positions[i] =
|
||||
brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 16 + i * 4 * 3), 4, 3, 32);
|
||||
}
|
||||
}
|
||||
|
||||
static inline nir_def *
|
||||
brw_nir_rt_load_primitive_id_from_hit(nir_builder *b,
|
||||
nir_def *is_procedural,
|
||||
const struct brw_nir_rt_mem_hit_defs *defs)
|
||||
{
|
||||
if (!is_procedural) {
|
||||
is_procedural =
|
||||
nir_ieq_imm(b, defs->leaf_type,
|
||||
BRW_RT_BVH_NODE_TYPE_PROCEDURAL);
|
||||
}
|
||||
|
||||
nir_def *prim_id_proc, *prim_id_quad;
|
||||
nir_push_if(b, is_procedural);
|
||||
{
|
||||
/* For procedural leafs, the index is in dw[3]. */
|
||||
nir_def *offset =
|
||||
nir_iadd_imm(b, nir_ishl_imm(b, defs->prim_leaf_index, 2), 12);
|
||||
prim_id_proc = nir_load_global(b, nir_iadd(b, defs->prim_leaf_ptr,
|
||||
nir_u2u64(b, offset)),
|
||||
4, /* align */ 1, 32);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
/* For quad leafs, the index is dw[2] and there is a 16bit additional
|
||||
* offset in dw[3].
|
||||
*/
|
||||
prim_id_quad = nir_load_global(b, nir_iadd_imm(b, defs->prim_leaf_ptr, 8),
|
||||
4, /* align */ 1, 32);
|
||||
prim_id_quad = nir_iadd(b,
|
||||
prim_id_quad,
|
||||
defs->prim_index_delta);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
return nir_if_phi(b, prim_id_proc, prim_id_quad);
|
||||
}
|
||||
|
||||
static inline nir_def *
|
||||
brw_nir_rt_acceleration_structure_to_root_node(nir_builder *b,
|
||||
nir_def *as_addr)
|
||||
{
|
||||
/* The HW memory structure in which we specify what acceleration structure
|
||||
* to traverse, takes the address to the root node in the acceleration
|
||||
* structure, not the acceleration structure itself. To find that, we have
|
||||
* to read the root node offset from the acceleration structure which is
|
||||
* the first QWord.
|
||||
*
|
||||
* But if the acceleration structure pointer is NULL, then we should return
|
||||
* NULL as root node pointer.
|
||||
*
|
||||
* TODO: we could optimize this by assuming that for a given version of the
|
||||
* BVH, we can find the root node at a given offset.
|
||||
*/
|
||||
nir_def *root_node_ptr, *null_node_ptr;
|
||||
nir_push_if(b, nir_ieq_imm(b, as_addr, 0));
|
||||
{
|
||||
null_node_ptr = nir_imm_int64(b, 0);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
root_node_ptr =
|
||||
nir_iadd(b, as_addr, brw_nir_rt_load(b, as_addr, 256, 1, 64));
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
return nir_if_phi(b, null_node_ptr, root_node_ptr);
|
||||
}
|
||||
|
||||
#endif /* BRW_NIR_RT_BUILDER_H */
|
||||
|
|
@ -1,292 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2020 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BRW_RT_H
|
||||
#define BRW_RT_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "compiler/shader_enums.h"
|
||||
#include "util/macros.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** Vulkan defines shaderGroupHandleSize = 32 */
#define BRW_RT_SBT_HANDLE_SIZE 32

/** RT_DISPATCH_GLOBALS size (see gen_rt.xml) */
#define BRW_RT_DISPATCH_GLOBALS_SIZE 80

/** Offset after the RT dispatch globals at which "push" constants live */
#define BRW_RT_PUSH_CONST_OFFSET 128

/** Stride of the resume SBT */
#define BRW_BTD_RESUME_SBT_STRIDE 8

/* Vulkan always uses exactly two levels of BVH: world and object.  At the
 * API level, these are referred to as top and bottom.
 */
enum brw_rt_bvh_level {
   BRW_RT_BVH_LEVEL_WORLD = 0,
   BRW_RT_BVH_LEVEL_OBJECT = 1,
};
#define BRW_RT_MAX_BVH_LEVELS 2
|
||||
|
||||
/** Node types found in BVH nodes */
enum brw_rt_bvh_node_type {
   BRW_RT_BVH_NODE_TYPE_INTERNAL = 0,
   BRW_RT_BVH_NODE_TYPE_INSTANCE = 1,
   BRW_RT_BVH_NODE_TYPE_PROCEDURAL = 3,
   BRW_RT_BVH_NODE_TYPE_QUAD = 4,
};

/** HitKind values returned for triangle geometry
 *
 * This enum must match the SPIR-V enum.
 */
enum brw_rt_hit_kind {
   BRW_RT_HIT_KIND_FRONT_FACE = 0xfe,
   BRW_RT_HIT_KIND_BACK_FACE = 0xff,
};

/** Ray flags
 *
 * This enum must match the SPIR-V RayFlags enum.
 */
enum brw_rt_ray_flags {
   BRW_RT_RAY_FLAG_FORCE_OPAQUE = 0x01,
   BRW_RT_RAY_FLAG_FORCE_NON_OPAQUE = 0x02,
   BRW_RT_RAY_FLAG_TERMINATE_ON_FIRST_HIT = 0x04,
   BRW_RT_RAY_FLAG_SKIP_CLOSEST_HIT_SHADER = 0x08,
   BRW_RT_RAY_FLAG_CULL_BACK_FACING_TRIANGLES = 0x10,
   BRW_RT_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES = 0x20,
   BRW_RT_RAY_FLAG_CULL_OPAQUE = 0x40,
   BRW_RT_RAY_FLAG_CULL_NON_OPAQUE = 0x80,
   BRW_RT_RAY_FLAG_SKIP_TRIANGLES = 0x100,
   BRW_RT_RAY_FLAG_SKIP_AABBS = 0x200,
};
|
||||
|
||||
/* Layout of the ray-tracing scratch memory area; computed by
 * brw_rt_compute_scratch_layout().
 */
struct brw_rt_scratch_layout {
   /** Number of stack IDs per DSS */
   uint32_t stack_ids_per_dss;

   /** Start offset (in bytes) of the hardware MemRay stack */
   uint32_t ray_stack_start;

   /** Stride (in bytes) of the hardware MemRay stack */
   uint32_t ray_stack_stride;

   /** Start offset (in bytes) of the SW stacks */
   uint64_t sw_stack_start;

   /** Size (in bytes) of the SW stack for a single shader invocation */
   uint32_t sw_stack_size;

   /** Total size (in bytes) of the RT scratch memory area */
   uint64_t total_size;
};
|
||||
|
||||
/** Parameters passed to the raygen trampoline shader
 *
 * This struct is carefully constructed to be 32B and must be passed to the
 * raygen trampoline shader as inline constant data.
 */
struct brw_rt_raygen_trampoline_params {
   /** The GPU address of the RT_DISPATCH_GLOBALS */
   uint64_t rt_disp_globals_addr;

   /** The GPU address of the BINDLESS_SHADER_RECORD for the raygen shader */
   uint64_t raygen_bsr_addr;

   /** 1 if this is an indirect dispatch, 0 otherwise */
   uint8_t is_indirect;

   /** The integer log2 of the local group size
    *
    * Ray-tracing shaders don't have a concept of local vs. global workgroup
    * size.  They only have a single 3D launch size.  The raygen trampoline
    * shader is always dispatched with a local workgroup size equal to the
    * SIMD width, but the shape of the local workgroup is determined at
    * dispatch time based on the shape of the launch and passed to the
    * trampoline via this field.  (There's no sense having a Z dimension on
    * the local workgroup if the launch is 2D.)
    *
    * We use the integer log2 of the size because there's no point in
    * non-power-of-two sizes and shifts are cheaper than division.
    */
   uint8_t local_group_size_log2[3];

   /** Padding bringing the struct up to 32B */
   uint32_t pad[3];
};
|
||||
|
||||
/** Size of the "hot zone" in bytes
 *
 * The hot zone is a SW-defined data structure which is a single uvec4
 * containing two pieces of information:
 *
 *  - hotzone.x: Stack offset (in bytes)
 *
 *    This is the offset (in bytes) into the per-thread scratch space at
 *    which the current shader's stack starts.  This is incremented by the
 *    calling shader prior to any shader call type instructions and gets
 *    decremented by the resume shader as part of completing the return
 *    operation.
 *
 *
 *  - hotzone.yzw: The launch ID associated with the current thread
 *
 *    Inside a bindless shader, the only information we have is the DSS ID
 *    from the hardware EU and a per-DSS stack ID.  In particular, the
 *    three-dimensional launch ID is lost the moment we leave the raygen
 *    trampoline.
 */
#define BRW_RT_SIZEOF_HOTZONE 16

/* From the BSpec "Address Computation for Memory Based Data Structures:
 * Ray and TraversalStack (Async Ray Tracing)":
 *
 *    sizeof(Ray) = 64B, sizeof(HitInfo) = 32B, sizeof(TravStack) = 32B.
 */
#define BRW_RT_SIZEOF_RAY 64
#define BRW_RT_SIZEOF_HIT_INFO 32
#define BRW_RT_SIZEOF_TRAV_STACK 32

/* From the BSpec:
 *
 *    syncStackSize = (maxBVHLevels % 2 == 1) ?
 *       (sizeof(HitInfo) * 2 +
 *        (sizeof(Ray) + sizeof(TravStack)) * maxBVHLevels + 32B) :
 *       (sizeof(HitInfo) * 2 +
 *        (sizeof(Ray) + sizeof(TravStack)) * maxBVHLevels);
 *
 * The select is just to align to 64B.
 */
#define BRW_RT_SIZEOF_RAY_QUERY \
   (BRW_RT_SIZEOF_HIT_INFO * 2 + \
    (BRW_RT_SIZEOF_RAY + BRW_RT_SIZEOF_TRAV_STACK) * BRW_RT_MAX_BVH_LEVELS + \
    (BRW_RT_MAX_BVH_LEVELS % 2 ? 32 : 0))

#define BRW_RT_SIZEOF_SHADOW_RAY_QUERY \
   (BRW_RT_SIZEOF_HIT_INFO * 2 + \
    (BRW_RT_SIZEOF_RAY + BRW_RT_SIZEOF_TRAV_STACK) * BRW_RT_MAX_BVH_LEVELS)

#define BRW_RT_SIZEOF_HW_STACK \
   (BRW_RT_SIZEOF_HIT_INFO * 2 + \
    BRW_RT_SIZEOF_RAY * BRW_RT_MAX_BVH_LEVELS + \
    BRW_RT_SIZEOF_TRAV_STACK * BRW_RT_MAX_BVH_LEVELS)

/* This is a mesa-defined region for hit attribute data */
#define BRW_RT_SIZEOF_HIT_ATTRIB_DATA 64
#define BRW_RT_OFFSETOF_HIT_ATTRIB_DATA BRW_RT_SIZEOF_HW_STACK

/* Per-stack-ID stride of the async HW stack: HW stack + hit attribute
 * region, rounded up to a cache line.
 */
#define BRW_RT_ASYNC_STACK_STRIDE \
   ALIGN_POT(BRW_RT_OFFSETOF_HIT_ATTRIB_DATA + \
             BRW_RT_SIZEOF_HIT_ATTRIB_DATA, 64)
|
||||
|
||||
static inline void
|
||||
brw_rt_compute_scratch_layout(struct brw_rt_scratch_layout *layout,
|
||||
const struct intel_device_info *devinfo,
|
||||
uint32_t stack_ids_per_dss,
|
||||
uint32_t sw_stack_size)
|
||||
{
|
||||
layout->stack_ids_per_dss = stack_ids_per_dss;
|
||||
|
||||
const uint32_t dss_count = intel_device_info_dual_subslice_id_bound(devinfo);
|
||||
const uint32_t num_stack_ids = dss_count * stack_ids_per_dss;
|
||||
|
||||
uint64_t size = 0;
|
||||
|
||||
/* The first thing in our scratch area is an array of "hot zones" which
|
||||
* store the stack offset as well as the launch IDs for each active
|
||||
* invocation.
|
||||
*/
|
||||
size += BRW_RT_SIZEOF_HOTZONE * num_stack_ids;
|
||||
|
||||
/* Next, we place the HW ray stacks */
|
||||
assert(size % 64 == 0); /* Cache-line aligned */
|
||||
assert(size < UINT32_MAX);
|
||||
layout->ray_stack_start = size;
|
||||
layout->ray_stack_stride = BRW_RT_ASYNC_STACK_STRIDE;
|
||||
size += num_stack_ids * layout->ray_stack_stride;
|
||||
|
||||
/* Finally, we place the SW stacks for the individual ray-tracing shader
|
||||
* invocations. We align these to 64B to ensure that we don't have any
|
||||
* shared cache lines which could hurt performance.
|
||||
*/
|
||||
assert(size % 64 == 0);
|
||||
layout->sw_stack_start = size;
|
||||
layout->sw_stack_size = ALIGN(sw_stack_size, 64);
|
||||
|
||||
/* Currently it's always the case that sw_stack_size is a power of
|
||||
* two, but power-of-two SW stack sizes are prone to causing
|
||||
* collisions in the hashing function used by the L3 to map memory
|
||||
* addresses to banks, which can cause stack accesses from most
|
||||
* DSSes to bottleneck on a single L3 bank. Fix it by padding the
|
||||
* SW stack by a single cacheline if it was a power of two.
|
||||
*/
|
||||
if (layout->sw_stack_size > 64 &&
|
||||
util_is_power_of_two_nonzero(layout->sw_stack_size))
|
||||
layout->sw_stack_size += 64;
|
||||
|
||||
size += num_stack_ids * layout->sw_stack_size;
|
||||
|
||||
layout->total_size = size;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
brw_rt_ray_queries_hw_stacks_size(const struct intel_device_info *devinfo)
|
||||
{
|
||||
/* Maximum slice/subslice/EU ID can be computed from the max_scratch_ids
|
||||
* which includes all the threads.
|
||||
*/
|
||||
uint32_t max_eu_id = devinfo->max_scratch_ids[MESA_SHADER_COMPUTE];
|
||||
uint32_t max_simd_size = 16; /* Cannot run in SIMD32 with ray queries */
|
||||
return max_eu_id * max_simd_size * BRW_RT_SIZEOF_RAY_QUERY;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
brw_rt_ray_queries_shadow_stack_size(const struct intel_device_info *devinfo)
|
||||
{
|
||||
/* Maximum slice/subslice/EU ID can be computed from the max_scratch_ids
|
||||
* which includes all the threads.
|
||||
*/
|
||||
uint32_t max_eu_id = devinfo->max_scratch_ids[MESA_SHADER_COMPUTE];
|
||||
uint32_t max_simd_size = 16; /* Cannot run in SIMD32 with ray queries */
|
||||
return max_eu_id * max_simd_size * BRW_RT_SIZEOF_SHADOW_RAY_QUERY;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
brw_rt_ray_queries_shadow_stacks_size(const struct intel_device_info *devinfo,
|
||||
uint32_t ray_queries)
|
||||
{
|
||||
/* Don't bother a shadow stack if we only have a single query. We can
|
||||
* directly write in the HW buffer.
|
||||
*/
|
||||
return (ray_queries > 1 ? ray_queries : 0) * brw_rt_ray_queries_shadow_stack_size(devinfo) +
|
||||
ray_queries * 4; /* Ctrl + Level data */
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* BRW_RT_H */
|
||||
|
|
@ -1,676 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2021 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "brw_compiler.h"
|
||||
#include "brw_kernel.h"
|
||||
#include "compiler/brw_disasm.h"
|
||||
#include "compiler/clc/clc.h"
|
||||
#include "compiler/glsl_types.h"
|
||||
#include "compiler/nir/nir_serialize.h"
|
||||
#include "dev/intel_debug.h"
|
||||
#include "util/build_id.h"
|
||||
#include "util/disk_cache.h"
|
||||
#include "util/macros.h"
|
||||
#include "util/mesa-sha1.h"
|
||||
#include "util/u_dynarray.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <getopt.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
/* Shader functions */
|
||||
#define SPIR_V_MAGIC_NUMBER 0x07230203
|
||||
|
||||
static struct disk_cache *
|
||||
get_disk_cache(struct brw_compiler *compiler)
|
||||
{
|
||||
#ifdef ENABLE_SHADER_CACHE
|
||||
char renderer[14];
|
||||
ASSERTED int len = snprintf(renderer, sizeof(renderer), "brw_clc_%04x",
|
||||
compiler->devinfo->pci_device_id);
|
||||
assert(len == sizeof(renderer) - 2);
|
||||
|
||||
const struct build_id_note *note =
|
||||
build_id_find_nhdr_for_addr(get_disk_cache);
|
||||
if (note == NULL) {
|
||||
fprintf(stderr, "Failed to find build-id\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
unsigned build_id_len = build_id_length(note);
|
||||
if (build_id_len < 20) {
|
||||
fprintf(stderr, "build-id too short. It needs to be a SHA\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
struct mesa_sha1 sha1_ctx;
|
||||
uint8_t sha1[20];
|
||||
_mesa_sha1_init(&sha1_ctx);
|
||||
_mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
|
||||
_mesa_sha1_final(&sha1_ctx, sha1);
|
||||
|
||||
char timestamp[41];
|
||||
_mesa_sha1_format(timestamp, sha1);
|
||||
|
||||
const uint64_t driver_flags = brw_get_compiler_config_value(compiler);
|
||||
|
||||
return disk_cache_create(renderer, timestamp, driver_flags);
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
compiler_log(void *data, unsigned *id, const char *fmt, ...)
|
||||
{
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
if (INTEL_DEBUG(DEBUG_CS))
|
||||
vfprintf(stderr, fmt, args);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
static void
|
||||
msg_callback(void *priv, const char *msg)
|
||||
{
|
||||
(void)priv;
|
||||
fprintf(stderr, "%s", msg);
|
||||
}
|
||||
|
||||
static void
|
||||
print_u32_data(FILE *fp, const char *prefix, const char *arr_name,
|
||||
const uint32_t *data, size_t len)
|
||||
{
|
||||
assert(len % 4 == 0);
|
||||
fprintf(fp, "static const uint32_t %s_%s[] = {", prefix, arr_name);
|
||||
for (unsigned i = 0; i < (len / 4); i++) {
|
||||
if (i % 4 == 0)
|
||||
fprintf(fp,"\n ");
|
||||
|
||||
fprintf(fp, " 0x%08" PRIx32 ",", data[i]);
|
||||
}
|
||||
fprintf(fp, "\n};\n");
|
||||
}
|
||||
|
||||
static void
|
||||
print_u8_data(FILE *fp, const char *prefix, const char *arr_name,
|
||||
const uint8_t *data, size_t len)
|
||||
{
|
||||
fprintf(fp, "static const uint8_t %s_%s[] = {", prefix, arr_name);
|
||||
for (unsigned i = 0; i < len; i++) {
|
||||
if (i % 16 == 0)
|
||||
fprintf(fp,"\n ");
|
||||
|
||||
fprintf(fp, " 0x%02" PRIx8 ",", data[i]);
|
||||
}
|
||||
fprintf(fp, "\n};\n");
|
||||
}
|
||||
|
||||
static const char *
|
||||
reloc_type_str(enum brw_shader_reloc_type type)
|
||||
{
|
||||
switch (type) {
|
||||
#define CASE(e) case e: return #e;
|
||||
CASE(BRW_SHADER_RELOC_TYPE_U32)
|
||||
CASE(BRW_SHADER_RELOC_TYPE_MOV_IMM)
|
||||
#undef CASE
|
||||
default:
|
||||
unreachable("Unknown relocation type");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_cs_prog_data_fields(FILE *fp, const char *prefix, const char *pad,
|
||||
const struct brw_cs_prog_data *cs_prog_data)
|
||||
{
|
||||
#define PROG_DATA_FIELD(fmt, field) \
|
||||
fprintf(fp, "%s." #field " = " fmt ",\n", pad, cs_prog_data->field)
|
||||
|
||||
#define PROG_DATA_BOOL_FIELD(field) \
|
||||
fprintf(fp, "%s." #field " = %s,\n", pad, \
|
||||
cs_prog_data->field ? "true" : "false")
|
||||
|
||||
PROG_DATA_FIELD("%u", base.nr_params);
|
||||
assert(cs_prog_data->base.stage == MESA_SHADER_COMPUTE);
|
||||
fprintf(fp, "%s.base.stage = MESA_SHADER_COMPUTE,\n", pad);
|
||||
assert(cs_prog_data->base.zero_push_reg == 0);
|
||||
assert(cs_prog_data->base.push_reg_mask_param == 0);
|
||||
PROG_DATA_FIELD("%u", base.curb_read_length);
|
||||
PROG_DATA_FIELD("%u", base.total_scratch);
|
||||
PROG_DATA_FIELD("%u", base.total_shared);
|
||||
PROG_DATA_FIELD("%u", base.program_size);
|
||||
PROG_DATA_FIELD("%u", base.const_data_size);
|
||||
PROG_DATA_FIELD("%u", base.const_data_offset);
|
||||
PROG_DATA_FIELD("%u", base.num_relocs);
|
||||
fprintf(fp, "%s.base.relocs = %s_relocs,\n", pad, prefix);
|
||||
assert(!cs_prog_data->base.has_ubo_pull);
|
||||
assert(cs_prog_data->base.dispatch_grf_start_reg == 0);
|
||||
assert(!cs_prog_data->base.use_alt_mode);
|
||||
assert(cs_prog_data->base.param == 0);
|
||||
PROG_DATA_BOOL_FIELD(base.uses_atomic_load_store);
|
||||
fprintf(fp, "%s.local_size = { %u, %u, %u },\n", pad,
|
||||
cs_prog_data->local_size[0],
|
||||
cs_prog_data->local_size[1],
|
||||
cs_prog_data->local_size[2]);
|
||||
fprintf(fp, "%s.prog_offset = { %u, %u, %u },\n", pad,
|
||||
cs_prog_data->prog_offset[0],
|
||||
cs_prog_data->prog_offset[1],
|
||||
cs_prog_data->prog_offset[2]);
|
||||
PROG_DATA_FIELD("%u", prog_mask);
|
||||
PROG_DATA_FIELD("%u", prog_spilled);
|
||||
PROG_DATA_BOOL_FIELD(uses_barrier);
|
||||
PROG_DATA_BOOL_FIELD(uses_num_work_groups);
|
||||
assert(!cs_prog_data->uses_inline_data);
|
||||
assert(!cs_prog_data->uses_btd_stack_ids);
|
||||
PROG_DATA_FIELD("%u", push.per_thread.dwords);
|
||||
PROG_DATA_FIELD("%u", push.per_thread.regs);
|
||||
PROG_DATA_FIELD("%u", push.per_thread.size);
|
||||
PROG_DATA_FIELD("%u", push.cross_thread.dwords);
|
||||
PROG_DATA_FIELD("%u", push.cross_thread.regs);
|
||||
PROG_DATA_FIELD("%u", push.cross_thread.size);
|
||||
|
||||
#undef PROG_DATA_FIELD
|
||||
#undef PROG_DATA_BOOL_FIELD
|
||||
}
|
||||
|
||||
static void
|
||||
print_kernel(FILE *fp, const char *prefix,
|
||||
const struct brw_kernel *kernel,
|
||||
const struct brw_isa_info *isa)
|
||||
{
|
||||
struct mesa_sha1 sha1_ctx;
|
||||
_mesa_sha1_init(&sha1_ctx);
|
||||
|
||||
#define SHA1_UPDATE_VALUE(val) \
|
||||
_mesa_sha1_update(&sha1_ctx, &val, sizeof(val))
|
||||
|
||||
fprintf(fp, "#include \"intel/compiler/brw_kernel.h\"\n");
|
||||
fprintf(fp, "\n");
|
||||
|
||||
fprintf(fp, "static const struct brw_shader_reloc %s_relocs[] = {\n",
|
||||
prefix);
|
||||
for (unsigned i = 0; i < kernel->prog_data.base.num_relocs; i++) {
|
||||
const struct brw_shader_reloc *reloc = &kernel->prog_data.base.relocs[i];
|
||||
fprintf(fp, " { %"PRIu32", %s, %"PRIu32", %"PRIu32" },\n",
|
||||
reloc->id, reloc_type_str(reloc->type),
|
||||
reloc->offset, reloc->delta);
|
||||
}
|
||||
fprintf(fp, "};\n");
|
||||
_mesa_sha1_update(&sha1_ctx, kernel->prog_data.base.relocs,
|
||||
kernel->prog_data.base.num_relocs *
|
||||
sizeof(kernel->prog_data.base.relocs[0]));
|
||||
|
||||
/* Get rid of the pointers before we hash */
|
||||
struct brw_cs_prog_data cs_prog_data = kernel->prog_data;
|
||||
cs_prog_data.base.relocs = NULL;
|
||||
assert(cs_prog_data.base.param == NULL);
|
||||
_mesa_sha1_update(&sha1_ctx, &cs_prog_data, sizeof(cs_prog_data));
|
||||
|
||||
SHA1_UPDATE_VALUE(kernel->args_size);
|
||||
SHA1_UPDATE_VALUE(kernel->arg_count);
|
||||
_mesa_sha1_update(&sha1_ctx, kernel->args,
|
||||
kernel->arg_count * sizeof(kernel->args[0]));
|
||||
|
||||
fprintf(fp, "static const struct brw_kernel_arg_desc %s_args[] = {\n",
|
||||
prefix);
|
||||
for (unsigned i = 0; i < kernel->arg_count; i++) {
|
||||
fprintf(fp, " { %d, %d },\n",
|
||||
kernel->args[i].offset, kernel->args[i].size);
|
||||
}
|
||||
fprintf(fp, "};\n\n");
|
||||
|
||||
_mesa_sha1_update(&sha1_ctx, kernel->code,
|
||||
kernel->prog_data.base.program_size);
|
||||
|
||||
fprintf(fp, "#if 0 /* BEGIN KERNEL ASSEMBLY */\n");
|
||||
fprintf(fp, "\n");
|
||||
brw_disassemble_with_errors(isa, kernel->code, 0, fp);
|
||||
fprintf(fp, "\n");
|
||||
fprintf(fp, "#endif /* END KERNEL ASSEMBLY */\n");
|
||||
print_u32_data(fp, prefix, "code", kernel->code,
|
||||
kernel->prog_data.base.program_size);
|
||||
|
||||
fprintf(fp, "static const struct brw_kernel %s = {\n", prefix);
|
||||
fprintf(fp, " .prog_data = {\n");
|
||||
print_cs_prog_data_fields(fp, prefix, " ", &kernel->prog_data);
|
||||
fprintf(fp, " },\n");
|
||||
fprintf(fp, " .args_size = %d,\n", (int)kernel->args_size);
|
||||
fprintf(fp, " .arg_count = %d,\n", (int)kernel->arg_count);
|
||||
fprintf(fp, " .args = %s_args,\n", prefix);
|
||||
fprintf(fp, " .code = %s_code,\n", prefix);
|
||||
fprintf(fp, "};\n");
|
||||
|
||||
unsigned char sha1[20];
|
||||
_mesa_sha1_final(&sha1_ctx, sha1);
|
||||
char sha1_str[41];
|
||||
_mesa_sha1_format(sha1_str, sha1);
|
||||
fprintf(fp, "const char *%s_sha1 = \"%s\";\n", prefix, sha1_str);
|
||||
}
|
||||
|
||||
static void
|
||||
print_usage(char *exec_name, FILE *f)
|
||||
{
|
||||
fprintf(f,
|
||||
"Usage: %s [options] -- [clang args]\n"
|
||||
"Options:\n"
|
||||
" -h --help Print this help.\n"
|
||||
" -e, --entrypoint <name> Specify the entry-point name.\n"
|
||||
" -L, --llvm17-wa Enable LLVM 17 workarounds for opaque pointers"
|
||||
" -p, --platform <name> Specify the target platform name.\n"
|
||||
" --prefix <prefix> Prefix for variable names in generated C code.\n"
|
||||
" -o, --out <filename> Specify the output filename.\n"
|
||||
" -i, --in <filename> Specify one input filename. Accepted multiple times.\n"
|
||||
" -s, --spv <filename> Specify the output filename for spirv.\n"
|
||||
" -n, --nir Specify whether to output serialized NIR instead of ISA.\n"
|
||||
" -t, --text <filename> Specify the output filename for the parsed text\n"
|
||||
" -v, --verbose Print more information during compilation.\n"
|
||||
" -M, --llvm-version Print LLVM version.\n"
|
||||
, exec_name);
|
||||
}
|
||||
|
||||
#define OPT_PREFIX 1000
|
||||
|
||||
struct intel_clc_params {
|
||||
char *entry_point;
|
||||
char *platform;
|
||||
char *outfile;
|
||||
char *spv_outfile;
|
||||
char *txt_outfile;
|
||||
char *prefix;
|
||||
|
||||
bool output_nir;
|
||||
bool print_info;
|
||||
bool llvm17_wa;
|
||||
|
||||
void *mem_ctx;
|
||||
|
||||
struct intel_device_info devinfo;
|
||||
};
|
||||
|
||||
#include "compiler/spirv/nir_spirv.h"
|
||||
|
||||
static int
|
||||
output_nir(const struct intel_clc_params *params, struct clc_binary *binary)
|
||||
{
|
||||
struct spirv_to_nir_options spirv_options = {
|
||||
.environment = NIR_SPIRV_OPENCL,
|
||||
.caps = {
|
||||
.address = true,
|
||||
.groups = true,
|
||||
.image_write_without_format = true,
|
||||
.int8 = true,
|
||||
.int16 = true,
|
||||
.int64 = true,
|
||||
.int64_atomics = true,
|
||||
.kernel = true,
|
||||
.linkage = true, /* We receive linked kernel from clc */
|
||||
.float_controls = true,
|
||||
.generic_pointers = true,
|
||||
.storage_8bit = true,
|
||||
.storage_16bit = true,
|
||||
.subgroup_arithmetic = true,
|
||||
.subgroup_basic = true,
|
||||
.subgroup_ballot = true,
|
||||
.subgroup_dispatch = true,
|
||||
.subgroup_quad = true,
|
||||
.subgroup_shuffle = true,
|
||||
.subgroup_vote = true,
|
||||
|
||||
.intel_subgroup_shuffle = true,
|
||||
.intel_subgroup_buffer_block_io = true,
|
||||
},
|
||||
.shared_addr_format = nir_address_format_62bit_generic,
|
||||
.global_addr_format = nir_address_format_62bit_generic,
|
||||
.temp_addr_format = nir_address_format_62bit_generic,
|
||||
.constant_addr_format = nir_address_format_64bit_global,
|
||||
.create_library = true,
|
||||
};
|
||||
|
||||
FILE *fp = params->outfile != NULL ?
|
||||
fopen(params->outfile, "w") : stdout;
|
||||
if (!fp) {
|
||||
fprintf(stderr, "Failed to open %s\n", params->outfile);
|
||||
return -1;
|
||||
}
|
||||
|
||||
spirv_library_to_nir_builder(fp, binary->data, binary->size / 4,
|
||||
&spirv_options);
|
||||
|
||||
nir_shader *nir = brw_nir_from_spirv(params->mem_ctx,
|
||||
binary->data, binary->size,
|
||||
params->llvm17_wa);
|
||||
if (!nir) {
|
||||
fprintf(stderr, "Failed to generate NIR out of SPIRV\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct blob blob;
|
||||
blob_init(&blob);
|
||||
nir_serialize(&blob, nir, false /* strip */);
|
||||
print_u8_data(fp, params->prefix, "nir", blob.data, blob.size);
|
||||
blob_finish(&blob);
|
||||
|
||||
if (params->outfile)
|
||||
fclose(fp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
output_isa(const struct intel_clc_params *params, struct clc_binary *binary)
|
||||
{
|
||||
struct brw_kernel kernel = {};
|
||||
char *error_str;
|
||||
|
||||
struct brw_isa_info _isa, *isa = &_isa;
|
||||
brw_init_isa_info(isa, ¶ms->devinfo);
|
||||
|
||||
struct brw_compiler *compiler = brw_compiler_create(params->mem_ctx,
|
||||
¶ms->devinfo);
|
||||
compiler->shader_debug_log = compiler_log;
|
||||
compiler->shader_perf_log = compiler_log;
|
||||
struct disk_cache *disk_cache = get_disk_cache(compiler);
|
||||
|
||||
if (!brw_kernel_from_spirv(compiler, disk_cache, &kernel, NULL, params->mem_ctx,
|
||||
binary->data, binary->size,
|
||||
params->entry_point, &error_str)) {
|
||||
fprintf(stderr, "Compile failed: %s\n", error_str);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (params->print_info) {
|
||||
fprintf(stdout, "kernel info:\n");
|
||||
fprintf(stdout, " uses_barrier : %u\n", kernel.prog_data.uses_barrier);
|
||||
fprintf(stdout, " uses_num_work_groups : %u\n", kernel.prog_data.uses_num_work_groups);
|
||||
fprintf(stdout, " uses_inline_data : %u\n", kernel.prog_data.uses_inline_data);
|
||||
fprintf(stdout, " local_size : %ux%ux%u\n",
|
||||
kernel.prog_data.local_size[0],
|
||||
kernel.prog_data.local_size[1],
|
||||
kernel.prog_data.local_size[2]);
|
||||
fprintf(stdout, " curb_read_length : %u\n", kernel.prog_data.base.curb_read_length);
|
||||
fprintf(stdout, " total_scratch : %u\n", kernel.prog_data.base.total_scratch);
|
||||
fprintf(stdout, " total_shared : %u\n", kernel.prog_data.base.total_shared);
|
||||
fprintf(stdout, " program_size : %u\n", kernel.prog_data.base.program_size);
|
||||
fprintf(stdout, " const_data_size : %u\n", kernel.prog_data.base.const_data_size);
|
||||
fprintf(stdout, " uses_atomic_load_store : %u\n", kernel.prog_data.base.uses_atomic_load_store);
|
||||
fprintf(stdout, " dispatch_grf_start_reg : %u\n", kernel.prog_data.base.dispatch_grf_start_reg);
|
||||
}
|
||||
|
||||
char *prefix = params->prefix;
|
||||
char prefix_tmp[256];
|
||||
if (prefix == NULL) {
|
||||
bool is_pt_5 = (params->devinfo.verx10 % 10) == 5;
|
||||
snprintf(prefix_tmp, sizeof(prefix_tmp), "gfx%d%s_clc_%s",
|
||||
params->devinfo.ver, is_pt_5 ? "5" : "", params->entry_point);
|
||||
prefix = prefix_tmp;
|
||||
}
|
||||
|
||||
if (params->outfile != NULL) {
|
||||
FILE *fp = fopen(params->outfile, "w");
|
||||
print_kernel(fp, prefix, &kernel, isa);
|
||||
fclose(fp);
|
||||
} else {
|
||||
print_kernel(stdout, prefix, &kernel, isa);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
print_llvm_version(FILE *out)
|
||||
{
|
||||
fprintf(out, "%s\n", MESA_LLVM_VERSION_STRING);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int exit_code = 0;
|
||||
|
||||
process_intel_debug_variable();
|
||||
|
||||
static struct option long_options[] ={
|
||||
{"help", no_argument, 0, 'h'},
|
||||
{"entrypoint", required_argument, 0, 'e'},
|
||||
{"platform", required_argument, 0, 'p'},
|
||||
{"prefix", required_argument, 0, OPT_PREFIX},
|
||||
{"in", required_argument, 0, 'i'},
|
||||
{"out", required_argument, 0, 'o'},
|
||||
{"spv", required_argument, 0, 's'},
|
||||
{"text", required_argument, 0, 't'},
|
||||
{"nir", no_argument, 0, 'n'},
|
||||
{"llvm17-wa", no_argument, 0, 'L'},
|
||||
{"llvm-version", no_argument, 0, 'M'},
|
||||
{"verbose", no_argument, 0, 'v'},
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
|
||||
struct intel_clc_params params = {};
|
||||
|
||||
struct util_dynarray clang_args;
|
||||
struct util_dynarray input_files;
|
||||
|
||||
struct clc_binary spirv_obj = {0};
|
||||
struct clc_parsed_spirv parsed_spirv_data = {0};
|
||||
struct disk_cache *disk_cache = NULL;
|
||||
|
||||
params.mem_ctx = ralloc_context(NULL);
|
||||
|
||||
util_dynarray_init(&clang_args, params.mem_ctx);
|
||||
util_dynarray_init(&input_files, params.mem_ctx);
|
||||
|
||||
int ch;
|
||||
while ((ch = getopt_long(argc, argv, "he:p:s:t:i:no:MLv", long_options, NULL)) != -1)
|
||||
{
|
||||
switch (ch)
|
||||
{
|
||||
case 'h':
|
||||
print_usage(argv[0], stdout);
|
||||
goto end;
|
||||
case 'e':
|
||||
params.entry_point = optarg;
|
||||
break;
|
||||
case 'p':
|
||||
params.platform = optarg;
|
||||
break;
|
||||
case 'o':
|
||||
params.outfile = optarg;
|
||||
break;
|
||||
case 'i':
|
||||
util_dynarray_append(&input_files, char *, optarg);
|
||||
break;
|
||||
case 'n':
|
||||
params.output_nir = true;
|
||||
break;
|
||||
case 's':
|
||||
params.spv_outfile = optarg;
|
||||
break;
|
||||
case 't':
|
||||
params.txt_outfile = optarg;
|
||||
break;
|
||||
case 'v':
|
||||
params.print_info = true;
|
||||
break;
|
||||
case 'L':
|
||||
params.llvm17_wa = true;
|
||||
break;
|
||||
case 'M':
|
||||
print_llvm_version(stdout);
|
||||
return EXIT_SUCCESS;
|
||||
case OPT_PREFIX:
|
||||
params.prefix = optarg;
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Unrecognized option \"%s\".\n", optarg);
|
||||
print_usage(argv[0], stderr);
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = optind; i < argc; i++) {
|
||||
util_dynarray_append(&clang_args, char *, argv[i]);
|
||||
}
|
||||
|
||||
if (util_dynarray_num_elements(&input_files, char *) == 0) {
|
||||
fprintf(stderr, "No input file(s).\n");
|
||||
print_usage(argv[0], stderr);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
struct clc_logger logger = {
|
||||
.error = msg_callback,
|
||||
.warning = msg_callback,
|
||||
};
|
||||
|
||||
size_t total_size = 0;
|
||||
char *all_inputs = NULL;
|
||||
util_dynarray_foreach(&input_files, char *, infile) {
|
||||
int fd = open(*infile, O_RDONLY);
|
||||
if (fd < 0) {
|
||||
fprintf(stderr, "Failed to open %s\n", *infile);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
off_t len = lseek(fd, 0, SEEK_END);
|
||||
size_t new_size = total_size + len;
|
||||
all_inputs = reralloc_size(params.mem_ctx, all_inputs, new_size + 1);
|
||||
if (!all_inputs) {
|
||||
fprintf(stderr, "Failed to allocate memory\n");
|
||||
goto fail;
|
||||
}
|
||||
lseek(fd, 0, SEEK_SET);
|
||||
read(fd, all_inputs + total_size, len);
|
||||
close(fd);
|
||||
total_size = new_size;
|
||||
all_inputs[total_size] = '\0';
|
||||
}
|
||||
|
||||
if (params.txt_outfile) {
|
||||
FILE *fp = fopen(params.txt_outfile, "w");
|
||||
fwrite(all_inputs, total_size, 1, fp);
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
const char *allowed_spirv_extensions[] = {
|
||||
"SPV_EXT_shader_atomic_float_add",
|
||||
"SPV_EXT_shader_atomic_float_min_max",
|
||||
"SPV_KHR_float_controls",
|
||||
"SPV_INTEL_subgroups",
|
||||
NULL,
|
||||
};
|
||||
|
||||
struct clc_compile_args clc_args = {
|
||||
.source = {
|
||||
.name = "intel_clc_files",
|
||||
.value = all_inputs,
|
||||
},
|
||||
.features = {
|
||||
.fp16 = true,
|
||||
.intel_subgroups = true,
|
||||
.subgroups = true,
|
||||
.subgroups_ifp = true,
|
||||
},
|
||||
.args = util_dynarray_begin(&clang_args),
|
||||
.num_args = util_dynarray_num_elements(&clang_args, char *),
|
||||
.allowed_spirv_extensions = allowed_spirv_extensions,
|
||||
};
|
||||
|
||||
if (!clc_compile_c_to_spirv(&clc_args, &logger, &spirv_obj)) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (params.spv_outfile) {
|
||||
FILE *fp = fopen(params.spv_outfile, "w");
|
||||
fwrite(spirv_obj.data, spirv_obj.size, 1, fp);
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
glsl_type_singleton_init_or_ref();
|
||||
|
||||
if (params.output_nir) {
|
||||
exit_code = output_nir(¶ms, &spirv_obj);
|
||||
} else {
|
||||
if (params.platform == NULL) {
|
||||
fprintf(stderr, "No target platform name specified.\n");
|
||||
print_usage(argv[0], stderr);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
int pci_id = intel_device_name_to_pci_device_id(params.platform);
|
||||
if (pci_id < 0) {
|
||||
fprintf(stderr, "Invalid target platform name: %s\n", params.platform);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (!intel_get_device_info_from_pci_id(pci_id, ¶ms.devinfo)) {
|
||||
fprintf(stderr, "Failed to get device information.\n");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (params.devinfo.verx10 < 125) {
|
||||
fprintf(stderr, "Platform currently not supported.\n");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (params.entry_point == NULL) {
|
||||
fprintf(stderr, "No entry-point name specified.\n");
|
||||
print_usage(argv[0], stderr);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
struct clc_parsed_spirv parsed_spirv_data;
|
||||
if (!clc_parse_spirv(&spirv_obj, &logger, &parsed_spirv_data))
|
||||
goto fail;
|
||||
|
||||
const struct clc_kernel_info *kernel_info = NULL;
|
||||
for (unsigned i = 0; i < parsed_spirv_data.num_kernels; i++) {
|
||||
if (strcmp(parsed_spirv_data.kernels[i].name, params.entry_point) == 0) {
|
||||
kernel_info = &parsed_spirv_data.kernels[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (kernel_info == NULL) {
|
||||
fprintf(stderr, "Kernel entrypoint %s not found\n", params.entry_point);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
exit_code = output_isa(¶ms, &spirv_obj);
|
||||
}
|
||||
|
||||
glsl_type_singleton_decref();
|
||||
|
||||
goto end;
|
||||
|
||||
fail:
|
||||
exit_code = 1;
|
||||
|
||||
end:
|
||||
disk_cache_destroy(disk_cache);
|
||||
clc_free_parsed_spirv(&parsed_spirv_data);
|
||||
clc_free_spirv(&spirv_obj);
|
||||
ralloc_free(params.mem_ctx);
|
||||
|
||||
return exit_code;
|
||||
}
|
||||
|
|
@ -65,7 +65,6 @@ libintel_compiler_elk_files = files(
|
|||
'brw_fs_reg_allocate.cpp',
|
||||
'brw_fs_register_coalesce.cpp',
|
||||
'brw_fs_saturate_propagation.cpp',
|
||||
'brw_fs_scoreboard.cpp',
|
||||
'brw_fs_sel_peephole.cpp',
|
||||
'brw_fs_thread_payload.cpp',
|
||||
'brw_fs_validate.cpp',
|
||||
|
|
@ -81,23 +80,14 @@ libintel_compiler_elk_files = files(
|
|||
'brw_ir_vec4.h',
|
||||
'brw_isa_info.h',
|
||||
'brw_lower_logical_sends.cpp',
|
||||
'brw_mesh.cpp',
|
||||
'brw_nir.h',
|
||||
'brw_nir.c',
|
||||
'brw_nir_analyze_boolean_resolves.c',
|
||||
'brw_nir_analyze_ubo_ranges.c',
|
||||
'brw_nir_attribute_workarounds.c',
|
||||
'brw_nir_lower_cooperative_matrix.c',
|
||||
'brw_nir_lower_cs_intrinsics.c',
|
||||
'brw_nir_lower_alpha_to_coverage.c',
|
||||
'brw_nir_lower_intersection_shader.c',
|
||||
'brw_nir_lower_ray_queries.c',
|
||||
'brw_nir_lower_rt_intrinsics.c',
|
||||
'brw_nir_lower_shader_calls.c',
|
||||
'brw_nir_lower_storage_image.c',
|
||||
'brw_nir_rt.h',
|
||||
'brw_nir_rt.c',
|
||||
'brw_nir_rt_builder.h',
|
||||
'brw_packed_float.c',
|
||||
'brw_predicated_break.cpp',
|
||||
'brw_prim.h',
|
||||
|
|
@ -105,7 +95,6 @@ libintel_compiler_elk_files = files(
|
|||
'brw_reg.h',
|
||||
'brw_reg_type.c',
|
||||
'brw_reg_type.h',
|
||||
'brw_rt.h',
|
||||
'brw_schedule_instructions.cpp',
|
||||
'brw_shader.cpp',
|
||||
'brw_shader.h',
|
||||
|
|
@ -173,7 +162,6 @@ if with_tests
|
|||
'test_fs_combine_constants.cpp',
|
||||
'test_fs_copy_propagation.cpp',
|
||||
'test_fs_saturate_propagation.cpp',
|
||||
'test_fs_scoreboard.cpp',
|
||||
'test_simd_selection.cpp',
|
||||
'test_vec4_cmod_propagation.cpp',
|
||||
'test_vec4_copy_propagation.cpp',
|
||||
|
|
@ -228,10 +216,6 @@ asm_testcases = [
|
|||
['ivb', 'gfx7'],
|
||||
['hsw', 'gfx7.5'],
|
||||
['bdw', 'gfx8'],
|
||||
['skl', 'gfx9'],
|
||||
['icl', 'gfx11'],
|
||||
['tgl', 'gfx12'],
|
||||
['dg2', 'gfx12.5'],
|
||||
]
|
||||
|
||||
test_runner = find_program('tests/run-test.py')
|
||||
|
|
|
|||
|
|
@ -1,893 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2019 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include "brw_fs.h"
|
||||
#include "brw_fs_builder.h"
|
||||
#include "brw_cfg.h"
|
||||
|
||||
using namespace brw;
|
||||
|
||||
class scoreboard_test : public ::testing::Test {
|
||||
protected:
|
||||
scoreboard_test();
|
||||
~scoreboard_test() override;
|
||||
|
||||
struct brw_compiler *compiler;
|
||||
struct brw_compile_params params;
|
||||
struct intel_device_info *devinfo;
|
||||
void *ctx;
|
||||
struct brw_wm_prog_data *prog_data;
|
||||
struct gl_shader_program *shader_prog;
|
||||
fs_visitor *v;
|
||||
fs_builder bld;
|
||||
};
|
||||
|
||||
scoreboard_test::scoreboard_test()
|
||||
: bld(NULL, 0)
|
||||
{
|
||||
ctx = ralloc_context(NULL);
|
||||
compiler = rzalloc(ctx, struct brw_compiler);
|
||||
devinfo = rzalloc(ctx, struct intel_device_info);
|
||||
devinfo->ver = 12;
|
||||
devinfo->verx10 = devinfo->ver * 10;
|
||||
|
||||
compiler->devinfo = devinfo;
|
||||
brw_init_isa_info(&compiler->isa, devinfo);
|
||||
|
||||
params = {};
|
||||
params.mem_ctx = ctx;
|
||||
|
||||
prog_data = ralloc(ctx, struct brw_wm_prog_data);
|
||||
nir_shader *shader =
|
||||
nir_shader_create(ctx, MESA_SHADER_FRAGMENT, NULL, NULL);
|
||||
|
||||
v = new fs_visitor(compiler, ¶ms, NULL, &prog_data->base, shader, 8,
|
||||
false, false);
|
||||
|
||||
bld = fs_builder(v).at_end();
|
||||
}
|
||||
|
||||
scoreboard_test::~scoreboard_test()
|
||||
{
|
||||
delete v;
|
||||
v = NULL;
|
||||
|
||||
ralloc_free(ctx);
|
||||
ctx = NULL;
|
||||
}
|
||||
|
||||
static fs_inst *
|
||||
instruction(bblock_t *block, int num)
|
||||
{
|
||||
fs_inst *inst = (fs_inst *)block->start();
|
||||
for (int i = 0; i < num; i++) {
|
||||
inst = (fs_inst *)inst->next;
|
||||
}
|
||||
return inst;
|
||||
}
|
||||
|
||||
static void
|
||||
lower_scoreboard(fs_visitor *v)
|
||||
{
|
||||
const bool print = getenv("TEST_DEBUG");
|
||||
|
||||
if (print) {
|
||||
fprintf(stderr, "= Before =\n");
|
||||
v->cfg->dump();
|
||||
}
|
||||
|
||||
v->lower_scoreboard();
|
||||
|
||||
if (print) {
|
||||
fprintf(stderr, "\n= After =\n");
|
||||
v->cfg->dump();
|
||||
}
|
||||
}
|
||||
|
||||
fs_inst *
|
||||
emit_SEND(const fs_builder &bld, const fs_reg &dst,
|
||||
const fs_reg &desc, const fs_reg &payload)
|
||||
{
|
||||
fs_inst *inst = bld.emit(SHADER_OPCODE_SEND, dst, desc, desc, payload);
|
||||
inst->mlen = 1;
|
||||
return inst;
|
||||
}
|
||||
|
||||
static tgl_swsb
|
||||
tgl_swsb_testcase(unsigned regdist, unsigned sbid, enum tgl_sbid_mode mode)
|
||||
{
|
||||
tgl_swsb swsb = tgl_swsb_sbid(mode, sbid);
|
||||
swsb.regdist = regdist;
|
||||
return swsb;
|
||||
}
|
||||
|
||||
bool operator ==(const tgl_swsb &a, const tgl_swsb &b)
|
||||
{
|
||||
return a.mode == b.mode &&
|
||||
a.regdist == b.regdist &&
|
||||
(a.mode == TGL_SBID_NULL || a.sbid == b.sbid);
|
||||
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, const tgl_swsb &swsb) {
|
||||
if (swsb.regdist)
|
||||
os << "@" << swsb.regdist;
|
||||
|
||||
if (swsb.mode) {
|
||||
if (swsb.regdist)
|
||||
os << " ";
|
||||
os << "$" << swsb.sbid;
|
||||
if (swsb.mode & TGL_SBID_DST)
|
||||
os << ".dst";
|
||||
if (swsb.mode & TGL_SBID_SRC)
|
||||
os << ".src";
|
||||
}
|
||||
|
||||
return os;
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, RAW_inorder_inorder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
fs_reg y = v->vgrf(glsl_int_type());
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.MUL( y, g[3], g[4]);
|
||||
bld.AND(g[5], x, y);
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_regdist(1));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, RAW_inorder_outoforder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.MUL( g[3], g[4], g[5]);
|
||||
emit_SEND(bld, g[6], g[7], x);
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_testcase(2, 0, TGL_SBID_SET));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, RAW_outoforder_inorder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
fs_reg y = v->vgrf(glsl_int_type());
|
||||
emit_SEND(bld, x, g[1], g[2]);
|
||||
bld.MUL( y, g[3], g[4]);
|
||||
bld.AND( g[5], x, y);
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_sbid(TGL_SBID_SET, 0));
|
||||
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_testcase(1, 0, TGL_SBID_DST));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, RAW_outoforder_outoforder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
/* The second SEND depends on the first, and would need to refer to two
|
||||
* SBIDs. Since it is not possible we expect a SYNC instruction to be
|
||||
* added.
|
||||
*/
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
emit_SEND(bld, x, g[1], g[2]);
|
||||
emit_SEND(bld, g[3], x, g[4])->sfid++;
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(1, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_sbid(TGL_SBID_SET, 0));
|
||||
|
||||
fs_inst *sync = instruction(block0, 1);
|
||||
EXPECT_EQ(sync->opcode, BRW_OPCODE_SYNC);
|
||||
EXPECT_EQ(sync->sched, tgl_swsb_sbid(TGL_SBID_DST, 0));
|
||||
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_sbid(TGL_SBID_SET, 1));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, WAR_inorder_inorder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.ADD(g[1], x, g[2]);
|
||||
bld.MUL(g[3], g[4], g[5]);
|
||||
bld.AND( x, g[6], g[7]);
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_null());
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, WAR_inorder_outoforder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.ADD( g[1], x, g[2]);
|
||||
bld.MUL( g[3], g[4], g[5]);
|
||||
emit_SEND(bld, x, g[6], g[7]);
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_testcase(2, 0, TGL_SBID_SET));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, WAR_outoforder_inorder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
emit_SEND(bld, g[1], g[2], x);
|
||||
bld.MUL( g[4], g[5], g[6]);
|
||||
bld.AND( x, g[7], g[8]);
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_sbid(TGL_SBID_SET, 0));
|
||||
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_sbid(TGL_SBID_SRC, 0));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, WAR_outoforder_outoforder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
emit_SEND(bld, g[1], g[2], x);
|
||||
emit_SEND(bld, x, g[3], g[4])->sfid++;
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(1, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_sbid(TGL_SBID_SET, 0));
|
||||
|
||||
fs_inst *sync = instruction(block0, 1);
|
||||
EXPECT_EQ(sync->opcode, BRW_OPCODE_SYNC);
|
||||
EXPECT_EQ(sync->sched, tgl_swsb_sbid(TGL_SBID_SRC, 0));
|
||||
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_sbid(TGL_SBID_SET, 1));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, WAW_inorder_inorder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.MUL(g[3], g[4], g[5]);
|
||||
bld.AND( x, g[6], g[7]);
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
|
||||
|
||||
/* NOTE: We only need this RegDist if a long instruction is followed by a
|
||||
* short one. The pass is currently conservative about this and adding the
|
||||
* annotation.
|
||||
*/
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_regdist(2));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, WAW_inorder_outoforder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.MUL( g[3], g[4], g[5]);
|
||||
emit_SEND(bld, x, g[6], g[7]);
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_testcase(2, 0, TGL_SBID_SET));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, WAW_outoforder_inorder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
emit_SEND(bld, x, g[1], g[2]);
|
||||
bld.MUL( g[3], g[4], g[5]);
|
||||
bld.AND( x, g[6], g[7]);
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_sbid(TGL_SBID_SET, 0));
|
||||
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_sbid(TGL_SBID_DST, 0));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, WAW_outoforder_outoforder)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
emit_SEND(bld, x, g[1], g[2]);
|
||||
emit_SEND(bld, x, g[3], g[4])->sfid++;
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(1, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_sbid(TGL_SBID_SET, 0));
|
||||
|
||||
fs_inst *sync = instruction(block0, 1);
|
||||
EXPECT_EQ(sync->opcode, BRW_OPCODE_SYNC);
|
||||
EXPECT_EQ(sync->sched, tgl_swsb_sbid(TGL_SBID_DST, 0));
|
||||
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_sbid(TGL_SBID_SET, 1));
|
||||
}
|
||||
|
||||
|
||||
TEST_F(scoreboard_test, loop1)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_DO);
|
||||
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_WHILE)->predicate = BRW_PREDICATE_NORMAL;
|
||||
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
bblock_t *body = v->cfg->blocks[2];
|
||||
fs_inst *add = instruction(body, 0);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(1));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[3];
|
||||
fs_inst *mul = instruction(last_block, 0);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(1));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, loop2)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
bld.XOR(g[3], g[1], g[2]);
|
||||
bld.XOR(g[4], g[1], g[2]);
|
||||
bld.XOR(g[5], g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_DO);
|
||||
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_WHILE)->predicate = BRW_PREDICATE_NORMAL;
|
||||
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
/* Now the write in ADD has the tightest RegDist for both ADD and MUL. */
|
||||
|
||||
bblock_t *body = v->cfg->blocks[2];
|
||||
fs_inst *add = instruction(body, 0);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(2));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[3];
|
||||
fs_inst *mul = instruction(last_block, 0);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(2));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, loop3)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_DO);
|
||||
|
||||
/* For the ADD in the loop body this extra distance will always apply. */
|
||||
bld.XOR(g[3], g[1], g[2]);
|
||||
bld.XOR(g[4], g[1], g[2]);
|
||||
bld.XOR(g[5], g[1], g[2]);
|
||||
bld.XOR(g[6], g[1], g[2]);
|
||||
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_WHILE)->predicate = BRW_PREDICATE_NORMAL;
|
||||
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
bblock_t *body = v->cfg->blocks[2];
|
||||
fs_inst *add = instruction(body, 4);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(5));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[3];
|
||||
fs_inst *mul = instruction(last_block, 0);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(1));
|
||||
}
|
||||
|
||||
|
||||
TEST_F(scoreboard_test, conditional1)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_IF);
|
||||
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_ENDIF);
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
bblock_t *body = v->cfg->blocks[1];
|
||||
fs_inst *add = instruction(body, 0);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(2));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[2];
|
||||
fs_inst *mul = instruction(last_block, 1);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(2));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, conditional2)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
bld.XOR(g[3], g[1], g[2]);
|
||||
bld.XOR(g[4], g[1], g[2]);
|
||||
bld.XOR(g[5], g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_IF);
|
||||
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_ENDIF);
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
bblock_t *body = v->cfg->blocks[1];
|
||||
fs_inst *add = instruction(body, 0);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(5));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[2];
|
||||
fs_inst *mul = instruction(last_block, 1);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(2));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, conditional3)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_IF);
|
||||
|
||||
bld.XOR(g[3], g[1], g[2]);
|
||||
bld.XOR(g[4], g[1], g[2]);
|
||||
bld.XOR(g[5], g[1], g[2]);
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_ENDIF);
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
bblock_t *body = v->cfg->blocks[1];
|
||||
fs_inst *add = instruction(body, 3);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(5));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[2];
|
||||
fs_inst *mul = instruction(last_block, 1);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(2));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, conditional4)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_IF);
|
||||
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.XOR(g[3], g[1], g[2]);
|
||||
bld.XOR(g[4], g[1], g[2]);
|
||||
bld.XOR(g[5], g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_ENDIF);
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
bblock_t *body = v->cfg->blocks[1];
|
||||
fs_inst *add = instruction(body, 0);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(2));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[2];
|
||||
fs_inst *mul = instruction(last_block, 1);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(3));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, conditional5)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_IF);
|
||||
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_ELSE);
|
||||
|
||||
bld.ROL( x, g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_ENDIF);
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
bblock_t *then_body = v->cfg->blocks[1];
|
||||
fs_inst *add = instruction(then_body, 0);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(2));
|
||||
|
||||
bblock_t *else_body = v->cfg->blocks[2];
|
||||
fs_inst *rol = instruction(else_body, 0);
|
||||
EXPECT_EQ(rol->opcode, BRW_OPCODE_ROL);
|
||||
EXPECT_EQ(rol->sched, tgl_swsb_regdist(2));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[3];
|
||||
fs_inst *mul = instruction(last_block, 1);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(2));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, conditional6)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_IF);
|
||||
|
||||
bld.XOR(g[3], g[1], g[2]);
|
||||
bld.XOR(g[4], g[1], g[2]);
|
||||
bld.XOR(g[5], g[1], g[2]);
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_ELSE);
|
||||
|
||||
bld.XOR(g[6], g[1], g[2]);
|
||||
bld.XOR(g[7], g[1], g[2]);
|
||||
bld.XOR(g[8], g[1], g[2]);
|
||||
bld.XOR(g[9], g[1], g[2]);
|
||||
bld.ROL( x, g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_ENDIF);
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
bblock_t *then_body = v->cfg->blocks[1];
|
||||
fs_inst *add = instruction(then_body, 3);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(5));
|
||||
|
||||
bblock_t *else_body = v->cfg->blocks[2];
|
||||
fs_inst *rol = instruction(else_body, 4);
|
||||
EXPECT_EQ(rol->opcode, BRW_OPCODE_ROL);
|
||||
EXPECT_EQ(rol->sched, tgl_swsb_regdist(6));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[3];
|
||||
fs_inst *mul = instruction(last_block, 1);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(2));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, conditional7)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_IF);
|
||||
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.XOR(g[3], g[1], g[2]);
|
||||
bld.XOR(g[4], g[1], g[2]);
|
||||
bld.XOR(g[5], g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_ELSE);
|
||||
|
||||
bld.ROL( x, g[1], g[2]);
|
||||
bld.XOR(g[6], g[1], g[2]);
|
||||
bld.XOR(g[7], g[1], g[2]);
|
||||
bld.XOR(g[8], g[1], g[2]);
|
||||
bld.XOR(g[9], g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_ENDIF);
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
bblock_t *then_body = v->cfg->blocks[1];
|
||||
fs_inst *add = instruction(then_body, 0);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(2));
|
||||
|
||||
bblock_t *else_body = v->cfg->blocks[2];
|
||||
fs_inst *rol = instruction(else_body, 0);
|
||||
EXPECT_EQ(rol->opcode, BRW_OPCODE_ROL);
|
||||
EXPECT_EQ(rol->sched, tgl_swsb_regdist(2));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[3];
|
||||
fs_inst *mul = instruction(last_block, 1);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(6));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, conditional8)
|
||||
{
|
||||
fs_reg g[16];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(g); i++)
|
||||
g[i] = v->vgrf(glsl_int_type());
|
||||
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
bld.XOR( x, g[1], g[2]);
|
||||
bld.XOR(g[3], g[1], g[2]);
|
||||
bld.XOR(g[4], g[1], g[2]);
|
||||
bld.XOR(g[5], g[1], g[2]);
|
||||
bld.XOR(g[6], g[1], g[2]);
|
||||
bld.XOR(g[7], g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_IF);
|
||||
|
||||
bld.ADD( x, g[1], g[2]);
|
||||
bld.emit(BRW_OPCODE_ELSE);
|
||||
|
||||
bld.ROL( x, g[1], g[2]);
|
||||
|
||||
bld.emit(BRW_OPCODE_ENDIF);
|
||||
bld.MUL( x, g[1], g[2]);
|
||||
|
||||
v->calculate_cfg();
|
||||
lower_scoreboard(v);
|
||||
|
||||
bblock_t *then_body = v->cfg->blocks[1];
|
||||
fs_inst *add = instruction(then_body, 0);
|
||||
EXPECT_EQ(add->opcode, BRW_OPCODE_ADD);
|
||||
EXPECT_EQ(add->sched, tgl_swsb_regdist(7));
|
||||
|
||||
/* Note that the ROL will have RegDist 2 and not 7, illustrating the
|
||||
* physical CFG edge between the then-block and the else-block.
|
||||
*/
|
||||
bblock_t *else_body = v->cfg->blocks[2];
|
||||
fs_inst *rol = instruction(else_body, 0);
|
||||
EXPECT_EQ(rol->opcode, BRW_OPCODE_ROL);
|
||||
EXPECT_EQ(rol->sched, tgl_swsb_regdist(2));
|
||||
|
||||
bblock_t *last_block = v->cfg->blocks[3];
|
||||
fs_inst *mul = instruction(last_block, 1);
|
||||
EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL);
|
||||
EXPECT_EQ(mul->sched, tgl_swsb_regdist(2));
|
||||
}
|
||||
|
||||
TEST_F(scoreboard_test, gfx125_RaR_over_different_pipes)
|
||||
{
|
||||
devinfo->verx10 = 125;
|
||||
brw_init_isa_info(&compiler->isa, devinfo);
|
||||
|
||||
fs_reg a = v->vgrf(glsl_int_type());
|
||||
fs_reg b = v->vgrf(glsl_int_type());
|
||||
fs_reg f = v->vgrf(glsl_float_type());
|
||||
fs_reg x = v->vgrf(glsl_int_type());
|
||||
|
||||
bld.ADD(f, x, x);
|
||||
bld.ADD(a, x, x);
|
||||
bld.ADD(x, b, b);
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
lower_scoreboard(v);
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(instruction(block0, 0)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 1)->sched, tgl_swsb_null());
|
||||
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_regdist(1));
|
||||
}
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
and(1) cr0<1>UD cr0<0,1,0>UD 0xfffffb7fUD { align1 1N switch };
|
||||
and(1) cr0<1>UD cr0<0,1,0>UD 0xffffff7fUD { align1 1N switch };
|
||||
and(1) cr0<1>UD cr0<0,1,0>UD 0xffffffcfUD { align1 1N switch };
|
||||
and(1) cr0<1>UD cr0<0,1,0>UD 0xfffffbffUD { align1 1N switch };
|
||||
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000400UD { align1 1N switch };
|
||||
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000030UD { align1 1N switch };
|
||||
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000080UD { align1 1N switch };
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
05 80 00 00 00 00 00 30 00 10 00 06 7f fb ff ff
|
||||
05 80 00 00 00 00 00 30 00 10 00 06 7f ff ff ff
|
||||
05 80 00 00 00 00 00 30 00 10 00 06 cf ff ff ff
|
||||
05 80 00 00 00 00 00 30 00 10 00 06 ff fb ff ff
|
||||
06 80 00 00 00 00 00 30 00 10 00 06 00 04 00 00
|
||||
06 80 00 00 00 00 00 30 00 10 00 06 30 00 00 00
|
||||
06 80 00 00 00 00 00 30 00 10 00 06 80 00 00 00
|
||||
|
|
@ -1 +0,0 @@
|
|||
rol(16) g3<1>UD g2<0,1,0>UD g2.1<0,1,0>UD { align1 1H };
|
||||
|
|
@ -1 +0,0 @@
|
|||
0f 00 80 00 08 02 60 20 40 00 00 02 44 00 00 00
|
||||
|
|
@ -1 +0,0 @@
|
|||
ror(16) g3<1>UD g2<0,1,0>UD g2.1<0,1,0>UD { align1 1H };
|
||||
|
|
@ -1 +0,0 @@
|
|||
0e 00 80 00 08 02 60 20 40 00 00 02 44 00 00 00
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
add3(8) g118<1>D -g117<8,8,1>D g114<8,8,1>D g115<1,1,1>D { align1 1Q I@2 };
|
||||
add3(16) g55<1>D g50<8,8,1>D g46<8,8,1>D -g53<1,1,1>D { align1 1H @2 $5.dst };
|
||||
add3(16) g111<1>D -g40<8,8,1>D -g88<8,8,1>D g111<1,1,1>D { align1 1H I@1 };
|
||||
add3(16) g49<1>D 0x0008UW g47<8,8,1>D g26<1,1,1>D { align1 1H I@4 };
|
||||
add3(16) g55<1>D 0x0008UW g53<8,8,1>D g65<1,1,1>D { align1 2H I@3 };
|
||||
add3(8) g57<1>D g52<8,8,1>D (abs)g48<8,8,1>D (abs)g59<1,1,1>D { align1 1Q I@4 };
|
||||
add3(16) g51<1>D g63<8,8,1>D -g122<8,8,1>D (abs)g27<1,1,1>D { align1 1H I@7 };
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
52 1a 03 00 68 2e 04 76 05 75 0e 0e 05 72 05 73
|
||||
52 a5 04 00 68 0e 04 37 05 32 2e 0e 05 2e 05 35
|
||||
52 19 04 00 68 2e 04 6f 05 28 8e 0e 05 58 05 6f
|
||||
52 1c 04 00 60 41 04 31 08 00 0e 0e 05 2f 05 1a
|
||||
52 1b 24 00 60 41 04 37 08 00 0e 0e 05 35 05 41
|
||||
52 1c 03 00 68 0e 04 39 05 34 5e 0e 05 30 05 3b
|
||||
52 1f 04 00 68 0e 04 33 05 3f 9e 0e 05 7a 05 1b
|
||||
|
|
@ -1,30 +0,0 @@
|
|||
(+f0.0.any8h) send(1) g57UD g58UD nullUD 0x6210c500 0x02000000
|
||||
ugm MsgDesc: ( load, a32, d32, V8, transpose, L1STATE_L3MOCS dst_len = 1, src0_len = 1, src1_len = 0 bti ) BTI 2 base_offset 0 { align1 WE_all 1N $5 };
|
||||
(+f0.0.any8h) send(1) g28UD g29UD nullUD 0x6210c500 0x02000000
|
||||
ugm MsgDesc: ( load, a32, d32, V8, transpose, L1STATE_L3MOCS dst_len = 1, src0_len = 1, src1_len = 0 bti ) BTI 2 base_offset 0 { align1 WE_all 1N $2 };
|
||||
(+f0.0.any32h) send(1) g57UD g58UD nullUD 0x6210c500 0x02000000
|
||||
ugm MsgDesc: ( load, a32, d32, V8, transpose, L1STATE_L3MOCS dst_len = 1, src0_len = 1, src1_len = 0 bti ) BTI 2 base_offset 0 { align1 WE_all 1N $0 };
|
||||
send(8) nullUD g79UD g10UD 0x6200f506 0x04000100
|
||||
ugm MsgDesc: ( store_cmask, a32, d32, xyzw, L1STATE_L3MOCS dst_len = 0, src0_len = 1, src1_len = 4 bti ) BTI 4 base_offset 0 { align1 1Q $0 };
|
||||
send(16) nullUD g9UD g7UD 0x44000504 a0.1<0>UD
|
||||
ugm MsgDesc: ( store, a32, d32, V1, L1STATE_L3MOCS dst_len = 0, src0_len = 2, src1_len = 0 ss ) surface_state_index 0 { align1 1H @1 $0 };
|
||||
send(1) g4UD g0UD nullUD 0x0210151f 0x00000000
|
||||
tgm MsgDesc: ( fence, a32, tile, evict, normal_routing dst_len = 1, src0_len = 1, src1_len = 0 flat ) base_offset 0 { align1 WE_all 1N $3 };
|
||||
send(8) nullUD g36UD g37UD 0x02000b04 0x00000040
|
||||
slm MsgDesc: ( store, a32, d16u32, V1, L1STATE_L3MOCS dst_len = 0, src0_len = 1, src1_len = 1 flat ) base_offset 0 { align1 1Q $1 };
|
||||
send(8) nullUD g34UD g35UD 0x02000b04 0x00000040
|
||||
slm MsgDesc: ( store, a32, d16u32, V1, L1STATE_L3MOCS dst_len = 0, src0_len = 1, src1_len = 1 flat ) base_offset 0 { align1 1Q $0 };
|
||||
send(8) nullUD g6UD g7UD 0x0200f506 0x00000100
|
||||
slm MsgDesc: ( store_cmask, a32, d32, xyzw, L1STATE_L3MOCS dst_len = 0, src0_len = 1, src1_len = 4 flat ) base_offset 0 { align1 1Q $6 };
|
||||
send(16) nullUD g82UD g91UD 0x04040519 0x00000080
|
||||
slm MsgDesc: ( atomic_or, a32, d32, V1, L1UC_L3WB dst_len = 0, src0_len = 2, src1_len = 2 flat ) base_offset 0 { align1 2H $0 };
|
||||
send(1) g10UD g0UD nullUD 0x0210011f 0x00000000
|
||||
slm MsgDesc: ( fence, a32, threadgroup, none, normal_routing dst_len = 1, src0_len = 1, src1_len = 0 flat ) base_offset 0 { align1 WE_all 1N $1 };
|
||||
send(1) g23UD g117UD nullUD 0x2210c500 a0.1<0>UD
|
||||
ugm MsgDesc: ( load, a32, d32, V8, transpose, L1STATE_L3MOCS dst_len = 1, src0_len = 1, bss ) src1_len = 0 ex_bso surface_state_index 0 { align1 WE_all 1N @1 $10 };
|
||||
send(8) nullUD g14UD g24UD 0x040350fc a0.1<0>UD
|
||||
dp data 1 MsgDesc: (DC typed surface write, Surface = 252, SIMD16, Mask = 0x0) src1_len = 4 ex_bso mlen 2 rlen 0 { align1 1Q @1 $5 };
|
||||
send(8) nullUD g51UD g52UD 0x02000000 0x00000040
|
||||
rt accel MsgDesc: SIMD8, mlen 1 ex_mlen 1 rlen 0 { align1 1Q $2 };
|
||||
send(16) nullUD g88UD g98UD 0x02000100 0x00000080
|
||||
rt accel MsgDesc: SIMD16, mlen 1 ex_mlen 2 rlen 0 { align1 1H $6 };
|
||||
|
|
@ -1,15 +0,0 @@
|
|||
31 45 00 88 00 00 0c 39 8e 3a 00 fa 00 00 30 04
|
||||
31 42 00 88 00 00 0c 1c 8e 1d 00 fa 00 00 30 04
|
||||
31 40 00 8c 00 00 0c 39 8e 3a 00 fa 00 00 30 04
|
||||
31 40 03 00 00 00 00 00 8c 4f 0c fa 25 0a 3c 04
|
||||
31 90 04 00 00 01 02 00 14 09 08 fa 04 07 00 04
|
||||
31 43 00 80 00 00 0c 04 0c 00 3e da 00 00 04 00
|
||||
31 41 03 00 00 00 00 00 0c 24 08 e6 0c 25 02 00
|
||||
31 40 03 00 00 00 00 00 0c 22 08 e6 0c 23 02 00
|
||||
31 46 03 00 00 00 00 00 0c 06 0c ea 24 07 3c 00
|
||||
31 40 24 00 00 00 00 00 14 52 32 ea 14 5b 00 01
|
||||
31 41 00 80 00 00 0c 0a 0c 00 3e e2 00 00 00 00
|
||||
31 9a 00 80 80 01 0e 17 8c 75 00 fa 00 00 30 00
|
||||
31 95 03 00 80 01 02 00 14 0e f8 c1 24 18 d4 00
|
||||
31 42 03 00 00 00 00 00 0c 33 00 80 0c 34 00 00
|
||||
31 46 04 00 00 00 00 00 0c 58 00 82 14 62 00 00
|
||||
|
|
@ -1,23 +0,0 @@
|
|||
mul(8) g37<1>D g99<8,8,1>D g36<16,8,2>UW { align1 1Q I@1 };
|
||||
mul(8) g37<1>D g99<8,8,1>D g36<16,8,2>UW { align1 1Q I@2 };
|
||||
mul(8) g37<1>D g99<8,8,1>D g36<16,8,2>UW { align1 1Q I@3 };
|
||||
mul(8) g37<1>D g99<8,8,1>D g36<16,8,2>UW { align1 1Q I@4 };
|
||||
mul(8) g37<1>D g99<8,8,1>D g36<16,8,2>UW { align1 1Q I@5 };
|
||||
mul(8) g37<1>D g99<8,8,1>D g36<16,8,2>UW { align1 1Q I@6 };
|
||||
mul(8) g37<1>D g99<8,8,1>D g36<16,8,2>UW { align1 1Q I@7 };
|
||||
|
||||
mov(8) g36<1>UD g35<8,8,1>F { align1 1Q F@1 };
|
||||
mov(8) g36<1>UD g35<8,8,1>F { align1 1Q F@2 };
|
||||
mov(8) g36<1>UD g35<8,8,1>F { align1 1Q F@3 };
|
||||
mov(8) g36<1>UD g35<8,8,1>F { align1 1Q F@4 };
|
||||
mov(8) g36<1>UD g35<8,8,1>F { align1 1Q F@5 };
|
||||
mov(8) g36<1>UD g35<8,8,1>F { align1 1Q F@6 };
|
||||
mov(8) g36<1>UD g35<8,8,1>F { align1 1Q F@7 };
|
||||
|
||||
add(1) a0<1>UD a0<0,1,0>UD 0x00000800UD { align1 WE_all 1N A@1 };
|
||||
add(1) a0<1>UD a0<0,1,0>UD 0x00000800UD { align1 WE_all 1N A@2 };
|
||||
add(1) a0<1>UD a0<0,1,0>UD 0x00000800UD { align1 WE_all 1N A@3 };
|
||||
add(1) a0<1>UD a0<0,1,0>UD 0x00000800UD { align1 WE_all 1N A@4 };
|
||||
add(1) a0<1>UD a0<0,1,0>UD 0x00000800UD { align1 WE_all 1N A@5 };
|
||||
add(1) a0<1>UD a0<0,1,0>UD 0x00000800UD { align1 WE_all 1N A@6 };
|
||||
add(1) a0<1>UD a0<0,1,0>UD 0x00000800UD { align1 WE_all 1N A@7 };
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
41 19 03 00 60 06 05 25 05 63 46 01 06 24 56 00
|
||||
41 1a 03 00 60 06 05 25 05 63 46 01 06 24 56 00
|
||||
41 1b 03 00 60 06 05 25 05 63 46 01 06 24 56 00
|
||||
41 1c 03 00 60 06 05 25 05 63 46 01 06 24 56 00
|
||||
41 1d 03 00 60 06 05 25 05 63 46 01 06 24 56 00
|
||||
41 1e 03 00 60 06 05 25 05 63 46 01 06 24 56 00
|
||||
41 1f 03 00 60 06 05 25 05 63 46 01 06 24 56 00
|
||||
61 11 03 00 20 0a 05 24 05 23 46 00 00 00 00 00
|
||||
61 12 03 00 20 0a 05 24 05 23 46 00 00 00 00 00
|
||||
61 13 03 00 20 0a 05 24 05 23 46 00 00 00 00 00
|
||||
61 14 03 00 20 0a 05 24 05 23 46 00 00 00 00 00
|
||||
61 15 03 00 20 0a 05 24 05 23 46 00 00 00 00 00
|
||||
61 16 03 00 20 0a 05 24 05 23 46 00 00 00 00 00
|
||||
61 17 03 00 20 0a 05 24 05 23 46 00 00 00 00 00
|
||||
40 09 00 80 20 82 01 10 00 10 00 02 00 08 00 00
|
||||
40 0a 00 80 20 82 01 10 00 10 00 02 00 08 00 00
|
||||
40 0b 00 80 20 82 01 10 00 10 00 02 00 08 00 00
|
||||
40 0c 00 80 20 82 01 10 00 10 00 02 00 08 00 00
|
||||
40 0d 00 80 20 82 01 10 00 10 00 02 00 08 00 00
|
||||
40 0e 00 80 20 82 01 10 00 10 00 02 00 08 00 00
|
||||
40 0f 00 80 20 82 01 10 00 10 00 02 00 08 00 00
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
dp4a(8) g10<1>D g2<8,8,1>D g6<8,8,1>D g7<1,1,1>D { align1 1Q @1 };
|
||||
dp4a(8) g10<1>D g2<8,8,1>D g6<8,8,1>D g7<1,1,1>UD { align1 1Q @1 };
|
||||
dp4a(8) g10<1>D g2<8,8,1>D g8<8,8,1>D g9<1,1,1>D { align1 1Q @1 };
|
||||
dp4a(8) g10<1>D g2<8,8,1>D g8<8,8,1>D g9<1,1,1>UD { align1 1Q @1 };
|
||||
dp4a(8) g10<1>UD g2<8,8,1>UD g6<8,8,1>UD g7<1,1,1>UD { align1 1Q @1 };
|
||||
dp4a(8) g10<1>UD g2<8,8,1>UD g8<8,8,1>UD g9<1,1,1>UD { align1 1Q @1 };
|
||||
dp4a(8) g5<1>D g2<8,8,1>D g3<8,8,1>D g4<1,1,1>D { align1 1Q @3 $0.dst };
|
||||
dp4a(8) g5<1>D g2<8,8,1>D g3<8,8,1>D g4<1,1,1>UD { align1 1Q @3 $0.dst };
|
||||
dp4a(8) g5<1>UD g2<8,8,1>UD g3<8,8,1>UD g4<1,1,1>UD { align1 1Q @3 $0.dst };
|
||||
dp4a(8) g6<1>D g2<8,8,1>D g3<8,8,1>D g4<1,1,1>D { align1 1Q @4 $1.dst };
|
||||
dp4a(8) g6<1>D g2<8,8,1>D g3<8,8,1>D g4<1,1,1>UD { align1 1Q @4 $1.dst };
|
||||
dp4a(8) g6<1>D g2<8,8,1>D g4<8,8,1>D g5<1,1,1>D { align1 1Q @4 $0.dst };
|
||||
dp4a(8) g6<1>D g2<8,8,1>D g4<8,8,1>D g5<1,1,1>UD { align1 1Q @4 $0.dst };
|
||||
dp4a(8) g6<1>UD g2<8,8,1>UD g3<8,8,1>UD g4<1,1,1>UD { align1 1Q @4 $1.dst };
|
||||
dp4a(8) g6<1>UD g2<8,8,1>UD g4<8,8,1>UD g5<1,1,1>UD { align1 1Q @4 $0.dst };
|
||||
dp4a(8) g7<1>D g2<8,8,1>D g5<8,8,1>D g6<1,1,1>D { align1 1Q @1 };
|
||||
dp4a(8) g7<1>D g2<8,8,1>D g5<8,8,1>D g6<1,1,1>UD { align1 1Q @1 };
|
||||
dp4a(8) g7<1>UD g2<8,8,1>UD g5<8,8,1>UD g6<1,1,1>UD { align1 1Q @1 };
|
||||
dp4a(8) g8<1>D g2<8,8,1>D g4<8,8,1>D g5<1,1,1>D { align1 1Q @3 $0.dst };
|
||||
dp4a(8) g8<1>D g2<8,8,1>D g4<8,8,1>D g5<1,1,1>D { align1 1Q @4 $0.dst };
|
||||
dp4a(8) g8<1>D g2<8,8,1>D g4<8,8,1>D g5<1,1,1>UD { align1 1Q @3 $0.dst };
|
||||
dp4a(8) g8<1>D g2<8,8,1>D g4<8,8,1>D g5<1,1,1>UD { align1 1Q @4 $0.dst };
|
||||
dp4a(8) g8<1>D g2<8,8,1>D g6<8,8,1>D g7<1,1,1>D { align1 1Q @1 };
|
||||
dp4a(8) g8<1>D g2<8,8,1>D g6<8,8,1>D g7<1,1,1>UD { align1 1Q @1 };
|
||||
dp4a(8) g8<1>UD g2<8,8,1>UD g4<8,8,1>UD g5<1,1,1>UD { align1 1Q @3 $0.dst };
|
||||
dp4a(8) g8<1>UD g2<8,8,1>UD g4<8,8,1>UD g5<1,1,1>UD { align1 1Q @4 $0.dst };
|
||||
dp4a(8) g8<1>UD g2<8,8,1>UD g6<8,8,1>UD g7<1,1,1>UD { align1 1Q @1 };
|
||||
dp4a.sat(8) g10<1>D g5<8,8,1>D g6<8,8,1>D g7<1,1,1>D { align1 1Q @1 $2.dst };
|
||||
dp4a.sat(8) g10<1>D g5<8,8,1>D g6<8,8,1>D g7<1,1,1>UD { align1 1Q @1 $2.dst };
|
||||
dp4a.sat(8) g10<1>UD g5<8,8,1>UD g6<8,8,1>UD g7<1,1,1>UD { align1 1Q @1 $2.dst };
|
||||
dp4a.sat(8) g8<1>D g5<8,8,1>D g3<8,8,1>D g4<1,1,1>D { align1 1Q $2.dst };
|
||||
dp4a.sat(8) g8<1>D g5<8,8,1>D g3<8,8,1>D g4<1,1,1>UD { align1 1Q $2.dst };
|
||||
dp4a.sat(8) g8<1>UD g5<8,8,1>UD g3<8,8,1>UD g4<1,1,1>UD { align1 1Q $2.dst };
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
58 01 03 00 68 0e 04 0a 05 02 0e 0e 05 06 05 07
|
||||
58 01 03 00 68 0e 04 0a 05 02 0a 0e 05 06 05 07
|
||||
58 01 03 00 68 0e 04 0a 05 02 0e 0e 05 08 05 09
|
||||
58 01 03 00 68 0e 04 0a 05 02 0a 0e 05 08 05 09
|
||||
58 01 03 00 28 0a 04 0a 05 02 0a 0a 05 06 05 07
|
||||
58 01 03 00 28 0a 04 0a 05 02 0a 0a 05 08 05 09
|
||||
58 b0 03 00 68 0e 04 05 05 02 0e 0e 05 03 05 04
|
||||
58 b0 03 00 68 0e 04 05 05 02 0a 0e 05 03 05 04
|
||||
58 b0 03 00 28 0a 04 05 05 02 0a 0a 05 03 05 04
|
||||
58 c1 03 00 68 0e 04 06 05 02 0e 0e 05 03 05 04
|
||||
58 c1 03 00 68 0e 04 06 05 02 0a 0e 05 03 05 04
|
||||
58 c0 03 00 68 0e 04 06 05 02 0e 0e 05 04 05 05
|
||||
58 c0 03 00 68 0e 04 06 05 02 0a 0e 05 04 05 05
|
||||
58 c1 03 00 28 0a 04 06 05 02 0a 0a 05 03 05 04
|
||||
58 c0 03 00 28 0a 04 06 05 02 0a 0a 05 04 05 05
|
||||
58 01 03 00 68 0e 04 07 05 02 0e 0e 05 05 05 06
|
||||
58 01 03 00 68 0e 04 07 05 02 0a 0e 05 05 05 06
|
||||
58 01 03 00 28 0a 04 07 05 02 0a 0a 05 05 05 06
|
||||
58 b0 03 00 68 0e 04 08 05 02 0e 0e 05 04 05 05
|
||||
58 c0 03 00 68 0e 04 08 05 02 0e 0e 05 04 05 05
|
||||
58 b0 03 00 68 0e 04 08 05 02 0a 0e 05 04 05 05
|
||||
58 c0 03 00 68 0e 04 08 05 02 0a 0e 05 04 05 05
|
||||
58 01 03 00 68 0e 04 08 05 02 0e 0e 05 06 05 07
|
||||
58 01 03 00 68 0e 04 08 05 02 0a 0e 05 06 05 07
|
||||
58 b0 03 00 28 0a 04 08 05 02 0a 0a 05 04 05 05
|
||||
58 c0 03 00 28 0a 04 08 05 02 0a 0a 05 04 05 05
|
||||
58 01 03 00 28 0a 04 08 05 02 0a 0a 05 06 05 07
|
||||
58 92 03 00 6c 0e 04 0a 05 05 0e 0e 05 06 05 07
|
||||
58 92 03 00 6c 0e 04 0a 05 05 0a 0e 05 06 05 07
|
||||
58 92 03 00 2c 0a 04 0a 05 05 0a 0a 05 06 05 07
|
||||
58 22 03 00 6c 0e 04 08 05 05 0e 0e 05 03 05 04
|
||||
58 22 03 00 6c 0e 04 08 05 05 0a 0e 05 03 05 04
|
||||
58 22 03 00 2c 0a 04 08 05 05 0a 0a 05 03 05 04
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
send(16) g113UD g12UD nullUD a0<0>UD 0x00000000
|
||||
dp data 1 MsgDesc: indirect ex_mlen 0 { align1 1H @1 $6 };
|
||||
(+f1.0) send(16) nullUD g15UD g17UD a0<0>UD 0x00000080
|
||||
dp data 1 MsgDesc: indirect ex_mlen 2 { align1 1H @1 $4 };
|
||||
send(8) g104UD g119UD nullUD 0x04116e13 0x00000000
|
||||
dp data 1 MsgDesc: (DC typed surface read, Surface = 19, SIMD8, Mask = 0xe) mlen 2 ex_mlen 0 rlen 1 { align1 2Q $8 };
|
||||
send(8) nullUD g92UD g117UD 0x020350fc a0.1<0>UD
|
||||
dp data 1 MsgDesc: (DC typed surface write, Surface = 252, SIMD16, Mask = 0x0) mlen 1 rlen 0 { align1 1Q @1 $8 };
|
||||
(+f0.0.any8h) send(8) g55UD g118UD nullUD 0x02184201 0x00000000
|
||||
data MsgDesc: (DC unaligned OWORD block read, bti 1, 2) mlen 1 ex_mlen 0 rlen 1 { align1 WE_all 1Q @3 $9 };
|
||||
send(8) nullUD g126UD nullUD 0x02000000 0x00000000
|
||||
thread_spawner MsgDesc: mlen 1 ex_mlen 0 rlen 0 { align1 WE_all 1Q @1 EOT };
|
||||
send(8) g18UD g24UD nullUD 0x04115e10 0x00000000
|
||||
dp data 1 MsgDesc: (DC typed surface read, Surface = 16, SIMD16, Mask = 0xe) mlen 2 ex_mlen 0 rlen 1 { align1 1Q $1 };
|
||||
send(8) g19UD g28UD nullUD 0x04116e10 0x00000000
|
||||
dp data 1 MsgDesc: (DC typed surface read, Surface = 16, SIMD8, Mask = 0xe) mlen 2 ex_mlen 0 rlen 1 { align1 2Q @7 $2 };
|
||||
send(16) g50UD g36UD nullUD a0<0>UD 0x00000000
|
||||
sampler MsgDesc: indirect ex_mlen 0 { align1 1H @1 $3 };
|
||||
send(8) nullUD g25UD g21UD 0x02035001 0x00000100
|
||||
dp data 1 MsgDesc: (DC typed surface write, Surface = 1, SIMD16, Mask = 0x0) mlen 1 ex_mlen 4 rlen 0 { align1 1Q $9 };
|
||||
send(8) g5UD g25UD nullUD 0x02415001 0x00000000
|
||||
dp data 1 MsgDesc: (DC typed surface read, Surface = 1, SIMD16, Mask = 0x0) mlen 1 ex_mlen 0 rlen 4 { align1 1Q $10 };
|
||||
send(8) g27UD g35UD nullUD 0x04146efd 0x00000000
|
||||
dp data 1 MsgDesc: (DC A64 untyped surface read, Surface = 253, SIMD8, Mask = 0xe) mlen 2 ex_mlen 0 rlen 1 { align1 1Q @1 $0 };
|
||||
send(8) nullUD g36UD g38UD 0x04035001 0x00000100
|
||||
dp data 1 MsgDesc: (DC typed surface write, Surface = 1, SIMD16, Mask = 0x0) mlen 2 ex_mlen 4 rlen 0 { align1 1Q @1 $1 };
|
||||
send(8) nullUD g126UD g118UD 0x02080007 0x00000200
|
||||
urb MsgDesc: offset 0 SIMD8 write mlen 1 ex_mlen 8 rlen 0 { align1 1Q @1 EOT };
|
||||
send(8) g14UD g37UD nullUD 0x02110401 0x00000000
|
||||
data MsgDesc: (DC byte scattered read, bti 1, 4) mlen 1 ex_mlen 0 rlen 1 { align1 1Q @1 $0 };
|
||||
send(1) g100UD g0UD nullUD 0x0219e000 0x00000000
|
||||
data MsgDesc: (DC mfence, bti 0, 32) mlen 1 ex_mlen 0 rlen 1 { align1 WE_all 1N $1 };
|
||||
send(1) g15UD g0UD nullUD 0x0219e000 0x00000000
|
||||
data MsgDesc: (DC mfence, bti 0, 32) mlen 1 ex_mlen 0 rlen 1 { align1 WE_all 1N $5 };
|
||||
|
||||
sendc(16) nullUD g119UD nullUD 0x10031000 0x00000000
|
||||
render MsgDesc: RT write SIMD16 LastRT Surface = 0 mlen 8 ex_mlen 0 rlen 0 { align1 1H @1 EOT };
|
||||
sendc(8) nullUD g125UD g123UD 0x04031400 0x00000080
|
||||
render MsgDesc: RT write SIMD8 LastRT Surface = 0 mlen 2 ex_mlen 2 rlen 0 { align1 1Q @1 EOT };
|
||||
sendc(16) nullUD g119UD nullUD 0x10031000 0x00000000
|
||||
render MsgDesc: RT write SIMD16 LastRT Surface = 0 mlen 8 ex_mlen 0 rlen 0 { align1 1H @1 EOT };
|
||||
sendc(16) nullUD g123UD g119UD 0x08031000 0x00000100
|
||||
render MsgDesc: RT write SIMD16 LastRT Surface = 0 mlen 4 ex_mlen 4 rlen 0 { align1 1H @1 EOT };
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
31 96 04 00 00 00 05 71 04 0c 00 c0 00 00 00 00
|
||||
31 94 84 01 00 00 01 00 04 0f 00 c0 14 11 00 00
|
||||
31 48 13 00 00 00 0c 68 14 77 26 cc 00 00 5a 00
|
||||
31 98 03 00 00 01 02 00 0c 5c f8 c1 04 75 d4 00
|
||||
31 b9 03 88 00 00 0c 37 0c 76 02 a4 00 00 10 02
|
||||
31 01 03 80 04 00 00 00 0c 7e 00 70 00 00 00 00
|
||||
31 41 03 00 00 00 0c 12 14 18 20 cc 00 00 56 00
|
||||
31 f2 13 00 00 00 0c 13 14 1c 20 cc 00 00 5a 00
|
||||
31 93 04 00 00 00 05 32 04 24 00 20 00 00 00 00
|
||||
31 49 03 00 00 00 00 00 0c 19 02 c0 24 15 d4 00
|
||||
31 4a 03 00 00 00 24 05 0c 19 02 c0 00 00 54 00
|
||||
31 90 03 00 00 00 0c 1b 14 23 fa cd 00 00 1a 01
|
||||
31 91 03 00 00 00 00 00 14 24 02 c0 24 26 d4 00
|
||||
31 01 03 00 04 00 00 00 0c 7e 0e 60 44 76 00 02
|
||||
31 90 03 00 00 00 0c 0e 0c 25 02 a8 00 00 40 00
|
||||
31 41 00 80 00 00 0c 64 0c 00 00 a0 00 00 78 02
|
||||
31 45 00 80 00 00 0c 0f 0c 00 00 a0 00 00 78 02
|
||||
32 01 04 00 04 00 00 00 44 77 00 50 00 00 c4 00
|
||||
32 01 03 00 04 00 00 00 14 7d 00 58 14 7b c4 00
|
||||
32 01 04 00 04 00 00 00 44 77 00 50 00 00 c4 00
|
||||
32 01 04 00 04 00 00 00 24 7b 00 50 24 77 c4 00
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
cmp.l.f0.0(8) g55<1>UD g54<8,8,1>UD 0x00000290UD { align1 1Q @1 };
|
||||
mov(16) g6<1>D g20<8,8,1>W { align1 2H @2 };
|
||||
add(16) g122<1>F g98<8,8,1>F (abs)g102<8,8,1>F { align1 1H @3 };
|
||||
shl(8) g75<1>D g122<8,8,1>D 0x00000002UD { align1 1Q @4 };
|
||||
sel.l(4) g90.4<1>D g90.3<0,1,0>D g90.4<4,4,1>D { align1 WE_all 1N @5 };
|
||||
and(16) g58<1>UD g16<8,8,1>UD g56<8,8,1>UD { align1 1H @6 };
|
||||
or.nz.f0.0(16) null<1>UD g105<8,8,1>UD g103<8,8,1>UD { align1 1H @7 };
|
||||
|
||||
math cos(16) g17<1>F g15<8,8,1>F null<8,8,1>F { align1 1H @1 $0 };
|
||||
math exp(16) g1<1>F g29<8,8,1>F null<8,8,1>F { align1 1H @5 $2 };
|
||||
math sqrt(8) g9<1>HF g6<8,8,1>HF null<8,8,1>F { align1 1Q @1 $3 };
|
||||
math intdiv(8) g103<1>D g101<8,8,1>D g35<8,8,1>D { align1 1Q @4 $4 };
|
||||
math intmod(8) g101<1>D g97<8,8,1>D g76<8,8,1>D { align1 2Q @2 $5 };
|
||||
math inv(16) g10<1>F g8<8,8,1>F null<8,8,1>F { align1 2H @2 $6 };
|
||||
math log(16) g102<1>F g100<8,8,1>F null<8,8,1>F { align1 2H @1 $7 };
|
||||
math rsq(16) g76<1>F g74<8,8,1>F null<8,8,1>F { align1 1H @7 $8 };
|
||||
math sin(16) g123<1>F g121<8,8,1>F null<8,8,1>F { align1 1H @4 $9 };
|
||||
math sqrt(16) g43<1>F g47<8,8,1>F null<8,8,1>F { align1 2H @7 $10 };
|
||||
math cos(8) g103<1>HF g98<8,8,1>HF null<8,8,1>F { align1 1Q @3 $11 };
|
||||
math exp(8) g54<1>HF g52<8,8,1>HF null<8,8,1>F { align1 1Q @1 $12 };
|
||||
math intdiv(8) g35<1>D g31<8,8,1>D g33<8,8,1>D { align1 4Q @2 $13 };
|
||||
math intmod(8) g101<1>D g97<8,8,1>D g99<8,8,1>D { align1 2Q @4 $14 };
|
||||
math inv(8) g102<1>HF g92<8,8,1>HF null<8,8,1>F { align1 1Q @6 $15 };
|
||||
|
||||
sel.ge(16) g7<1>UW g7<16,16,1>UW g89<16,8,2>UW { align1 1H @7 $0.dst };
|
||||
mov(16) a0<1>UW 0x03e0UW { align1 WE_all 1H @3 $1.dst };
|
||||
add(16) g100<1>D g102<8,8,1>D -2114D { align1 1H @3 $2.dst };
|
||||
add(16) g100<1>D g105<8,8,1>D (abs)g18<8,8,1>D { align1 1H @3 $3.dst };
|
||||
add(16) g36<1>D g36<8,8,1>D g106<8,8,1>D { align1 1H @7 $4.dst };
|
||||
and(16) g49<1>UD g45<8,8,1>UD g47<8,8,1>UD { align1 1H @3 $5.dst };
|
||||
asr(16) g102<2>W g41<16,8,2>W g28<8,8,1>UD { align1 2H @6 $6.dst };
|
||||
cmp.l.f0.0(8) g97<1>F (abs)g96<8,8,1>F 0x3d4ccccdF /* 0.05F */ { align1 1Q @3 $7.dst };
|
||||
cmp.nz.f0.0(8) g100<1>F g98<8,8,1>F g99<8,8,1>F { align1 1Q @1 $8.dst };
|
||||
(+f0.0) sel(8) g64<1>D -g15<8,8,1>D g15<8,8,1>D { align1 1Q @1 $9.dst };
|
||||
mov(16) g15<1>UD g13<8,8,1>D { align1 1H @1 $10.dst };
|
||||
mul(8) acc0<1>UD g10<8,4,2>UD g101<16,8,2>UW { align1 1Q @7 $11.dst };
|
||||
or(16) g51<1>UW g51<16,16,1>UW g75<16,8,2>UW { align1 1H @7 $12.dst };
|
||||
sel.ge(16) g28<1>W g28<16,16,1>W g92<16,8,2>W { align1 2H @7 $13.dst };
|
||||
xor(16) g10<1>UD g10<8,8,1>UD g100<8,8,1>UD { align1 1H @7 $14.dst };
|
||||
and(16) g39<1>UD g35<8,8,1>UD g37<8,8,1>UD { align1 2H @5 $15.dst };
|
||||
|
|
@ -1,38 +0,0 @@
|
|||
70 01 03 00 20 82 05 37 05 36 46 52 90 02 00 00
|
||||
61 02 24 00 60 05 05 06 05 14 46 00 00 00 00 00
|
||||
40 03 04 00 a0 0a 05 7a 05 62 46 0a 05 66 46 01
|
||||
69 04 03 00 60 86 05 4b 05 7a 46 02 02 00 00 00
|
||||
62 05 02 80 60 06 85 5a 64 5a 00 56 85 5a 34 00
|
||||
65 06 04 00 20 02 05 3a 05 10 46 02 05 38 46 00
|
||||
66 07 04 00 20 02 01 00 05 69 46 22 05 67 46 00
|
||||
38 90 04 00 a0 0a 05 11 05 0f 46 7a 01 00 46 00
|
||||
38 d2 04 00 a0 0a 05 01 05 1d 46 3a 01 00 46 00
|
||||
38 93 03 00 90 09 05 09 05 06 46 4a 01 00 46 00
|
||||
38 c4 03 00 60 06 05 67 05 65 46 c6 05 23 46 00
|
||||
38 a5 13 00 60 06 05 65 05 61 46 d6 05 4c 46 00
|
||||
38 a6 24 00 a0 0a 05 0a 05 08 46 1a 01 00 46 00
|
||||
38 97 24 00 a0 0a 05 66 05 64 46 2a 01 00 46 00
|
||||
38 f8 04 00 a0 0a 05 4c 05 4a 46 5a 01 00 46 00
|
||||
38 c9 04 00 a0 0a 05 7b 05 79 46 6a 01 00 46 00
|
||||
38 fa 24 00 a0 0a 05 2b 05 2f 46 4a 01 00 46 00
|
||||
38 bb 03 00 90 09 05 67 05 62 46 7a 01 00 46 00
|
||||
38 9c 03 00 90 09 05 36 05 34 46 3a 01 00 46 00
|
||||
38 ad 33 00 60 06 05 23 05 1f 46 c6 05 21 46 00
|
||||
38 ce 13 00 60 06 05 65 05 61 46 d6 05 63 46 00
|
||||
38 ef 03 00 90 09 05 66 05 5c 46 1a 01 00 46 00
|
||||
62 f0 04 00 10 01 05 07 05 07 58 41 06 59 56 00
|
||||
61 b1 04 80 10 41 01 10 00 00 00 00 e0 03 e0 03
|
||||
40 b2 04 00 60 86 05 64 05 66 46 06 be f7 ff ff
|
||||
40 b3 04 00 60 06 05 64 05 69 46 06 05 12 46 01
|
||||
40 f4 04 00 60 06 05 24 05 24 46 06 05 6a 46 00
|
||||
65 b5 04 00 20 02 05 31 05 2d 46 02 05 2f 46 00
|
||||
6c e6 24 00 50 05 06 66 06 29 56 02 05 1c 46 00
|
||||
70 b7 03 00 a0 9a 05 61 05 60 46 5a cd cc 4c 3d
|
||||
70 98 03 00 a0 0a 05 64 05 62 46 2a 05 63 46 00
|
||||
62 99 03 01 60 26 05 40 05 0f 46 06 05 0f 46 00
|
||||
61 9a 04 00 20 06 05 0f 05 0d 46 00 00 00 00 00
|
||||
41 fb 03 00 20 02 01 20 06 0a 44 01 06 65 56 00
|
||||
66 fc 04 00 10 01 05 33 05 33 58 01 06 4b 56 00
|
||||
62 fd 24 00 50 05 05 1c 05 1c 58 45 06 5c 56 00
|
||||
67 fe 04 00 20 02 05 0a 05 0a 46 02 05 64 46 00
|
||||
65 df 24 00 20 02 05 27 05 23 46 02 05 25 46 00
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
sync nop(16) null<0,1,0>UB { align1 WE_all 1H @1 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 1N @1 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 1N @2 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 1N @3 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 1N @4 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 1N @5 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 1N @6 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 1N @7 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 3N @1 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 3N @2 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 3N @3 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 3N @4 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 3N @5 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 3N @6 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 3N @7 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 5N @1 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 5N @2 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 5N @3 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 5N @4 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 5N @5 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 5N @6 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 5N @7 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 7N @1 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 7N @2 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 7N @3 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 7N @4 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 7N @5 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 7N @6 };
|
||||
sync nop(1) null<0,1,0>UB { align1 WE_all 7N @7 };
|
||||
sync nop(32) null<0,1,0>UB { align1 WE_all @1 };
|
||||
sync nop(8) null<0,1,0>UB { align1 WE_all 1Q @1 };
|
||||
sync allwr(16) null<0,1,0>UB { align1 1H };
|
||||
sync allwr(8) null<0,1,0>UB { align1 1Q };
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
01 01 04 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 01 00 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 02 00 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 03 00 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 04 00 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 05 00 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 06 00 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 07 00 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 01 10 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 02 10 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 03 10 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 04 10 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 05 10 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 06 10 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 07 10 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 01 20 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 02 20 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 03 20 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 04 20 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 05 20 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 06 20 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 07 20 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 01 30 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 02 30 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 03 30 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 04 30 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 05 30 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 06 30 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 07 30 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 01 05 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 01 03 80 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
01 00 04 00 00 00 00 00 00 00 00 30 00 00 00 00
|
||||
01 00 03 00 00 00 00 00 00 00 00 30 00 00 00 00
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
add(8) g124<1>F g7<8,8,1>D 1D { align1 1Q };
|
||||
add(16) g120<1>F g11<8,8,1>D 1D { align1 1H };
|
||||
add(16) g4<1>F g1<0,1,0>F -g1.4<0,1,0>F { align1 1H };
|
||||
add(8) g3.8<1>UW g3<8,8,1>UW 0x0008UW { align1 WE_all 1Q };
|
||||
add(16) g3<1>D g18<8,8,1>D g12<8,8,1>D { align1 1H };
|
||||
add(16) g6<1>UW g1.4<1,4,0>UW 0x11001010V { align1 WE_all 1H };
|
||||
add(32) g10<1>UW g1.4<1,4,0>UW 0x11001010V { align1 WE_all };
|
||||
add(8) g2<1>D g96<8,8,1>D -1023D { align1 1Q };
|
||||
add(8) g4<1>F g5.6<0,1,0>F g7.2<0,1,0>F { align1 1Q };
|
||||
add(8) g53<1>DF g49<4,4,1>DF g51<4,4,1>DF { align1 1Q };
|
||||
add.sat(16) g5<1>UD g3<8,8,1>UD 0x00000001UD { align1 1H };
|
||||
add(1) g125.3<1>UD g0.3<0,1,0>UD g7<0,1,0>UD { align1 WE_all 1N };
|
||||
add(8) a0<1>UW g34<16,8,2>UW 0x0080UW { align1 1Q };
|
||||
add(8) g8<1>DF g2<0,1,0>DF g3.2<0,1,0>DF { align1 2Q };
|
||||
add(16) a0<1>UW g3<16,8,2>UW 0x0040UW { align1 1H };
|
||||
add.sat.le.f0.0(8) g125<1>F -g6<8,8,1>F 0x3f000000F /* 0.5F */ { align1 1Q };
|
||||
add.z.f0.0(8) g8<1>F g2<0,1,0>F -g2.4<0,1,0>F { align1 1Q };
|
||||
add.z.f0.0(16) g3<1>F g2<0,1,0>F -g2.1<0,1,0>F { align1 1H };
|
||||
add(8) g3<1>UD g2<8,8,1>UD 0xffffffffUD { align1 1Q };
|
||||
(+f0.0) add(8) g15<1>D -g15<8,8,1>D 31D { align1 1Q };
|
||||
add(1) a0<1>UD a0<0,1,0>UD 0x00000200UD { align1 WE_all 1N };
|
||||
add.sat(8) g124<1>F g7<8,8,1>F -g6<8,8,1>F { align1 1Q };
|
||||
add(8) g8<1>UD g6<8,8,1>D 0x00000001UD { align1 1Q };
|
||||
add(16) g11<1>UD g9<8,8,1>D 0x00000001UD { align1 1H };
|
||||
(+f0.0) add(16) g8<1>D -g8<8,8,1>D 31D { align1 1H };
|
||||
add.sat(16) g126<1>F g2<0,1,0>F g2.4<0,1,0>F { align1 1H };
|
||||
add.sat(8) g124<1>F g17<8,8,1>D 1D { align1 1Q };
|
||||
add(16) g114<1>D g118<8,8,1>D g116<8,8,1>D { align1 2H };
|
||||
add.z.f0.0(16) null<1>D g120<8,8,1>D 1D { align1 1H };
|
||||
add.z.f0.0(16) null<1>D g116<8,8,1>D 1D { align1 2H };
|
||||
add.z.f0.0(8) g3<1>D g5<8,8,1>D g4<8,8,1>D { align1 1Q };
|
||||
add(16) g20<1>UD g17<8,8,1>UD 1D { align1 1H };
|
||||
add(8) g7<1>F -g6<4>.xyxyF g6<4>.zwzwF { align16 1Q };
|
||||
add(16) g9<1>F -g7<4>.xyxyF g7<4>.zwzwF { align16 1H };
|
||||
add(8) g7<1>UD g2<8,8,1>UD -g6<8,8,1>UD { align1 WE_all 1Q };
|
||||
add.le.f0.0(16) g1<1>D g3.1<0,1,0>D -g6<8,8,1>D { align1 1H };
|
||||
add.sat(8) g10<1>UD g9<8,8,1>UD 0x00000001UD { align1 1Q };
|
||||
add(1) g14<1>UD g14<0,1,0>UD 0x00000001UD { align1 WE_all 3N };
|
||||
add(8) g25<1>Q g22<4,4,1>Q -g24<4,4,1>Q { align1 1Q };
|
||||
add(8) g12<1>Q g5<4,4,1>Q -g11<4,4,1>Q { align1 2Q };
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
40 00 60 00 e8 0a 80 2f e0 00 8d 0e 01 00 00 00
|
||||
40 00 80 00 e8 0a 00 2f 60 01 8d 0e 01 00 00 00
|
||||
40 00 80 00 e8 3a 80 20 20 00 00 3a 30 40 00 00
|
||||
40 00 60 00 4c 12 70 20 60 00 8d 16 08 00 08 00
|
||||
40 00 80 00 28 0a 60 20 40 02 8d 0a 80 01 8d 00
|
||||
40 00 80 00 4c 12 c0 20 28 00 28 36 10 10 00 11
|
||||
40 00 a0 00 4c 12 40 21 28 00 28 36 10 10 00 11
|
||||
40 00 60 00 28 0a 40 20 00 0c 8d 0e 01 fc ff ff
|
||||
40 00 60 00 e8 3a 80 20 b8 00 00 3a e8 00 00 00
|
||||
40 00 60 00 c8 32 a0 26 20 06 69 32 60 06 69 00
|
||||
40 00 80 80 08 02 a0 20 60 00 8d 06 01 00 00 00
|
||||
40 00 00 00 0c 02 ac 2f 0c 00 00 02 e0 00 00 00
|
||||
40 00 60 00 40 12 00 22 40 04 ae 16 80 00 80 00
|
||||
40 10 60 00 c8 32 00 21 40 00 00 32 70 00 00 00
|
||||
40 00 80 00 40 12 00 22 60 00 ae 16 40 00 40 00
|
||||
40 00 60 86 e8 3a a0 2f c0 40 8d 3e 00 00 00 3f
|
||||
40 00 60 01 e8 3a 00 21 40 00 00 3a 50 40 00 00
|
||||
40 00 80 01 e8 3a 60 20 40 00 00 3a 44 40 00 00
|
||||
40 00 60 00 08 02 60 20 40 00 8d 06 ff ff ff ff
|
||||
40 00 61 00 28 0a e0 21 e0 41 8d 0e 1f 00 00 00
|
||||
40 00 00 00 04 00 00 22 00 02 00 06 00 02 00 00
|
||||
40 00 60 80 e8 3a 80 2f e0 00 8d 3a c0 40 8d 00
|
||||
40 00 60 00 08 0a 00 21 c0 00 8d 06 01 00 00 00
|
||||
40 00 80 00 08 0a 60 21 20 01 8d 06 01 00 00 00
|
||||
40 00 81 00 28 0a 00 21 00 41 8d 0e 1f 00 00 00
|
||||
40 00 80 80 e8 3a c0 2f 40 00 00 3a 50 00 00 00
|
||||
40 00 60 80 e8 0a 80 2f 20 02 8d 0e 01 00 00 00
|
||||
40 20 80 00 28 0a 40 2e c0 0e 8d 0a 80 0e 8d 00
|
||||
40 00 80 01 20 0a 00 20 00 0f 8d 0e 01 00 00 00
|
||||
40 20 80 01 20 0a 00 20 80 0e 8d 0e 01 00 00 00
|
||||
40 00 60 01 28 0a 60 20 a0 00 8d 0a 80 00 8d 00
|
||||
40 00 80 00 08 02 80 22 20 02 8d 0e 01 00 00 00
|
||||
40 01 60 00 e8 3a ef 20 c4 40 64 3a ce 00 6e 00
|
||||
40 01 80 00 e8 3a 2f 21 e4 40 64 3a ee 00 6e 00
|
||||
40 00 60 00 0c 02 e0 20 40 00 8d 02 c0 40 8d 00
|
||||
40 00 80 06 28 0a 20 20 64 00 00 0a c0 40 8d 00
|
||||
40 00 60 80 08 02 40 21 20 01 8d 06 01 00 00 00
|
||||
40 10 00 00 0c 02 c0 21 c0 01 00 06 01 00 00 00
|
||||
40 00 60 00 28 4b 20 23 c0 02 69 4a 00 43 69 00
|
||||
40 10 60 00 28 4b 80 21 a0 00 69 4a 60 41 69 00
|
||||
|
|
@ -1,29 +0,0 @@
|
|||
and(8) g3<1>UD g2<0,1,0>UD ~g2.2<0,1,0>D { align1 1Q };
|
||||
and(16) g3<1>UD g2<0,1,0>UD ~g2.2<0,1,0>D { align1 1H };
|
||||
and(8) g8<1>UD g0.1<0,1,0>UW 0x07ffUW { align1 1Q };
|
||||
and(16) g18<1>UD g0.1<0,1,0>UW 0x07ffUW { align1 1H };
|
||||
and(1) g7<1>UD g5<0,1,0>UD 0x000000f0UD { align1 WE_all 1N };
|
||||
and.nz.f0.0(8) null<1>UD g36<8,8,1>UD g37<8,8,1>UD { align1 1Q };
|
||||
and.nz.f0.0(16) null<1>UD g70<8,8,1>UD g72<8,8,1>UD { align1 1H };
|
||||
and.z.f0.0(16) g21<1>UD g19<8,8,1>UD g17<8,8,1>UD { align1 1H };
|
||||
and(8) g61<1>UD g79<8,8,1>UD g32.1<8,4,2>UD { align1 2Q };
|
||||
and(8) g96<1>D ~g94<8,8,1>D ~g95<8,8,1>D { align1 1Q };
|
||||
and(16) g24<1>D ~g20<8,8,1>D ~g22<8,8,1>D { align1 1H };
|
||||
and(1) a0<1>UD g4<0,1,0>UD 0x000000ffUD { align1 WE_all 1N };
|
||||
and(16) g118<1>UD g114<8,8,1>UD 0x0000003fUD { align1 2H };
|
||||
and(1) g4<1>UD g20<0,1,0>UD 0x000000ffUD { align1 WE_all 3N };
|
||||
and.z.f0.0(8) null<1>D g13<8,8,1>UD 0x0000001fUD { align1 1Q };
|
||||
and(8) g21<1>UD g15<8,8,1>UD 0x00000003UD { align1 WE_all 1Q };
|
||||
and.z.f0.0(8) null<1>UD g20<8,8,1>UD 0x00000001UD { align1 1Q };
|
||||
and.z.f0.0(16) null<1>UD g45<8,8,1>UD 0x00000001UD { align1 1H };
|
||||
and(8) g4<1>UW g3<8,8,1>UW 0xfffcUW { align1 1Q };
|
||||
and(16) g13<1>UW g19<16,8,2>UW 0xfffcUW { align1 1H };
|
||||
and.nz.f0.0(8) null<1>UD ~g2.2<0,1,0>D g9<8,8,1>UD { align1 1Q };
|
||||
and(8) g18<1>UD ~g2.2<0,1,0>D g7<8,8,1>UD { align1 1Q };
|
||||
and.nz.f0.0(16) null<1>UD ~g2.2<0,1,0>D g14<8,8,1>UD { align1 1H };
|
||||
and(16) g30<1>UD ~g2.2<0,1,0>D g10<8,8,1>UD { align1 1H };
|
||||
and.nz.f0.0(8) g10<1>UD g9<8,8,1>UD 0x00000001UD { align1 1Q };
|
||||
and.nz.f0.0(16) g16<1>UD g14<8,8,1>UD 0x00000001UD { align1 1H };
|
||||
and.z.f0.0(8) g9<1>UD g8<8,8,1>UD 0x00000003UD { align1 1Q };
|
||||
and(8) g12<1>UQ g9<4,4,1>UQ g11<4,4,1>UQ { align1 1Q };
|
||||
and(8) g26<1>UQ g18<4,4,1>UQ g22<4,4,1>UQ { align1 2Q };
|
||||
|
|
@ -1,29 +0,0 @@
|
|||
05 00 60 00 08 02 60 20 40 00 00 0a 48 40 00 00
|
||||
05 00 80 00 08 02 60 20 40 00 00 0a 48 40 00 00
|
||||
05 00 60 00 08 12 00 21 02 00 00 16 ff 07 ff 07
|
||||
05 00 80 00 08 12 40 22 02 00 00 16 ff 07 ff 07
|
||||
05 00 00 00 0c 02 e0 20 a0 00 00 06 f0 00 00 00
|
||||
05 00 60 02 00 02 00 20 80 04 8d 02 a0 04 8d 00
|
||||
05 00 80 02 00 02 00 20 c0 08 8d 02 00 09 8d 00
|
||||
05 00 80 01 08 02 a0 22 60 02 8d 02 20 02 8d 00
|
||||
05 10 60 00 08 02 a0 27 e0 09 8d 02 04 04 8a 00
|
||||
05 00 60 00 28 0a 00 2c c0 4b 8d 0a e0 4b 8d 00
|
||||
05 00 80 00 28 0a 00 23 80 42 8d 0a c0 42 8d 00
|
||||
05 00 00 00 04 02 00 22 80 00 00 06 ff 00 00 00
|
||||
05 20 80 00 08 02 c0 2e 40 0e 8d 06 3f 00 00 00
|
||||
05 10 00 00 0c 02 80 20 80 02 00 06 ff 00 00 00
|
||||
05 00 60 01 20 02 00 20 a0 01 8d 06 1f 00 00 00
|
||||
05 00 60 00 0c 02 a0 22 e0 01 8d 06 03 00 00 00
|
||||
05 00 60 01 00 02 00 20 80 02 8d 06 01 00 00 00
|
||||
05 00 80 01 00 02 00 20 a0 05 8d 06 01 00 00 00
|
||||
05 00 60 00 48 12 80 20 60 00 8d 16 fc ff fc ff
|
||||
05 00 80 00 48 12 a0 21 60 02 ae 16 fc ff fc ff
|
||||
05 00 60 02 00 0a 00 20 48 40 00 02 20 01 8d 00
|
||||
05 00 60 00 08 0a 40 22 48 40 00 02 e0 00 8d 00
|
||||
05 00 80 02 00 0a 00 20 48 40 00 02 c0 01 8d 00
|
||||
05 00 80 00 08 0a c0 23 48 40 00 02 40 01 8d 00
|
||||
05 00 60 02 08 02 40 21 20 01 8d 06 01 00 00 00
|
||||
05 00 80 02 08 02 00 22 c0 01 8d 06 01 00 00 00
|
||||
05 00 60 01 08 02 20 21 00 01 8d 06 03 00 00 00
|
||||
05 00 60 00 08 43 80 21 20 01 69 42 60 01 69 00
|
||||
05 10 60 00 08 43 40 23 40 02 69 42 c0 02 69 00
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
asr(8) g19<1>D g7<8,8,1>D 0x00000001UD { align1 1Q };
|
||||
asr(16) g20<1>D g2.7<0,1,0>D 0x0000001fUD { align1 1H };
|
||||
asr.nz.f0.0(8) null<1>D -g0<0,1,0>W 15D { align1 1Q };
|
||||
asr.nz.f0.0(16) null<1>D -g0<0,1,0>W 15D { align1 1H };
|
||||
asr(8) g2<1>D -g0<0,1,0>W 15D { align1 1Q };
|
||||
asr(16) g2<1>D -g0<0,1,0>W 15D { align1 1H };
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
0c 00 60 00 28 0a 60 22 e0 00 8d 06 01 00 00 00
|
||||
0c 00 80 00 28 0a 80 22 5c 00 00 06 1f 00 00 00
|
||||
0c 00 60 02 20 1a 00 20 00 40 00 0e 0f 00 00 00
|
||||
0c 00 80 02 20 1a 00 20 00 40 00 0e 0f 00 00 00
|
||||
0c 00 60 00 28 1a 40 20 00 40 00 0e 0f 00 00 00
|
||||
0c 00 80 00 28 1a 40 20 00 40 00 0e 0f 00 00 00
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
bfe(8) g96<1>UD g89<4,4,1>UD g30<4,4,1>UD g91<4,4,1>UD { align16 1Q };
|
||||
bfe(16) g13<1>UD g44<4,4,1>UD g115<4,4,1>UD g126<4,4,1>UD { align16 1H };
|
||||
bfe(8) g18<1>D g17<4,4,1>D g16<4,4,1>D g49<4,4,1>D { align16 1Q };
|
||||
bfe(16) g13<1>D g11<4,4,1>D g42<4,4,1>D g5<4,4,1>D { align16 1H };
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
18 01 60 00 00 90 1e 60 c8 91 05 39 3c 20 c7 16
|
||||
18 01 80 00 00 90 1e 0d c8 c1 02 39 e6 20 87 1f
|
||||
18 01 60 00 00 48 1e 12 c8 11 01 39 20 20 47 0c
|
||||
18 01 80 00 00 48 1e 0d c8 b1 00 39 54 20 47 01
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
bfi1(8) g20<1>UD g19<8,8,1>D g18<8,8,1>D { align1 1Q };
|
||||
bfi1(16) g16<1>UD g14<8,8,1>D g12<8,8,1>D { align1 1H };
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
19 00 60 00 08 0a 80 22 60 02 8d 0a 40 02 8d 00
|
||||
19 00 80 00 08 0a 00 22 c0 01 8d 0a 80 01 8d 00
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
bfi2(8) g31<1>UD g88<4,4,1>UD g90<4,4,1>UD g91<4,4,1>UD { align16 1Q };
|
||||
bfi2(16) g5<1>UD g42<4,4,1>UD g40<4,4,1>UD g126<4,4,1>UD { align16 1H };
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
1a 01 60 00 00 90 1e 1f c8 81 05 39 b4 20 c7 16
|
||||
1a 01 80 00 00 90 1e 05 c8 a1 02 39 50 20 87 1f
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
bfrev(8) g5<1>UD g5<8,8,1>UD { align1 1Q };
|
||||
bfrev(16) g6<1>UD g8<8,8,1>UD { align1 1H };
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
17 00 60 00 08 02 a0 20 a0 00 8d 00 00 00 00 00
|
||||
17 00 80 00 08 02 c0 20 00 01 8d 00 00 00 00 00
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
break(8) JIP: LABEL0 UIP: LABEL1 { align1 1Q };
|
||||
break(16) JIP: LABEL0 UIP: LABEL1 { align1 1H };
|
||||
LABEL0:
|
||||
(+f0.0) break(8) JIP: LABEL1 UIP: LABEL1 { align1 1Q };
|
||||
(+f0.0) break(16) JIP: LABEL1 UIP: LABEL1 { align1 1H };
|
||||
LABEL1:
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
28 00 60 00 20 0e 00 20 40 00 00 00 20 00 00 00
|
||||
28 00 80 00 20 0e 00 20 30 00 00 00 10 00 00 00
|
||||
28 00 61 00 20 0e 00 20 20 00 00 00 20 00 00 00
|
||||
28 00 81 00 20 0e 00 20 10 00 00 00 10 00 00 00
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
cbit(8) g9<1>UD g31<8,8,1>UD { align1 1Q };
|
||||
cbit(16) g6<1>UD g8<8,8,1>UD { align1 1H };
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
4d 00 60 00 08 02 20 21 e0 03 8d 00 00 00 00 00
|
||||
4d 00 80 00 08 02 c0 20 00 01 8d 00 00 00 00 00
|
||||
|
|
@ -1,104 +0,0 @@
|
|||
cmp.z.f0.0(8) null<1>F g20<8,8,1>F 0xbf800000F /* -1F */ { align1 1Q };
|
||||
cmp.nz.f0.0(8) g59<1>DF g2.1<0,1,0>DF g59<4,4,1>DF { align1 1Q };
|
||||
cmp.nz.f0.0(8) g49<1>F g47<8,8,1>F g14.1<0,1,0>F { align1 1Q };
|
||||
cmp.nz.f0.0(8) null<1>D g7<8,8,1>D 0D { align1 1Q };
|
||||
cmp.z.f0.0(8) g5<1>D g4<8,8,1>D g2.5<0,1,0>D { align1 1Q };
|
||||
cmp.z.f0.0(16) g7<1>D g5<8,8,1>D g2.5<0,1,0>D { align1 1H };
|
||||
cmp.l.f0.0(16) g28<1>F g26<8,8,1>F g24<8,8,1>F { align1 1H };
|
||||
cmp.ge.f0.0(16) g30<1>F g26<8,8,1>F g24<8,8,1>F { align1 1H };
|
||||
cmp.nz.f0.0(8) g43<1>D g42<8,8,1>D g2.1<0,1,0>D { align1 1Q };
|
||||
cmp.z.f0.0(8) g86<1>DF (abs)g6.2<0,1,0>DF g68<4,4,1>DF { align1 1Q };
|
||||
cmp.le.f0.0(8) g108<1>D g106<8,8,1>D 0D { align1 1Q };
|
||||
cmp.nz.f0.0(8) null<1>DF g6.2<0,1,0>DF g66<4,4,1>DF { align1 1Q };
|
||||
cmp.l.f0.0(8) g5<1>DF g36<4,4,1>DF g53<4,4,1>DF { align1 1Q };
|
||||
cmp.ge.f0.0(8) g18<1>DF g36<4,4,1>DF g53<4,4,1>DF { align1 1Q };
|
||||
cmp.z.f0.0(8) g34<1>DF (abs)g106<4,4,1>DF g52<4,4,1>DF { align1 2Q };
|
||||
cmp.le.f0.0(16) g35<1>D g21<8,8,1>D 0D { align1 1H };
|
||||
cmp.nz.f0.0(8) null<1>DF g106<4,4,1>DF g50<4,4,1>DF { align1 2Q };
|
||||
cmp.nz.f0.0(8) g113<1>DF g3.1<0,1,0>DF g59<4,4,1>DF { align1 2Q };
|
||||
cmp.l.f0.0(8) null<1>UD g12<8,8,1>UD 0x00000004UD { align1 1Q };
|
||||
cmp.l.f0.0(8) g53<1>F g52<8,8,1>F g51<8,8,1>F { align1 1Q };
|
||||
cmp.ge.f0.0(8) g55<1>F g52<8,8,1>F g51<8,8,1>F { align1 1Q };
|
||||
cmp.ge.f0.0(8) g15<1>D (abs)g12<8,8,1>D 1D { align1 1Q };
|
||||
cmp.l.f0.0(8) null<1>D g6<0,1,0>D 2D { align1 1Q };
|
||||
(+f0.1) cmp.z.f0.1(8) null<1>D g8<8,8,1>D 0D { align1 1Q };
|
||||
cmp.nz.f0.0(16) g11<1>D g9<8,8,1>D 3D { align1 1H };
|
||||
(+f0.1) cmp.z.f0.1(16) null<1>D g11<8,8,1>D 0D { align1 1H };
|
||||
cmp.z.f0.0(8) null<1>D g22<8,8,1>D 1D { align1 1Q };
|
||||
cmp.z.f0.0(16) null<1>D g47<8,8,1>D 1D { align1 1H };
|
||||
cmp.ge.f0.0(8) g30<1>UD g29<8,8,1>UD g5.7<0,1,0>UD { align1 1Q };
|
||||
cmp.l.f0.0(8) g31<1>UD g29<8,8,1>UD g5.3<0,1,0>UD { align1 1Q };
|
||||
cmp.ge.f0.0(16) g50<1>UD g48<8,8,1>UD g7.7<0,1,0>UD { align1 1H };
|
||||
cmp.l.f0.0(16) g52<1>UD g48<8,8,1>UD g7.3<0,1,0>UD { align1 1H };
|
||||
cmp.nz.f0.0(16) g9<1>F g2.5<0,1,0>F g1.1<0,1,0>F { align1 1H };
|
||||
cmp.ge.f0.0(8) null<1>D g38<8,8,1>D 32D { align1 1Q };
|
||||
cmp.ge.f0.0(8) null<1>DF g21<4,4,1>DF g13<4,4,1>DF { align1 1Q };
|
||||
cmp.ge.f0.0(16) g3<1>D g1.1<0,1,0>D g1<0,1,0>D { align1 1H };
|
||||
cmp.l.f0.0(16) g5<1>D g1.1<0,1,0>D g1<0,1,0>D { align1 1H };
|
||||
cmp.z.f0.0(8) g25<1>F g4.3<0,1,0>F g4.1<0,1,0>F { align1 1Q };
|
||||
cmp.l.f0.0(8) g33<1>D g5<0,1,0>D 1D { align1 1Q };
|
||||
cmp.l.f0.0(8) g43<1>DF g39<4,4,1>DF g37<4,4,1>DF { align1 2Q };
|
||||
cmp.ge.f0.0(8) g46<1>DF g39<4,4,1>DF g37<4,4,1>DF { align1 2Q };
|
||||
cmp.l.f0.0(16) null<1>D g6<0,1,0>D 1D { align1 1H };
|
||||
cmp.z.f0.0(16) g62<1>F g12<8,8,1>F g6.3<0,1,0>F { align1 1H };
|
||||
cmp.nz.f0.0(8) null<1>F g2<0,1,0>F 0x0F /* 0F */ { align1 1Q };
|
||||
cmp.nz.f0.0(16) null<1>F g2<0,1,0>F 0x0F /* 0F */ { align1 1H };
|
||||
cmp.ge.f0.0(16) null<1>UD g46<8,8,1>UD 0x00000040UD { align1 1H };
|
||||
cmp.z.f0.0(16) null<1>F g14<8,8,1>F g6.1<0,1,0>F { align1 1H };
|
||||
cmp.nz.f0.0(16) null<1>D g6<0,1,0>D 0D { align1 1H };
|
||||
cmp.l.f0.0(16) null<1>UD g39<8,8,1>UD 0x00000004UD { align1 1H };
|
||||
cmp.le.f0.0(8) null<1>F g2<8,8,1>F 0x3f000000F /* 0.5F */ { align1 1Q };
|
||||
cmp.le.f0.0(16) null<1>F g2<8,8,1>F 0x3f000000F /* 0.5F */ { align1 1H };
|
||||
cmp.le.f0.0(8) g20<1>F g5.3<0,1,0>F 0x0F /* 0F */ { align1 1Q };
|
||||
cmp.ge.f0.0(8) null<1>F (abs)g26<8,8,1>F 0x5d5e0b6bF /* 1e+18F */ { align1 1Q };
|
||||
cmp.g.f0.0(8) g80<1>F (abs)g44<8,8,1>F 0x3f800000F /* 1F */ { align1 1Q };
|
||||
cmp.ge.f0.0(16) null<1>D g67<8,8,1>D 32D { align1 1H };
|
||||
cmp.g.f0.0(8) null<1>F g124<8,8,1>F 0x0F /* 0F */ { align1 1Q };
|
||||
cmp.z.f0.0(8) g4<1>F g13<8,4,2>F g2.5<0,1,0>F { align1 2Q };
|
||||
cmp.g.f0.0(16) null<1>F g120<8,8,1>F 0x0F /* 0F */ { align1 1H };
|
||||
cmp.g.f0.0(16) g2<1>F (abs)g17<8,8,1>F 0x3f800000F /* 1F */ { align1 1H };
|
||||
cmp.l.f0.0(8) null<1>DF (abs)g5<0,1,0>DF g20<4,4,1>DF { align1 1Q };
|
||||
cmp.nz.f0.0(8) g29<1>D g22.1<8,4,2>D g3.2<0,1,0>D { align1 2Q };
|
||||
cmp.l.f0.0(8) null<1>DF g11<4,4,1>DF g8<4,4,1>DF { align1 2Q };
|
||||
cmp.nz.f0.0(8) g73<1>F g6.1<0,1,0>F g14<8,4,2>F { align1 2Q };
|
||||
cmp.g.f0.0(8) g7<1>D g2<0,1,0>D 0D { align1 1Q };
|
||||
cmp.l.f0.0(8) null<1>F g4.4<0,1,0>F 0x0F /* 0F */ { align1 1Q };
|
||||
cmp.l.f0.0(16) null<1>F g6.4<0,1,0>F 0x0F /* 0F */ { align1 1H };
|
||||
cmp.le.f0.0(8) null<1>D g2<8,8,1>D 50D { align1 1Q };
|
||||
cmp.le.f0.0(16) null<1>D g2<8,8,1>D 50D { align1 1H };
|
||||
cmp.ge.f0.0(16) null<1>F g35<8,8,1>F 0x3f000000F /* 0.5F */ { align1 1H };
|
||||
cmp.le.f0.0(8) g4<1>UD g2<0,1,0>UD 0x00000001UD { align1 1Q };
|
||||
cmp.g.f0.0(8) g5<1>UD g2<0,1,0>UD 0x00000001UD { align1 1Q };
|
||||
cmp.le.f0.0(16) g5<1>UD g2<0,1,0>UD 0x00000001UD { align1 1H };
|
||||
cmp.g.f0.0(16) g7<1>UD g2<0,1,0>UD 0x00000001UD { align1 1H };
|
||||
cmp.le.f0.0(16) g121<1>F g27<8,8,1>F 0x461c3f9aF /* 9999.9F */ { align1 1H };
|
||||
cmp.z.f0.0(8) g5<1>D g14<8,4,2>D g3.1<0,1,0>D { align1 2Q };
|
||||
cmp.g.f0.0(8) null<1>D g5.2<0,1,0>D 31D { align1 1Q };
|
||||
cmp.g.f0.0(8) null<1>UD g4.2<0,1,0>UD 0x0000001fUD { align1 1Q };
|
||||
(+f0.1) cmp.nz.f0.1(8) null<1>UW g0<8,8,1>UW g0<8,8,1>UW { align1 1Q };
|
||||
(+f0.1) cmp.nz.f0.1(16) null<1>UW g0<8,8,1>UW g0<8,8,1>UW { align1 1H };
|
||||
cmp.z.f0.0(16) null<1>D g1<8,8,1>D 1024D { align1 2H };
|
||||
cmp.l.f0.0(16) null<1>D g118<8,8,1>D 32D { align1 2H };
|
||||
cmp.nz.f0.0(8) null<1>UD g3<8,8,1>UD 0x00000000UD { align1 1Q };
|
||||
cmp.nz.f0.0(16) null<1>UD g3<8,8,1>UD 0x00000000UD { align1 1H };
|
||||
cmp.g.f0.0(16) null<1>D g2.1<0,1,0>D 0D { align1 1H };
|
||||
cmp.nz.f0.0(8) null<1>Q g6<4,4,1>Q g3<4,4,1>Q { align1 1Q };
|
||||
cmp.z.f0.0(8) g8<1>Q g5<4,4,1>Q g3<4,4,1>Q { align1 1Q };
|
||||
cmp.nz.f0.0(8) g2<1>Q g5<4,4,1>Q g3<4,4,1>Q { align1 1Q };
|
||||
cmp.nz.f0.0(8) null<1>Q g9<4,4,1>Q g4<4,4,1>Q { align1 2Q };
|
||||
cmp.z.f0.0(8) g17<1>Q g11<4,4,1>Q g4<4,4,1>Q { align1 2Q };
|
||||
cmp.nz.f0.0(8) g20<1>Q g11<4,4,1>Q g4<4,4,1>Q { align1 2Q };
|
||||
cmp.z.f0.0(8) null<1>UD g5<8,8,1>UD 0x00000000UD { align1 1Q };
|
||||
cmp.z.f0.0(16) null<1>UD g15<8,8,1>UD 0x00000000UD { align1 1H };
|
||||
cmp.g.f0.0(16) g1<1>D g8<8,8,1>D 0D { align1 1H };
|
||||
cmp.ge.f0.0(8) null<1>UD g10<8,8,1>UD g8<8,8,1>UD { align1 1Q };
|
||||
cmp.ge.f0.0(8) null<1>DF g37<4,4,1>DF g26<4,4,1>DF { align1 2Q };
|
||||
cmp.l.f0.0(8) null<1>Q g20<4,4,1>Q g25<4,4,1>Q { align1 1Q };
|
||||
cmp.l.f0.0(8) null<1>Q g2<4,4,1>Q g12<4,4,1>Q { align1 2Q };
|
||||
cmp.ge.f0.0(8) null<1>Q g20<4,4,1>Q g27<4,4,1>Q { align1 1Q };
|
||||
cmp.ge.f0.0(8) null<1>Q g2<4,4,1>Q g8<4,4,1>Q { align1 2Q };
|
||||
cmp.le.f0.0(8) null<1>UD g18<8,8,1>UD 0x000000ffUD { align1 1Q };
|
||||
cmp.le.f0.0(16) null<1>UD g32<8,8,1>UD 0x000000ffUD { align1 1H };
|
||||
cmp.z.f0.0(8) null<1>Q g12<4,4,1>Q g7<4,4,1>Q { align1 1Q };
|
||||
cmp.z.f0.0(8) null<1>Q g26<4,4,1>Q g12<4,4,1>Q { align1 2Q };
|
||||
cmp.g.f0.0(16) null<1>UD g4.2<0,1,0>UD 0x0000001fUD { align1 1H };
|
||||
|
|
@ -1,104 +0,0 @@
|
|||
10 00 60 01 e0 3a 00 20 80 02 8d 3e 00 00 80 bf
|
||||
10 00 60 02 c8 32 60 27 48 00 00 32 60 07 69 00
|
||||
10 00 60 02 e8 3a 20 26 e0 05 8d 3a c4 01 00 00
|
||||
10 00 60 02 20 0a 00 20 e0 00 8d 0e 00 00 00 00
|
||||
10 00 60 01 28 0a a0 20 80 00 8d 0a 54 00 00 00
|
||||
10 00 80 01 28 0a e0 20 a0 00 8d 0a 54 00 00 00
|
||||
10 00 80 05 e8 3a 80 23 40 03 8d 3a 00 03 8d 00
|
||||
10 00 80 04 e8 3a c0 23 40 03 8d 3a 00 03 8d 00
|
||||
10 00 60 02 28 0a 60 25 40 05 8d 0a 44 00 00 00
|
||||
10 00 60 01 c8 32 c0 2a d0 20 00 32 80 08 69 00
|
||||
10 00 60 06 28 0a 80 2d 40 0d 8d 0e 00 00 00 00
|
||||
10 00 60 02 c0 32 00 20 d0 00 00 32 40 08 69 00
|
||||
10 00 60 05 c8 32 a0 20 80 04 69 32 a0 06 69 00
|
||||
10 00 60 04 c8 32 40 22 80 04 69 32 a0 06 69 00
|
||||
10 10 60 01 c8 32 40 24 40 2d 69 32 80 06 69 00
|
||||
10 00 80 06 28 0a 60 24 a0 02 8d 0e 00 00 00 00
|
||||
10 10 60 02 c0 32 00 20 40 0d 69 32 40 06 69 00
|
||||
10 10 60 02 c8 32 20 2e 68 00 00 32 60 07 69 00
|
||||
10 00 60 05 00 02 00 20 80 01 8d 06 04 00 00 00
|
||||
10 00 60 05 e8 3a a0 26 80 06 8d 3a 60 06 8d 00
|
||||
10 00 60 04 e8 3a e0 26 80 06 8d 3a 60 06 8d 00
|
||||
10 00 60 04 28 0a e0 21 80 21 8d 0e 01 00 00 00
|
||||
10 00 60 05 20 0a 00 20 c0 00 00 0e 02 00 00 00
|
||||
10 00 61 01 21 0a 00 20 00 01 8d 0e 00 00 00 00
|
||||
10 00 80 02 28 0a 60 21 20 01 8d 0e 03 00 00 00
|
||||
10 00 81 01 21 0a 00 20 60 01 8d 0e 00 00 00 00
|
||||
10 00 60 01 20 0a 00 20 c0 02 8d 0e 01 00 00 00
|
||||
10 00 80 01 20 0a 00 20 e0 05 8d 0e 01 00 00 00
|
||||
10 00 60 04 08 02 c0 23 a0 03 8d 02 bc 00 00 00
|
||||
10 00 60 05 08 02 e0 23 a0 03 8d 02 ac 00 00 00
|
||||
10 00 80 04 08 02 40 26 00 06 8d 02 fc 00 00 00
|
||||
10 00 80 05 08 02 80 26 00 06 8d 02 ec 00 00 00
|
||||
10 00 80 02 e8 3a 20 21 54 00 00 3a 24 00 00 00
|
||||
10 00 60 04 20 0a 00 20 c0 04 8d 0e 20 00 00 00
|
||||
10 00 60 04 c0 32 00 20 a0 02 69 32 a0 01 69 00
|
||||
10 00 80 04 28 0a 60 20 24 00 00 0a 20 00 00 00
|
||||
10 00 80 05 28 0a a0 20 24 00 00 0a 20 00 00 00
|
||||
10 00 60 01 e8 3a 20 23 8c 00 00 3a 84 00 00 00
|
||||
10 00 60 05 28 0a 20 24 a0 00 00 0e 01 00 00 00
|
||||
10 10 60 05 c8 32 60 25 e0 04 69 32 a0 04 69 00
|
||||
10 10 60 04 c8 32 c0 25 e0 04 69 32 a0 04 69 00
|
||||
10 00 80 05 20 0a 00 20 c0 00 00 0e 01 00 00 00
|
||||
10 00 80 01 e8 3a c0 27 80 01 8d 3a cc 00 00 00
|
||||
10 00 60 02 e0 3a 00 20 40 00 00 3e 00 00 00 00
|
||||
10 00 80 02 e0 3a 00 20 40 00 00 3e 00 00 00 00
|
||||
10 00 80 04 00 02 00 20 c0 05 8d 06 40 00 00 00
|
||||
10 00 80 01 e0 3a 00 20 c0 01 8d 3a c4 00 00 00
|
||||
10 00 80 02 20 0a 00 20 c0 00 00 0e 00 00 00 00
|
||||
10 00 80 05 00 02 00 20 e0 04 8d 06 04 00 00 00
|
||||
10 00 60 06 e0 3a 00 20 40 00 8d 3e 00 00 00 3f
|
||||
10 00 80 06 e0 3a 00 20 40 00 8d 3e 00 00 00 3f
|
||||
10 00 60 06 e8 3a 80 22 ac 00 00 3e 00 00 00 00
|
||||
10 00 60 04 e0 3a 00 20 40 23 8d 3e 6b 0b 5e 5d
|
||||
10 00 60 03 e8 3a 00 2a 80 25 8d 3e 00 00 80 3f
|
||||
10 00 80 04 20 0a 00 20 60 08 8d 0e 20 00 00 00
|
||||
10 00 60 03 e0 3a 00 20 80 0f 8d 3e 00 00 00 00
|
||||
10 10 60 01 e8 3a 80 20 a0 01 8a 3a 54 00 00 00
|
||||
10 00 80 03 e0 3a 00 20 00 0f 8d 3e 00 00 00 00
|
||||
10 00 80 03 e8 3a 40 20 20 22 8d 3e 00 00 80 3f
|
||||
10 00 60 05 c0 32 00 20 a0 20 00 32 80 02 69 00
|
||||
10 10 60 02 28 0a a0 23 c4 02 8a 0a 68 00 00 00
|
||||
10 10 60 05 c0 32 00 20 60 01 69 32 00 01 69 00
|
||||
10 10 60 02 e8 3a 20 29 c4 00 00 3a c0 01 8a 00
|
||||
10 00 60 03 28 0a e0 20 40 00 00 0e 00 00 00 00
|
||||
10 00 60 05 e0 3a 00 20 90 00 00 3e 00 00 00 00
|
||||
10 00 80 05 e0 3a 00 20 d0 00 00 3e 00 00 00 00
|
||||
10 00 60 06 20 0a 00 20 40 00 8d 0e 32 00 00 00
|
||||
10 00 80 06 20 0a 00 20 40 00 8d 0e 32 00 00 00
|
||||
10 00 80 04 e0 3a 00 20 60 04 8d 3e 00 00 00 3f
|
||||
10 00 60 06 08 02 80 20 40 00 00 06 01 00 00 00
|
||||
10 00 60 03 08 02 a0 20 40 00 00 06 01 00 00 00
|
||||
10 00 80 06 08 02 a0 20 40 00 00 06 01 00 00 00
|
||||
10 00 80 03 08 02 e0 20 40 00 00 06 01 00 00 00
|
||||
10 00 80 06 e8 3a 20 2f 60 03 8d 3e 9a 3f 1c 46
|
||||
10 10 60 01 28 0a a0 20 c0 01 8a 0a 64 00 00 00
|
||||
10 00 60 03 20 0a 00 20 a8 00 00 0e 1f 00 00 00
|
||||
10 00 60 03 00 02 00 20 88 00 00 06 1f 00 00 00
|
||||
10 00 61 02 41 12 00 20 00 00 8d 12 00 00 8d 00
|
||||
10 00 81 02 41 12 00 20 00 00 8d 12 00 00 8d 00
|
||||
10 20 80 01 20 0a 00 20 20 00 8d 0e 00 04 00 00
|
||||
10 20 80 05 20 0a 00 20 c0 0e 8d 0e 20 00 00 00
|
||||
10 00 60 02 00 02 00 20 60 00 8d 06 00 00 00 00
|
||||
10 00 80 02 00 02 00 20 60 00 8d 06 00 00 00 00
|
||||
10 00 80 03 20 0a 00 20 44 00 00 0e 00 00 00 00
|
||||
10 00 60 02 20 4b 00 20 c0 00 69 4a 60 00 69 00
|
||||
10 00 60 01 28 4b 00 21 a0 00 69 4a 60 00 69 00
|
||||
10 00 60 02 28 4b 40 20 a0 00 69 4a 60 00 69 00
|
||||
10 10 60 02 20 4b 00 20 20 01 69 4a 80 00 69 00
|
||||
10 10 60 01 28 4b 20 22 60 01 69 4a 80 00 69 00
|
||||
10 10 60 02 28 4b 80 22 60 01 69 4a 80 00 69 00
|
||||
10 00 60 01 00 02 00 20 a0 00 8d 06 00 00 00 00
|
||||
10 00 80 01 00 02 00 20 e0 01 8d 06 00 00 00 00
|
||||
10 00 80 03 28 0a 20 20 00 01 8d 0e 00 00 00 00
|
||||
10 00 60 04 00 02 00 20 40 01 8d 02 00 01 8d 00
|
||||
10 10 60 04 c0 32 00 20 a0 04 69 32 40 03 69 00
|
||||
10 00 60 05 20 4b 00 20 80 02 69 4a 20 03 69 00
|
||||
10 10 60 05 20 4b 00 20 40 00 69 4a 80 01 69 00
|
||||
10 00 60 04 20 4b 00 20 80 02 69 4a 60 03 69 00
|
||||
10 10 60 04 20 4b 00 20 40 00 69 4a 00 01 69 00
|
||||
10 00 60 06 00 02 00 20 40 02 8d 06 ff 00 00 00
|
||||
10 00 80 06 00 02 00 20 00 04 8d 06 ff 00 00 00
|
||||
10 00 60 01 20 4b 00 20 80 01 69 4a e0 00 69 00
|
||||
10 10 60 01 20 4b 00 20 40 03 69 4a 80 01 69 00
|
||||
10 00 80 03 00 02 00 20 88 00 00 06 1f 00 00 00
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
cont(8) JIP: LABEL0 UIP: LABEL1 { align1 1Q };
|
||||
LABEL0:
|
||||
cont(16) JIP: LABEL1 UIP: LABEL1 { align1 1H };
|
||||
LABEL1:
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
29 00 60 00 00 0e 00 34 20 00 00 00 10 00 00 00
|
||||
29 00 80 00 00 0e 00 34 10 00 00 00 10 00 00 00
|
||||
|
|
@ -1,14 +0,0 @@
|
|||
and(1) cr0<1>UD cr0<0,1,0>UD 0xfffffb3fUD { align1 1N switch };
|
||||
and(1) cr0<1>UD cr0<0,1,0>UD 0xffffff3fUD { align1 1N switch };
|
||||
and(1) cr0<1>UD cr0<0,1,0>UD 0xfffffb7fUD { align1 1N switch };
|
||||
and(1) cr0<1>UD cr0<0,1,0>UD 0xffffff7fUD { align1 1N switch };
|
||||
and(1) cr0<1>UD cr0<0,1,0>UD 0xfffffbbfUD { align1 1N switch };
|
||||
and(1) cr0<1>UD cr0<0,1,0>UD 0xffffffbfUD { align1 1N switch };
|
||||
and(1) cr0<1>UD cr0<0,1,0>UD 0xffffffcfUD { align1 1N switch };
|
||||
and(1) cr0<1>UD cr0<0,1,0>UD 0xfffffbffUD { align1 1N switch };
|
||||
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000400UD { align1 1N switch };
|
||||
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000030UD { align1 1N switch };
|
||||
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000040UD { align1 1N switch };
|
||||
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000440UD { align1 1N switch };
|
||||
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000080UD { align1 1N switch };
|
||||
or(1) cr0<1>UD cr0<0,1,0>UD 0x00000480UD { align1 1N switch };
|
||||
|
|
@ -1,14 +0,0 @@
|
|||
05 80 00 00 00 00 00 30 00 10 00 06 3f fb ff ff
|
||||
05 80 00 00 00 00 00 30 00 10 00 06 3f ff ff ff
|
||||
05 80 00 00 00 00 00 30 00 10 00 06 7f fb ff ff
|
||||
05 80 00 00 00 00 00 30 00 10 00 06 7f ff ff ff
|
||||
05 80 00 00 00 00 00 30 00 10 00 06 bf fb ff ff
|
||||
05 80 00 00 00 00 00 30 00 10 00 06 bf ff ff ff
|
||||
05 80 00 00 00 00 00 30 00 10 00 06 cf ff ff ff
|
||||
05 80 00 00 00 00 00 30 00 10 00 06 ff fb ff ff
|
||||
06 80 00 00 00 00 00 30 00 10 00 06 00 04 00 00
|
||||
06 80 00 00 00 00 00 30 00 10 00 06 30 00 00 00
|
||||
06 80 00 00 00 00 00 30 00 10 00 06 40 00 00 00
|
||||
06 80 00 00 00 00 00 30 00 10 00 06 40 04 00 00
|
||||
06 80 00 00 00 00 00 30 00 10 00 06 80 00 00 00
|
||||
06 80 00 00 00 00 00 30 00 10 00 06 80 04 00 00
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
csel.nz(8) g15<1>F g11<4,4,1>F (abs)g11<4,4,1>F g11<4,4,1>F { align16 1Q };
|
||||
csel.nz(16) g14<1>F g8<4,4,1>F (abs)g8<4,4,1>F g8<4,4,1>F { align16 1H };
|
||||
csel.le(8) g21<1>F (abs)g5.3<0,1,0>F g5.0<0,1,0>F g5.3<0,1,0>F { align16 1Q };
|
||||
csel.l(8) g107<1>F -g101<4,4,1>F g101<4,4,1>F g104<4,4,1>F { align16 1Q };
|
||||
csel.le(8) g21<1>F g5.0<0,1,0>F (abs)g5.1<0,1,0>F g5.1<0,1,0>F { align16 1Q };
|
||||
csel.l(8) g127<1>F g2<4,4,1>F g8<4,4,1>F g4.0<0,1,0>F { align16 1Q };
|
||||
csel.l(16) g126<1>F g2<4,4,1>F g13<4,4,1>F g6.0<0,1,0>F { align16 1H };
|
||||
csel.le(16) g13<1>F (abs)g73<4,4,1>F g58<4,4,1>F g73<4,4,1>F { align16 1H };
|
||||
csel.le(16) g15<1>F g58<4,4,1>F (abs)g73<4,4,1>F g73<4,4,1>F { align16 1H };
|
||||
csel.l(16) g69<1>F -g65<4,4,1>F g65<4,4,1>F g67<4,4,1>F { align16 1H };
|
||||
csel.sat.g(8) g125<1>F g2.3<0,1,0>F g2.2<0,1,0>F g2.0<0,1,0>F { align16 1Q };
|
||||
csel.g(8) g125<1>F g2.3<0,1,0>F g2.2<0,1,0>F g2.0<0,1,0>F { align16 1Q };
|
||||
csel.g(16) g122<1>F g2.3<0,1,0>F g2.2<0,1,0>F g2.0<0,1,0>F { align16 1H };
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
12 01 60 02 80 00 1e 0f c8 b1 00 39 16 20 c7 02
|
||||
12 01 80 02 80 00 1e 0e c8 81 00 39 10 20 07 02
|
||||
12 01 60 06 20 00 1e 15 01 56 20 00 0a 04 58 01
|
||||
12 01 60 05 40 00 1e 6b c8 51 06 39 ca 20 07 1a
|
||||
12 01 60 06 80 00 1e 15 01 50 20 40 0a 04 48 01
|
||||
12 01 60 05 00 00 1e 7f c8 21 00 39 10 04 00 01
|
||||
12 01 80 05 00 00 1e 7e c8 21 00 39 1a 04 80 01
|
||||
12 01 80 06 20 00 1e 0d c8 91 04 39 74 20 47 12
|
||||
12 01 80 06 80 00 1e 0f c8 a1 03 39 92 20 47 12
|
||||
12 01 80 05 40 00 1e 45 c8 11 04 39 82 20 c7 10
|
||||
12 01 60 83 00 00 1e 7d 01 26 20 80 04 04 80 00
|
||||
12 01 60 03 00 00 1e 7d 01 26 20 80 04 04 80 00
|
||||
12 01 80 03 00 00 1e 7a 01 26 20 80 04 04 80 00
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
else(8) JIP: LABEL0 UIP: LABEL0 { align1 1Q };
|
||||
else(16) JIP: LABEL0 UIP: LABEL0 { align1 1H };
|
||||
else(32) JIP: LABEL0 UIP: LABEL0 { align1 };
|
||||
LABEL0:
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
24 00 60 00 20 0e 00 20 30 00 00 00 30 00 00 00
|
||||
24 00 80 00 20 0e 00 20 20 00 00 00 20 00 00 00
|
||||
24 00 a0 00 20 0e 00 20 10 00 00 00 10 00 00 00
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
endif(8) JIP: LABEL0 { align1 1Q };
|
||||
endif(16) JIP: LABEL0 { align1 1H };
|
||||
endif(32) JIP: LABEL0 { align1 };
|
||||
LABEL0:
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
25 00 60 00 00 0e 00 00 00 00 00 08 30 00 00 00
|
||||
25 00 80 00 00 0e 00 00 00 00 00 08 20 00 00 00
|
||||
25 00 a0 00 00 0e 00 00 00 00 00 08 10 00 00 00
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
fbh(8) g15<1>D g35<8,8,1>D { align1 1Q };
|
||||
fbh(16) g8<1>D g4<8,8,1>D { align1 1H };
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
4b 00 60 00 28 0a e0 21 60 04 8d 00 00 00 00 00
|
||||
4b 00 80 00 28 0a 00 21 80 00 8d 00 00 00 00 00
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
fbl(8) g5<1>UD g5<8,8,1>UD { align1 1Q };
|
||||
fbl(16) g6<1>UD g8<8,8,1>UD { align1 1H };
|
||||
fbl(1) g43<1>UD mask0<0,1,0>UD { align1 WE_all 1N };
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
4c 00 60 00 08 02 a0 20 a0 00 8d 00 00 00 00 00
|
||||
4c 00 80 00 08 02 c0 20 00 01 8d 00 00 00 00 00
|
||||
4c 00 00 00 0c 00 60 25 00 08 00 00 00 00 00 00
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
frc(8) g28<1>F g4<8,8,1>F { align1 1Q };
|
||||
frc(16) g3<1>F g1<0,1,0>F { align1 1H };
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
43 00 60 00 e8 3a 80 23 80 00 8d 00 00 00 00 00
|
||||
43 00 80 00 e8 3a 60 20 20 00 00 00 00 00 00 00
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
(-f0.1.any4h) halt(8) JIP: LABEL0 UIP: LABEL0 { align1 1Q };
|
||||
halt(8) JIP: LABEL1 UIP: LABEL1 { align1 1Q };
|
||||
LABEL1:
|
||||
(-f0.1.any4h) halt(16) JIP: LABEL0 UIP: LABEL0 { align1 1H };
|
||||
halt(16) JIP: LABEL0 UIP: LABEL0 { align1 1H };
|
||||
LABEL0:
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
2a 00 76 00 21 0e 00 20 40 00 00 00 40 00 00 00
|
||||
2a 00 60 00 20 0e 00 20 10 00 00 00 10 00 00 00
|
||||
2a 00 96 00 21 0e 00 20 20 00 00 00 20 00 00 00
|
||||
2a 00 80 00 20 0e 00 20 10 00 00 00 10 00 00 00
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
(+f0.0) if(8) JIP: LABEL0 UIP: LABEL1 { align1 1Q };
|
||||
(-f0.0) if(8) JIP: LABEL0 UIP: LABEL1 { align1 1Q };
|
||||
LABEL0:
|
||||
(-f0.0) if(16) JIP: LABEL1 UIP: LABEL1 { align1 1H };
|
||||
(+f0.0) if(16) JIP: LABEL1 UIP: LABEL1 { align1 1H };
|
||||
(+f0.0) if(32) JIP: LABEL1 UIP: LABEL1 { align1 };
|
||||
LABEL1:
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
22 00 61 00 20 0e 00 20 50 00 00 00 20 00 00 00
|
||||
22 00 71 00 20 0e 00 20 40 00 00 00 10 00 00 00
|
||||
22 00 91 00 20 0e 00 20 30 00 00 00 30 00 00 00
|
||||
22 00 81 00 20 0e 00 20 20 00 00 00 20 00 00 00
|
||||
22 00 a1 00 20 0e 00 20 10 00 00 00 10 00 00 00
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
lrp(8) g4<1>F g16<4,4,1>F g7.2<0,1,0>F g6.6<0,1,0>F { align16 1Q };
|
||||
lrp(16) g4<1>F g2.4<0,1,0>F g2.2<0,1,0>F g2.0<0,1,0>F { align16 1H };
|
||||
lrp.z.f0.0(8) g8<1>F g3.2<0,1,0>F g3.1<0,1,0>F g3.0<0,1,0>F { align16 1Q };
|
||||
lrp.sat(8) g7<1>F g10<4,4,1>F g13<4,4,1>F g16<4,4,1>F { align16 1Q };
|
||||
lrp.sat(16) g18<1>F g20<4,4,1>F g26<4,4,1>F g32<4,4,1>F { align16 1H };
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
5c 01 60 00 00 00 1e 04 c8 01 21 80 0e 04 b0 01
|
||||
5c 01 80 00 00 00 1e 04 01 28 20 80 04 04 80 00
|
||||
5c 01 60 01 00 00 1e 08 01 34 20 40 06 04 c0 00
|
||||
5c 01 60 80 00 00 1e 07 c8 a1 00 39 1a 20 07 04
|
||||
5c 01 80 80 00 00 1e 12 c8 41 01 39 34 20 07 08
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
lzd(8) g25<1>UD g3.1<0,1,0>UD { align1 1Q };
|
||||
lzd(16) g27<1>UD g3.1<0,1,0>UD { align1 1H };
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
4a 00 60 00 08 02 20 23 64 00 00 00 00 00 00 00
|
||||
4a 00 80 00 08 02 60 23 64 00 00 00 00 00 00 00
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
mach(8) g19<1>UD g17<8,8,1>UD 0xaaaaaaabUD { align1 1Q AccWrEnable };
|
||||
mach(8) g23<1>D g17<8,8,1>D 1431655766D { align1 1Q AccWrEnable };
|
||||
mach(8) g42<1>UD g39<8,8,1>UD 0xaaaaaaabUD { align1 2Q AccWrEnable };
|
||||
mach(8) g50<1>D g39<8,8,1>D 1431655766D { align1 2Q AccWrEnable };
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
49 00 60 10 08 02 60 22 20 02 8d 06 ab aa aa aa
|
||||
49 00 60 10 28 0a e0 22 20 02 8d 0e 56 55 55 55
|
||||
49 10 60 10 08 02 40 25 e0 04 8d 06 ab aa aa aa
|
||||
49 10 60 10 28 0a 40 26 e0 04 8d 0e 56 55 55 55
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
mad(8) g26<1>F g22<4,4,1>F g2.4<0,1,0>F g5<4,4,1>F { align16 1Q };
|
||||
mad(16) g14<1>F g12<4,4,1>F g4<4,4,1>F g4<4,4,1>F { align16 1H };
|
||||
mad(8) g64<1>DF g62<4,4,1>DF g40<4,4,1>DF g92<4,4,1>DF { align16 1Q };
|
||||
mad(8) g80<1>DF -g50<4,4,1>DF g24<4,4,1>DF g80<4,4,1>DF { align16 1Q };
|
||||
mad(8) g27<1>DF g48<4,4,1>DF g106<4,4,1>DF g25<4,4,1>DF { align16 2Q };
|
||||
mad(8) g13<1>F -g14.0<0,1,0>F g11<4,4,1>F g6<4,4,1>F { align16 1Q };
|
||||
mad(16) g29<1>F -g33.0<0,1,0>F g25<4,4,1>F g15<4,4,1>F { align16 1H };
|
||||
mad(8) g29<1>DF g23<4,4,1>DF g27<4,4,1>DF -g25<4,4,1>DF { align16 1Q };
|
||||
mad.le.f0.0(8) g5<1>F g3<4,4,1>F g4.2<0,1,0>F g64<4,4,1>F { align16 1Q };
|
||||
mad.le.f0.0(16) g7<1>F g4<4,4,1>F g6.2<0,1,0>F g16<4,4,1>F { align16 1H };
|
||||
mad(8) g32<1>F g31<4,4,1>F g2.3<0,1,0>F -g15<4,4,1>F { align16 1Q };
|
||||
mad(16) g56<1>F g54<4,4,1>F g2.3<0,1,0>F -g5<4,4,1>F { align16 1H };
|
||||
mad.sat(8) g12<1>F g4.1<0,1,0>F g4.0<0,1,0>F g8<4,4,1>F { align16 1Q };
|
||||
mad.sat(16) g18<1>F g6.1<0,1,0>F g6.0<0,1,0>F g10<4,4,1>F { align16 1H };
|
||||
mad(8) g86<1>F g88.6<0,1,0>F -g88.7<0,1,0>F g77<4,4,1>F { align16 1Q };
|
||||
mad(8) g85<1>DF g28<4,4,1>DF g83<4,4,1>DF -g81<4,4,1>DF { align16 2Q };
|
||||
mad(8) g11<1>F -g2.0<0,1,0>F g10<4,4,1>F (abs)g5.6<0,1,0>F { align16 1Q };
|
||||
mad(8) g15<1>F g2.1<0,1,0>F g11<4,4,1>F (abs)g5.6<0,1,0>F { align16 1Q };
|
||||
mad.l.f0.0(8) g2<1>F g22<4,4,1>F g5.7<0,1,0>F g6.3<0,1,0>F { align16 1Q };
|
||||
mad(8) g79<1>DF -g39<4,4,1>DF g21<4,4,1>DF g79<4,4,1>DF { align16 2Q };
|
||||
mad(8) g117<1>F -g116<4,4,1>F g9.0<0,1,0>F -g113<4,4,1>F { align16 1Q };
|
||||
mad.ge.f0.0(8) g13<1>F g28.0<0,1,0>F g9<4,4,1>F -g2.4<0,1,0>F { align16 1Q };
|
||||
mad.ge.f0.0(16) g23<1>F g17.0<0,1,0>F g6<4,4,1>F -g3.0<0,1,0>F { align16 1H };
|
||||
mad(8) g26<1>F g2.0<0,1,0>F -g2.1<0,1,0>F (abs)g5.6<0,1,0>F { align16 1Q };
|
||||
mad(8) g70<1>F -g13<4,4,1>F -g2.1<0,1,0>F -g47<4,4,1>F { align16 1Q };
|
||||
mad(16) g95<1>F -g93<4,4,1>F g85<4,4,1>F -g85<4,4,1>F { align16 1H };
|
||||
mad(16) g5<1>F -g21<4,4,1>F -g2.1<0,1,0>F -g85<4,4,1>F { align16 1H };
|
||||
mad(16) g56<1>F g6.4<0,1,0>F -g6.5<0,1,0>F g51<4,4,1>F { align16 1H };
|
||||
mad.sat(8) g124<1>F -g7<4,4,1>F g2.6<0,1,0>F g2.1<0,1,0>F { align16 1Q };
|
||||
mad(16) g71<1>F g55.0<0,1,0>F -g55.1<0,1,0>F (abs)g1.0<0,1,0>F { align16 1H };
|
||||
mad(16) g77<1>F -g55.2<0,1,0>F g71<4,4,1>F (abs)g1.0<0,1,0>F { align16 1H };
|
||||
mad(16) g37<1>F g55.3<0,1,0>F g77<4,4,1>F (abs)g1.0<0,1,0>F { align16 1H };
|
||||
mad(8) g43<1>DF g42<4,4,1>DF -g34<4,4,1>DF g7<4,4,1>DF { align16 1Q };
|
||||
mad(8) g3<1>DF g2<4,4,1>DF -g111<4,4,1>DF g39<4,4,1>DF { align16 2Q };
|
||||
mad(8) g12<1>F -g17<4,4,1>F (abs)g7<4,4,1>F g4.0<0,1,0>F { align16 1Q };
|
||||
mad(16) g27<1>F -g22<4,4,1>F (abs)g19<4,4,1>F g29.0<0,1,0>F { align16 1H };
|
||||
mad.sat(8) g125<1>F g9<4,4,1>F g6<4,4,1>F -g64.0<0,1,0>F { align16 1Q };
|
||||
mad.l.f0.0(16) g5<1>F g9<4,4,1>F g2.7<0,1,0>F g3.3<0,1,0>F { align16 1H };
|
||||
mad(8) g6<1>DF -g55<4,4,1>DF g2<4,4,1>DF -g47<4,4,1>DF { align16 1Q };
|
||||
mad.z.f0.0(8) g8<1>F g3.2<0,1,0>F g3.1<0,1,0>F g3.0<0,1,0>F { align16 1Q };
|
||||
mad(8) g63<1>DF -g48<4,4,1>DF g56<4,4,1>DF -g44<4,4,1>DF { align16 2Q };
|
||||
mad.nz.f0.0(8) g10<1>F -g12.0<0,1,0>F g7<4,4,1>F g10<4,4,1>F { align16 1Q };
|
||||
mad.nz.f0.0(16) g15<1>F -g33.0<0,1,0>F g9<4,4,1>F g17<4,4,1>F { align16 1H };
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
5b 01 60 00 00 00 1e 1a c8 61 21 00 05 20 47 01
|
||||
5b 01 80 00 00 00 1e 0e c8 c1 00 39 08 20 07 01
|
||||
5b 01 60 00 00 d8 1e 40 c8 e1 03 39 50 20 07 17
|
||||
5b 01 60 00 40 d8 1e 50 c8 21 03 39 30 20 07 14
|
||||
5b 11 60 00 00 d8 1e 1b c8 01 03 39 d4 20 47 06
|
||||
5b 01 60 00 40 00 1e 0d 01 e0 00 39 16 20 87 01
|
||||
5b 01 80 00 40 00 1e 1d 01 10 02 39 32 20 c7 03
|
||||
5b 01 60 00 00 dc 1e 1d c8 71 01 39 36 20 47 06
|
||||
5b 01 60 06 00 00 1e 05 c8 31 20 80 08 20 07 10
|
||||
5b 01 80 06 00 00 1e 07 c8 41 20 80 0c 20 07 04
|
||||
5b 01 60 00 00 04 1e 20 c8 f1 21 c0 04 20 c7 03
|
||||
5b 01 80 00 00 04 1e 38 c8 61 23 c0 04 20 47 01
|
||||
5b 01 60 80 00 00 1e 0c 01 42 20 00 08 20 07 02
|
||||
5b 01 80 80 00 00 1e 12 01 62 20 00 0c 20 87 02
|
||||
5b 01 60 00 00 01 1e 56 01 8c 25 c0 b1 20 47 13
|
||||
5b 11 60 00 00 dc 1e 55 c8 c1 01 39 a6 20 47 14
|
||||
5b 01 60 00 40 02 1e 0b 01 20 00 39 14 04 70 01
|
||||
5b 01 60 00 00 02 1e 0f 01 22 00 39 16 04 70 01
|
||||
5b 01 60 05 00 00 1e 02 c8 61 21 c0 0b 04 98 01
|
||||
5b 11 60 00 40 d8 1e 4f c8 71 02 39 2a 20 c7 13
|
||||
5b 01 60 00 40 04 1e 75 c8 41 27 00 12 20 47 1c
|
||||
5b 01 60 04 00 04 1e 0d 01 c0 01 39 12 04 a0 00
|
||||
5b 01 80 04 00 04 1e 17 01 10 01 39 0c 04 c0 00
|
||||
5b 01 60 00 00 03 1e 1a 01 20 20 40 04 04 70 01
|
||||
5b 01 60 00 40 05 1e 46 c8 d1 20 40 04 20 c7 0b
|
||||
5b 01 80 00 40 04 1e 5f c8 d1 05 39 aa 20 47 15
|
||||
5b 01 80 00 40 05 1e 05 c8 51 21 40 04 20 47 15
|
||||
5b 01 80 00 00 01 1e 38 01 68 20 40 0d 20 c7 0c
|
||||
5b 01 60 80 40 00 1e 7c c8 71 20 80 05 04 88 00
|
||||
5b 01 80 00 00 03 1e 47 01 70 23 40 6e 04 40 00
|
||||
5b 01 80 00 40 02 1e 4d 01 74 03 39 8e 04 40 00
|
||||
5b 01 80 00 00 02 1e 25 01 76 03 39 9a 04 40 00
|
||||
5b 01 60 00 00 d9 1e 2b c8 a1 02 39 44 20 c7 01
|
||||
5b 11 60 00 00 d9 1e 03 c8 21 00 39 de 20 c7 09
|
||||
5b 01 60 00 c0 00 1e 0c c8 11 01 39 0e 04 00 01
|
||||
5b 01 80 00 c0 00 1e 1b c8 61 01 39 26 04 40 07
|
||||
5b 01 60 80 00 04 1e 7d c8 91 00 39 0c 04 00 10
|
||||
5b 01 80 05 00 00 1e 05 c8 91 20 c0 05 04 d8 00
|
||||
5b 01 60 00 40 dc 1e 06 c8 71 03 39 04 20 c7 0b
|
||||
5b 01 60 01 00 00 1e 08 01 34 20 40 06 04 c0 00
|
||||
5b 11 60 00 40 dc 1e 3f c8 01 03 39 70 20 07 0b
|
||||
5b 01 60 02 40 00 1e 0a 01 c0 00 39 0e 20 87 02
|
||||
5b 01 80 02 40 00 1e 0f 01 10 02 39 12 20 47 04
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
math sqrt(16) g20<1>F g18<8,8,1>F null<8,8,1>F { align1 1H };
|
||||
math inv(8) g95<1>F g94<8,8,1>F null<8,8,1>F { align1 1Q };
|
||||
math inv(16) g10<1>F g8<8,8,1>F null<8,8,1>F { align1 1H };
|
||||
math intmod(8) g3<1>UD g1<0,1,0>UD g1.2<0,1,0>UD { align1 1Q };
|
||||
math intmod(8) g4<1>UD g1<0,1,0>UD g1.2<0,1,0>UD { align1 2Q };
|
||||
math sqrt(8) g24<1>F g23<8,8,1>F null<8,8,1>F { align1 1Q };
|
||||
math rsq(8) g5<1>F g2<8,8,1>F null<8,8,1>F { align1 1Q };
|
||||
math pow(8) g11<1>F g10<8,8,1>F 0x42fc6666F /* 126.2F */ { align1 1Q };
|
||||
math pow(16) g18<1>F g16<8,8,1>F 0x42fc6666F /* 126.2F */ { align1 1H };
|
||||
math log(8) g7<1>F g6<8,8,1>F null<8,8,1>F { align1 1Q };
|
||||
math log(16) g11<1>F g9<8,8,1>F null<8,8,1>F { align1 1H };
|
||||
math cos(8) g3<1>F g2<8,8,1>F null<8,8,1>F { align1 1Q };
|
||||
math cos(16) g4<1>F g2<8,8,1>F null<8,8,1>F { align1 1H };
|
||||
math intdiv(8) g4<1>UD g1<0,1,0>UD g1.4<0,1,0>UD { align1 1Q };
|
||||
math intdiv(8) g5<1>UD g1<0,1,0>UD g1.4<0,1,0>UD { align1 2Q };
|
||||
math intdiv(8) g24<1>D g4<0,1,0>D g2.2<0,1,0>D { align1 1Q };
|
||||
math sin(8) g10<1>F g9<8,8,1>F null<8,8,1>F { align1 1Q };
|
||||
math rsq(16) g68<1>F g66<8,8,1>F null<8,8,1>F { align1 1H };
|
||||
math exp(8) g124<1>F g10<8,8,1>F null<8,8,1>F { align1 1Q };
|
||||
math exp(16) g120<1>F g7<8,8,1>F null<8,8,1>F { align1 1H };
|
||||
math intdiv(8) g5<1>D g2<0,1,0>D g2.4<0,1,0>D { align1 2Q };
|
||||
math sin(16) g3<1>F g2<0,1,0>F null<8,8,1>F { align1 1H };
|
||||
math.sat pow(8) g3<1>F g2<0,1,0>F g2.4<0,1,0>F { align1 1Q };
|
||||
math.sat pow(16) g3<1>F g2<0,1,0>F g2.4<0,1,0>F { align1 1H };
|
||||
math.sat sqrt(8) g3<1>F g2<0,1,0>F null<8,8,1>F { align1 1Q };
|
||||
math.sat sqrt(16) g3<1>F g2<0,1,0>F null<8,8,1>F { align1 1H };
|
||||
math.sat exp(8) g3<1>F g2<0,1,0>F null<8,8,1>F { align1 1Q };
|
||||
math.sat exp(16) g3<1>F g2<0,1,0>F null<8,8,1>F { align1 1H };
|
||||
math.sat rsq(8) g127<1>F (abs)g7<8,8,1>F null<8,8,1>F { align1 1Q };
|
||||
math.sat inv(8) g124<1>F g2<0,1,0>F null<8,8,1>F { align1 1Q };
|
||||
math.sat log(8) g127<1>F g7<8,8,1>F null<8,8,1>F { align1 1Q };
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
38 00 80 04 e8 3a 80 22 40 02 8d 38 00 00 8d 00
|
||||
38 00 60 01 e8 3a e0 2b c0 0b 8d 38 00 00 8d 00
|
||||
38 00 80 01 e8 3a 40 21 00 01 8d 38 00 00 8d 00
|
||||
38 00 60 0d 08 02 60 20 20 00 00 02 28 00 00 00
|
||||
38 10 60 0d 08 02 80 20 20 00 00 02 28 00 00 00
|
||||
38 00 60 04 e8 3a 00 23 e0 02 8d 38 00 00 8d 00
|
||||
38 00 60 05 e8 3a a0 20 40 00 8d 38 00 00 8d 00
|
||||
38 00 60 0a e8 3a 60 21 40 01 8d 3e 66 66 fc 42
|
||||
38 00 80 0a e8 3a 40 22 00 02 8d 3e 66 66 fc 42
|
||||
38 00 60 02 e8 3a e0 20 c0 00 8d 38 00 00 8d 00
|
||||
38 00 80 02 e8 3a 60 21 20 01 8d 38 00 00 8d 00
|
||||
38 00 60 07 e8 3a 60 20 40 00 8d 38 00 00 8d 00
|
||||
38 00 80 07 e8 3a 80 20 40 00 8d 38 00 00 8d 00
|
||||
38 00 60 0c 08 02 80 20 20 00 00 02 30 00 00 00
|
||||
38 10 60 0c 08 02 a0 20 20 00 00 02 30 00 00 00
|
||||
38 00 60 0c 28 0a 00 23 80 00 00 0a 48 00 00 00
|
||||
38 00 60 06 e8 3a 40 21 20 01 8d 38 00 00 8d 00
|
||||
38 00 80 05 e8 3a 80 28 40 08 8d 38 00 00 8d 00
|
||||
38 00 60 03 e8 3a 80 2f 40 01 8d 38 00 00 8d 00
|
||||
38 00 80 03 e8 3a 00 2f e0 00 8d 38 00 00 8d 00
|
||||
38 10 60 0c 28 0a a0 20 40 00 00 0a 50 00 00 00
|
||||
38 00 80 06 e8 3a 60 20 40 00 00 38 00 00 8d 00
|
||||
38 00 60 8a e8 3a 60 20 40 00 00 3a 50 00 00 00
|
||||
38 00 80 8a e8 3a 60 20 40 00 00 3a 50 00 00 00
|
||||
38 00 60 84 e8 3a 60 20 40 00 00 38 00 00 8d 00
|
||||
38 00 80 84 e8 3a 60 20 40 00 00 38 00 00 8d 00
|
||||
38 00 60 83 e8 3a 60 20 40 00 00 38 00 00 8d 00
|
||||
38 00 80 83 e8 3a 60 20 40 00 00 38 00 00 8d 00
|
||||
38 00 60 85 e8 3a e0 2f e0 20 8d 38 00 00 8d 00
|
||||
38 00 60 81 e8 3a 80 2f 40 00 00 38 00 00 8d 00
|
||||
38 00 60 82 e8 3a e0 2f e0 00 8d 38 00 00 8d 00
|
||||
|
|
@ -1,139 +0,0 @@
|
|||
mov(8) g123<1>UD g1<8,8,1>UD { align1 WE_all 1Q };
|
||||
mov(8) g124<1>F 0x40c00000F /* 6F */ { align1 1Q };
|
||||
mov(8) g14<1>UD 0x00000000UD { align1 1Q };
|
||||
mov(8) g17<1>F g12<8,8,1>F { align1 1Q };
|
||||
mov.sat(8) g124<1>F g8<8,8,1>F { align1 1Q };
|
||||
mov(8) g61<2>D g22<8,8,1>D { align1 1Q };
|
||||
mov(8) g21<1>D g59<8,4,2>UD { align1 1Q };
|
||||
mov(8) g4<1>D -1D { align1 1Q };
|
||||
mov.nz.f0.0(8) null<1>D g4<8,8,1>D { align1 1Q };
|
||||
mov(1) g2.2<1>UD 0x00000000UD { align1 WE_all 1N };
|
||||
mov(4) g114<1>F g2.3<8,2,4>F { align1 WE_all 1N };
|
||||
mov(8) g126<1>F g4<8,8,1>D { align1 1Q };
|
||||
mov(16) g124<1>F g4<8,8,1>D { align1 1H };
|
||||
mov(16) g120<1>F g124<8,8,1>F { align1 1H };
|
||||
mov(16) g124<1>F 0x0F /* 0F */ { align1 1H };
|
||||
mov(16) g124<1>D 1065353216D { align1 1H };
|
||||
mov.nz.f0.0(16) null<1>D g2<0,1,0>D { align1 1H };
|
||||
mov(8) g3<1>UW 0x76543210V { align1 WE_all 1Q };
|
||||
mov(16) g20<1>UD g0.1<0,1,0>UD { align1 1H };
|
||||
mov(16) g6<1>D g3<8,8,1>UW { align1 1H };
|
||||
mov(8) g1<1>D g4<8,8,1>D { align1 2Q };
|
||||
mov(8) g5<1>D 0D { align1 2Q };
|
||||
mov(8) g2<1>F g6<8,4,1>UW { align1 1Q };
|
||||
mov(8) g7<1>D g2<8,8,1>F { align1 1Q };
|
||||
mov(16) g2<1>F g10<8,4,1>UW { align1 1H };
|
||||
mov(16) g11<1>D g2<8,8,1>F { align1 1H };
|
||||
mov(8) g80<1>DF g5<0,1,0>DF { align1 1Q };
|
||||
mov(8) g92<2>UD g6.4<0,1,0>UD { align1 1Q };
|
||||
mov(8) g62<1>Q 0xbff0000000000000Q { align1 1Q };
|
||||
mov(8) g92<2>F g92<4,4,1>DF { align1 1Q };
|
||||
mov(8) g92<1>DF g95<4,4,1>F { align1 1Q };
|
||||
mov(8) g106<1>DF g2<0,1,0>F { align1 2Q };
|
||||
mov(8) g48<1>Q 0xbff0000000000000Q { align1 2Q };
|
||||
mov(8) g127<1>UD g106.1<8,4,2>UD { align1 2Q };
|
||||
mov(8) g11<2>F g7<4,4,1>DF { align1 2Q };
|
||||
mov(8) g33<1>D g34<8,4,2>UD { align1 2Q };
|
||||
mov(8) g6<2>UD 0x00000000UD { align1 2Q };
|
||||
mov(8) g2<1>UW 0x76543210UV { align1 1Q };
|
||||
mov(8) g12<1>UD g2<8,8,1>UW { align1 1Q };
|
||||
mov(8) g7<1>UD 0x00080000UD { align1 WE_all 1Q };
|
||||
mov(1) g2<1>F 0x3e800000F /* 0.25F */ { align1 WE_all 1N };
|
||||
mov(8) g15<1>F g11<8,8,1>UD { align1 1Q };
|
||||
mov(1) f0.1<1>UW g1.14<0,1,0>UW { align1 WE_all 1N };
|
||||
mov(8) g18<1>UD g2<8,8,1>D { align1 1Q };
|
||||
mov(16) g18<1>UD g26<8,8,1>D { align1 1H };
|
||||
mov(16) g120<1>D g34<8,8,1>D { align1 1H };
|
||||
mov(8) g8<1>Q g13<4,4,1>Q { align1 1Q };
|
||||
mov(8) g21<1>UD g0<8,8,1>UD { align1 WE_all 2Q };
|
||||
mov(8) g23<1>F g6<0,1,0>F { align1 2Q };
|
||||
mov(1) g21.2<1>UD 0x000003f2UD { align1 WE_all 3N };
|
||||
mov.nz.f0.0(8) g19<1>D g3<8,4,2>UD { align1 1Q };
|
||||
mov(1) f1<1>UD g1.7<0,1,0>UD { align1 WE_all 1N };
|
||||
mov.sat(8) g126<1>F 0x0F /* 0F */ { align1 1Q };
|
||||
mov.sat(8) g124<1>F -g36<8,8,1>D { align1 1Q };
|
||||
mov(8) g41<1>F 0x0F /* 0F */ { align1 2Q };
|
||||
mov(8) g42<1>UD g11<8,8,1>D { align1 2Q };
|
||||
mov(16) g86<1>UD g88<8,8,1>UD { align1 WE_all 1H };
|
||||
mov.sat(16) g120<1>F g2<0,1,0>F { align1 1H };
|
||||
mov(16) g2<1>F g18<8,8,1>UD { align1 1H };
|
||||
mov(8) g4<1>UD 0x0F /* 0F */ { align1 1Q };
|
||||
mov(8) g8<1>DF g2<0,1,0>D { align1 1Q };
|
||||
mov(16) g8<1>UD 0x00000000UD { align1 1H };
|
||||
mov.nz.f0.0(8) g4<1>F -(abs)g2<0,1,0>F { align1 1Q };
|
||||
(+f0.0) mov(8) g4<1>F 0xbf800000F /* -1F */ { align1 1Q };
|
||||
mov.nz.f0.0(16) g4<1>F -(abs)g2<0,1,0>F { align1 1H };
|
||||
(+f0.0) mov(16) g4<1>F 0xbf800000F /* -1F */ { align1 1H };
|
||||
mov(1) f1<1>UD g1.7<0,1,0>UD { align1 WE_all 3N };
|
||||
mov(8) g32<1>DF g2<0,1,0>DF { align1 2Q };
|
||||
mov(8) g5<1>F g2<0,1,0>HF { align1 1Q };
|
||||
mov(16) g6<1>F g2<0,1,0>HF { align1 1H };
|
||||
mov(8) g7<1>UD g2<0,1,0>F { align1 1Q };
|
||||
mov(16) g15<1>UD g11<8,8,1>F { align1 1H };
|
||||
mov(16) g19<1>UD g15<16,8,2>UW { align1 1H };
|
||||
mov(1) g19<1>UD g[a0 64]<0,1,0>UD { align1 WE_all 1N };
|
||||
mov(16) g23<1>UD g21<32,8,4>UB { align1 1H };
|
||||
mov(8) g7<1>DF 0x0000000000000000DF /* 0DF */ { align1 1Q };
|
||||
mov(8) g5<1>F 0x0F /* 0F */ { align1 WE_all 1Q };
|
||||
mov(16) g4<1>UD 0x00000000UD { align1 WE_all 1H };
|
||||
mov(8) g5<2>UD g2<0,1,0>DF { align1 1Q };
|
||||
mov(8) g10<2>UD g2<0,1,0>DF { align1 2Q };
|
||||
mov(8) g3<1>DF g2<0,1,0>UD { align1 1Q };
|
||||
mov(8) g3<1>DF g2<0,1,0>UD { align1 2Q };
|
||||
mov(1) f0<1>UW 0x0000UW { align1 WE_all 1N };
|
||||
mov(1) g1<1>D 0D { align1 WE_all 1N };
|
||||
(+f0.0.any16h) mov(1) g1<1>D -1D { align1 WE_all 1N };
|
||||
mov(8) g9<1>F g2<0,1,0>W { align1 1Q };
|
||||
mov(8) g7<1>UQ g4<4,4,1>UQ { align1 1Q };
|
||||
mov(16) g11<1>UD 0x0F /* 0F */ { align1 1H };
|
||||
mov(8) g5<2>D g2<0,1,0>DF { align1 1Q };
|
||||
mov(8) g10<2>D g2<0,1,0>DF { align1 2Q };
|
||||
mov(1) f1<1>UW f0.1<0,1,0>UW { align1 WE_all 1N };
|
||||
mov(1) f1<1>UW f0.1<0,1,0>UW { align1 WE_all 3N };
|
||||
mov(16) g4<1>D 0D { align1 2H };
|
||||
mov(8) g14<1>UD g13<32,8,4>UB { align1 1Q };
|
||||
mov(16) g124<1>UD g15<8,8,1>UD { align1 2H };
|
||||
mov(16) g118<1>D g122<8,8,1>UW { align1 2H };
|
||||
mov(16) g101<1>UD 0x00000001UD { align1 2H };
|
||||
mov(1) g4<2>UW 0x00000000UD { align1 WE_all 1N };
|
||||
mov(8) g4<1>UD f0<0,1,0>UW { align1 1Q };
|
||||
mov(8) g8<1>D g2<8,8,1>UW { align1 1Q };
|
||||
mov(16) g4<1>UD f0<0,1,0>UW { align1 1H };
|
||||
mov(8) g3<1>DF -g2<0,1,0>D { align1 2Q };
|
||||
mov(8) g5<1>F g2<0,1,0>B { align1 1Q };
|
||||
mov(16) g6<1>F g2<0,1,0>B { align1 1H };
|
||||
mov(8) g4<1>DF 0x0000000000000000DF /* 0DF */ { align1 2Q };
|
||||
mov.nz.f0.0(8) g16<1>D g17<8,4,2>UD { align1 2Q };
|
||||
mov(8) g34<1>UW 0x76543210V { align1 1Q };
|
||||
mov(8) g8<1>UD 48D { align1 1Q };
|
||||
mov(16) g8<1>UD 0D { align1 1H };
|
||||
mov(8) g7<2>HF g2.1<0,1,0>F { align1 1Q };
|
||||
mov(1) g5<1>D g[a0 96]<0,1,0>D { align1 WE_all 1N };
|
||||
(+f0.0.any8h) mov(1) g2<1>D -1D { align1 WE_all 1N };
|
||||
mov(8) g9<1>UD 0D { align1 WE_all 1Q };
|
||||
mov(8) g2<2>UW g9<8,8,1>F { align1 1Q };
|
||||
mov(8) g3<1>UW g2<16,8,2>UW { align1 1Q };
|
||||
mov(8) g12<1>UW g8<16,8,2>UW { align1 WE_all 1Q };
|
||||
mov.sat(16) g13<1>F 0x3f800000F /* 1F */ { align1 1H };
|
||||
mov(16) g19<2>UW g17<8,8,1>F { align1 1H };
|
||||
mov(16) g4<1>UW g13<16,8,2>UW { align1 WE_all 1H };
|
||||
mov.nz.f0.0(8) null<1>D 0x00000000UD { align1 1Q };
|
||||
mov.nz.f0.0(16) null<1>D 0x00000000UD { align1 1H };
|
||||
mov(4) g3<1>UD tm0<4,4,1>UD { align1 WE_all 1N };
|
||||
(+f0.0.all16h) mov(1) g1<1>D -1D { align1 WE_all 1N };
|
||||
mov(8) g9<1>F g2<0,1,0>UB { align1 1Q };
|
||||
mov(16) g6<1>F g2<0,1,0>UB { align1 1H };
|
||||
mov(16) g10<2>HF g4<8,8,1>F { align1 1H };
|
||||
mov.z.f0.0(8) null<1>UD g2<8,8,1>UD { align1 1Q };
|
||||
mov.sat(8) g125<1>F g9<8,8,1>UD { align1 1Q };
|
||||
mov.z.f0.0(16) g1<1>UD g0.7<0,1,0>UD { align1 1H };
|
||||
mov.z.f0.0(8) g18<1>D g17<8,8,1>F { align1 1Q };
|
||||
mov(16) g35<1>F g15<16,8,2>W { align1 1H };
|
||||
mov(8) g23<1>Q g26<4,4,1>Q { align1 2Q };
|
||||
mov(8) g2<1>D 0x00000000UD { align1 1Q };
|
||||
mov(16) g2<1>D 0x00000000UD { align1 1H };
|
||||
(+f0.0.all8h) mov(1) g7<1>D -1D { align1 WE_all 1N };
|
||||
mov(8) g127<1>UB g2<0,1,0>UB { align1 WE_all 1Q };
|
||||
mov.z.f0.0(8) null<1>D g24<8,8,1>F { align1 1Q };
|
||||
mov.z.f0.0(16) null<1>D g76<8,8,1>F { align1 1H };
|
||||
mov(16) g7<1>D g2<16,8,2>B { align1 1H };
|
||||
|
|
@ -1,139 +0,0 @@
|
|||
01 00 60 00 0c 02 60 2f 20 00 8d 00 00 00 00 00
|
||||
01 00 60 00 e8 3e 80 2f 00 00 00 38 00 00 c0 40
|
||||
01 00 60 00 08 06 c0 21 00 00 00 00 00 00 00 00
|
||||
01 00 60 00 e8 3a 20 22 80 01 8d 00 00 00 00 00
|
||||
01 00 60 80 e8 3a 80 2f 00 01 8d 00 00 00 00 00
|
||||
01 00 60 00 28 0a a0 47 c0 02 8d 00 00 00 00 00
|
||||
01 00 60 00 28 02 a0 22 60 07 8a 00 00 00 00 00
|
||||
01 00 60 00 28 0e 80 20 00 00 00 08 ff ff ff ff
|
||||
01 00 60 02 20 0a 00 20 80 00 8d 00 00 00 00 00
|
||||
01 00 00 00 0c 06 48 20 00 00 00 00 00 00 00 00
|
||||
01 00 40 00 ec 3a 40 2e 4c 00 87 00 00 00 00 00
|
||||
01 00 60 00 e8 0a c0 2f 80 00 8d 00 00 00 00 00
|
||||
01 00 80 00 e8 0a 80 2f 80 00 8d 00 00 00 00 00
|
||||
01 00 80 00 e8 3a 00 2f 80 0f 8d 00 00 00 00 00
|
||||
01 00 80 00 e8 3e 80 2f 00 00 00 38 00 00 00 00
|
||||
01 00 80 00 28 0e 80 2f 00 00 00 08 00 00 80 3f
|
||||
01 00 80 02 20 0a 00 20 40 00 00 00 00 00 00 00
|
||||
01 00 60 00 4c 36 60 20 00 00 00 30 10 32 54 76
|
||||
01 00 80 00 08 02 80 22 04 00 00 00 00 00 00 00
|
||||
01 00 80 00 28 12 c0 20 60 00 8d 00 00 00 00 00
|
||||
01 10 60 00 28 0a 20 20 80 00 8d 00 00 00 00 00
|
||||
01 10 60 00 28 0e a0 20 00 00 00 08 00 00 00 00
|
||||
01 00 60 00 e8 12 40 20 c0 00 89 00 00 00 00 00
|
||||
01 00 60 00 28 3a e0 20 40 00 8d 00 00 00 00 00
|
||||
01 00 80 00 e8 12 40 20 40 01 89 00 00 00 00 00
|
||||
01 00 80 00 28 3a 60 21 40 00 8d 00 00 00 00 00
|
||||
01 00 60 00 c8 32 00 2a a0 00 00 00 00 00 00 00
|
||||
01 00 60 00 08 02 80 4b d0 00 00 00 00 00 00 00
|
||||
01 00 60 00 28 4f c0 27 00 00 00 00 00 00 f0 bf
|
||||
01 00 60 00 e8 32 80 4b 80 0b 69 00 00 00 00 00
|
||||
01 00 60 00 c8 3a 80 2b e0 0b 69 00 00 00 00 00
|
||||
01 10 60 00 c8 3a 40 2d 40 00 00 00 00 00 00 00
|
||||
01 10 60 00 28 4f 00 26 00 00 00 00 00 00 f0 bf
|
||||
01 10 60 00 08 02 e0 2f 44 0d 8a 00 00 00 00 00
|
||||
01 10 60 00 e8 32 60 41 e0 00 69 00 00 00 00 00
|
||||
01 10 60 00 28 02 20 24 40 04 8a 00 00 00 00 00
|
||||
01 10 60 00 08 06 c0 40 00 00 00 00 00 00 00 00
|
||||
01 00 60 00 48 26 40 20 00 00 00 20 10 32 54 76
|
||||
01 00 60 00 08 12 80 21 40 00 8d 00 00 00 00 00
|
||||
01 00 60 00 0c 06 e0 20 00 00 00 00 00 00 08 00
|
||||
01 00 00 00 ec 3e 40 20 00 00 00 38 00 00 80 3e
|
||||
01 00 60 00 e8 02 e0 21 60 01 8d 00 00 00 00 00
|
||||
01 00 00 00 44 12 02 26 3c 00 00 00 00 00 00 00
|
||||
01 00 60 00 08 0a 40 22 40 00 8d 00 00 00 00 00
|
||||
01 00 80 00 08 0a 40 22 40 03 8d 00 00 00 00 00
|
||||
01 00 80 00 28 0a 00 2f 40 04 8d 00 00 00 00 00
|
||||
01 00 60 00 28 4b 00 21 a0 01 69 00 00 00 00 00
|
||||
01 10 60 00 0c 02 a0 22 00 00 8d 00 00 00 00 00
|
||||
01 10 60 00 e8 3a e0 22 c0 00 00 00 00 00 00 00
|
||||
01 10 00 00 0c 06 a8 22 00 00 00 00 f2 03 00 00
|
||||
01 00 60 02 28 02 60 22 60 00 8a 00 00 00 00 00
|
||||
01 00 00 00 04 02 20 26 3c 00 00 00 00 00 00 00
|
||||
01 00 60 80 e8 3e c0 2f 00 00 00 38 00 00 00 00
|
||||
01 00 60 80 e8 0a 80 2f 80 44 8d 00 00 00 00 00
|
||||
01 10 60 00 e8 3e 20 25 00 00 00 38 00 00 00 00
|
||||
01 10 60 00 08 0a 40 25 60 01 8d 00 00 00 00 00
|
||||
01 00 80 00 0c 02 c0 2a 00 0b 8d 00 00 00 00 00
|
||||
01 00 80 80 e8 3a 00 2f 40 00 00 00 00 00 00 00
|
||||
01 00 80 00 e8 02 40 20 40 02 8d 00 00 00 00 00
|
||||
01 00 60 00 08 3e 80 20 00 00 00 38 00 00 00 00
|
||||
01 00 60 00 c8 0a 00 21 40 00 00 00 00 00 00 00
|
||||
01 00 80 00 08 06 00 21 00 00 00 00 00 00 00 00
|
||||
01 00 60 02 e8 3a 80 20 40 60 00 00 00 00 00 00
|
||||
01 00 61 00 e8 3e 80 20 00 00 00 38 00 00 80 bf
|
||||
01 00 80 02 e8 3a 80 20 40 60 00 00 00 00 00 00
|
||||
01 00 81 00 e8 3e 80 20 00 00 00 38 00 00 80 bf
|
||||
01 10 00 00 04 02 20 26 3c 00 00 00 00 00 00 00
|
||||
01 10 60 00 c8 32 00 24 40 00 00 00 00 00 00 00
|
||||
01 00 60 00 e8 52 a0 20 40 00 00 00 00 00 00 00
|
||||
01 00 80 00 e8 52 c0 20 40 00 00 00 00 00 00 00
|
||||
01 00 60 00 08 3a e0 20 40 00 00 00 00 00 00 00
|
||||
01 00 80 00 08 3a e0 21 60 01 8d 00 00 00 00 00
|
||||
01 00 80 00 08 12 60 22 e0 01 ae 00 00 00 00 00
|
||||
01 00 00 00 0c 02 60 22 40 80 00 00 00 00 00 00
|
||||
01 00 80 00 08 22 e0 22 a0 02 cf 00 00 00 00 00
|
||||
01 00 60 00 c8 56 e0 20 00 00 00 00 00 00 00 00
|
||||
01 00 60 00 ec 3e a0 20 00 00 00 38 00 00 00 00
|
||||
01 00 80 00 0c 06 80 20 00 00 00 00 00 00 00 00
|
||||
01 00 60 00 08 32 a0 40 40 00 00 00 00 00 00 00
|
||||
01 10 60 00 08 32 40 41 40 00 00 00 00 00 00 00
|
||||
01 00 60 00 c8 02 60 20 40 00 00 00 00 00 00 00
|
||||
01 10 60 00 c8 02 60 20 40 00 00 00 00 00 00 00
|
||||
01 00 00 00 44 16 00 26 00 00 00 10 00 00 00 00
|
||||
01 00 00 00 2c 0e 20 20 00 00 00 08 00 00 00 00
|
||||
01 00 0a 00 2c 0e 20 20 00 00 00 08 ff ff ff ff
|
||||
01 00 60 00 e8 1a 20 21 40 00 00 00 00 00 00 00
|
||||
01 00 60 00 08 43 e0 20 80 00 69 00 00 00 00 00
|
||||
01 00 80 00 08 3e 60 21 00 00 00 38 00 00 00 00
|
||||
01 00 60 00 28 32 a0 40 40 00 00 00 00 00 00 00
|
||||
01 10 60 00 28 32 40 41 40 00 00 00 00 00 00 00
|
||||
01 00 00 00 44 10 20 26 02 06 00 00 00 00 00 00
|
||||
01 10 00 00 44 10 20 26 02 06 00 00 00 00 00 00
|
||||
01 20 80 00 28 0e 80 20 00 00 00 08 00 00 00 00
|
||||
01 00 60 00 08 22 c0 21 a0 01 cf 00 00 00 00 00
|
||||
01 20 80 00 08 02 80 2f e0 01 8d 00 00 00 00 00
|
||||
01 20 80 00 28 12 c0 2e 40 0f 8d 00 00 00 00 00
|
||||
01 20 80 00 08 06 a0 2c 00 00 00 00 01 00 00 00
|
||||
01 00 00 00 4c 06 80 40 00 00 00 00 00 00 00 00
|
||||
01 00 60 00 08 10 80 20 00 06 00 00 00 00 00 00
|
||||
01 00 60 00 28 12 00 21 40 00 8d 00 00 00 00 00
|
||||
01 00 80 00 08 10 80 20 00 06 00 00 00 00 00 00
|
||||
01 10 60 00 c8 0a 60 20 40 40 00 00 00 00 00 00
|
||||
01 00 60 00 e8 2a a0 20 40 00 00 00 00 00 00 00
|
||||
01 00 80 00 e8 2a c0 20 40 00 00 00 00 00 00 00
|
||||
01 10 60 00 c8 56 80 20 00 00 00 00 00 00 00 00
|
||||
01 10 60 02 28 02 00 22 20 02 8a 00 00 00 00 00
|
||||
01 00 60 00 48 36 40 24 00 00 00 30 10 32 54 76
|
||||
01 00 60 00 08 0e 00 21 00 00 00 08 30 00 00 00
|
||||
01 00 80 00 08 0e 00 21 00 00 00 08 00 00 00 00
|
||||
01 00 60 00 48 3b e0 40 44 00 00 00 00 00 00 00
|
||||
01 00 00 00 2c 0a a0 20 60 80 00 00 00 00 00 00
|
||||
01 00 08 00 2c 0e 40 20 00 00 00 08 ff ff ff ff
|
||||
01 00 60 00 0c 0e 20 21 00 00 00 08 00 00 00 00
|
||||
01 00 60 00 48 3a 40 40 20 01 8d 00 00 00 00 00
|
||||
01 00 60 00 48 12 60 20 40 00 ae 00 00 00 00 00
|
||||
01 00 60 00 4c 12 80 21 00 01 ae 00 00 00 00 00
|
||||
01 00 80 80 e8 3e a0 21 00 00 00 38 00 00 80 3f
|
||||
01 00 80 00 48 3a 60 42 20 02 8d 00 00 00 00 00
|
||||
01 00 80 00 4c 12 80 20 a0 01 ae 00 00 00 00 00
|
||||
01 00 60 02 20 06 00 20 00 00 00 00 00 00 00 00
|
||||
01 00 80 02 20 06 00 20 00 00 00 00 00 00 00 00
|
||||
01 00 40 00 0c 00 60 20 00 18 69 00 00 00 00 00
|
||||
01 00 0b 00 2c 0e 20 20 00 00 00 08 ff ff ff ff
|
||||
01 00 60 00 e8 22 20 21 40 00 00 00 00 00 00 00
|
||||
01 00 80 00 e8 22 c0 20 40 00 00 00 00 00 00 00
|
||||
01 00 80 00 48 3b 40 41 80 00 8d 00 00 00 00 00
|
||||
01 00 60 01 00 02 00 20 40 00 8d 00 00 00 00 00
|
||||
01 00 60 80 e8 02 a0 2f 20 01 8d 00 00 00 00 00
|
||||
01 00 80 01 08 02 20 20 1c 00 00 00 00 00 00 00
|
||||
01 00 60 01 28 3a 40 22 20 02 8d 00 00 00 00 00
|
||||
01 00 80 00 e8 1a 60 24 e0 01 ae 00 00 00 00 00
|
||||
01 10 60 00 28 4b e0 22 40 03 69 00 00 00 00 00
|
||||
01 00 60 00 28 06 40 20 00 00 00 00 00 00 00 00
|
||||
01 00 80 00 28 06 40 20 00 00 00 00 00 00 00 00
|
||||
01 00 09 00 2c 0e e0 20 00 00 00 08 ff ff ff ff
|
||||
01 00 60 00 8c 22 e0 2f 40 00 00 00 00 00 00 00
|
||||
01 00 60 01 20 3a 00 20 00 03 8d 00 00 00 00 00
|
||||
01 00 80 01 20 3a 00 20 80 09 8d 00 00 00 00 00
|
||||
01 00 80 00 28 2a e0 20 40 00 ae 00 00 00 00 00
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
mul(8) g22<1>F g4<8,8,1>F g2<0,1,0>F { align1 1Q };
|
||||
mul(16) g26<1>F g2<0,1,0>F g2<0,1,0>F { align1 1H };
|
||||
mul(8) g36<1>DF g8<0,1,0>DF g8<0,1,0>DF { align1 1Q };
|
||||
mul(8) g9<1>UD g86<8,8,1>UD 0x00000004UD { align1 1Q };
|
||||
mul(8) acc0<1>UD g17<8,8,1>UD 0xaaabUW { align1 1Q };
|
||||
mul(8) acc0<1>D g17<8,8,1>D 0x5556UW { align1 1Q };
|
||||
mul(8) g21<1>D g20<8,8,1>D 3D { align1 1Q };
|
||||
mul(8) acc0<1>UD g39<8,8,1>UD 0xaaabUW { align1 2Q };
|
||||
mul(16) g45<1>D g43<8,8,1>D 3D { align1 1H };
|
||||
mul(8) acc0<1>D g39<8,8,1>D 0x5556UW { align1 2Q };
|
||||
mul.z.f0.0(8) g10<1>F g5<0,1,0>F g9<8,8,1>F { align1 1Q };
|
||||
mul(8) g39<1>DF g3.3<0,1,0>DF g3.3<0,1,0>DF { align1 2Q };
|
||||
mul.z.f0.0(16) g6<1>F g2<0,1,0>F g4<8,8,1>F { align1 1H };
|
||||
mul.sat(8) g17<1>F g4<8,8,1>F g16<8,8,1>F { align1 1Q };
|
||||
mul.sat(16) g9<1>F g3<8,8,1>F g7<8,8,1>F { align1 1H };
|
||||
mul.l.f0.0(8) null<1>F g6<0,1,0>F g5.7<0,1,0>F { align1 1Q };
|
||||
mul.sat(8) g8<1>DF g34<4,4,1>DF g5<4,4,1>DF { align1 1Q };
|
||||
mul(8) g4<1>UQ g8<4,4,1>UD g12<4,4,1>UD { align1 1Q };
|
||||
mul(8) g20<1>UQ g5<4,4,1>UD g13<4,4,1>UD { align1 2Q };
|
||||
mul(8) g5<1>Q g9<4,4,1>D g13<4,4,1>D { align1 1Q };
|
||||
mul.sat(8) g10<1>DF g10<4,4,1>DF g16<4,4,1>DF { align1 2Q };
|
||||
mul.l.f0.0(8) g20<1>F g2<8,8,1>F 0x42700000F /* 60F */ { align1 1Q };
|
||||
mul.l.f0.0(16) g32<1>F g2<8,8,1>F 0x42700000F /* 60F */ { align1 1H };
|
||||
mul(1) g6<1>UD g12<0,1,0>UD 0x00000101UD { align1 WE_all 1N };
|
||||
mul(8) g21<1>Q g6<4,4,1>D g14<4,4,1>D { align1 2Q };
|
||||
mul.l.f0.0(16) null<1>F g2.2<0,1,0>F g2.1<0,1,0>F { align1 1H };
|
||||
mul(8) g6<1>UW g6<8,8,1>UW 0x0808UW { align1 1Q };
|
||||
mul(16) g15<1>UW g14<16,16,1>UW 0x0808UW { align1 1H };
|
||||
mul.nz.f0.0(8) g6<1>F g12<8,8,1>F 0x3f808000F /* 1.00391F */ { align1 1Q };
|
||||
mul.nz.f0.0(16) g9<1>F g7<8,8,1>F 0x3f808000F /* 1.00391F */ { align1 1H };
|
||||
mul(1) g4<1>UD g4<0,1,0>UD 0x00000101UD { align1 WE_all 3N };
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
41 00 60 00 e8 3a c0 22 80 00 8d 3a 40 00 00 00
|
||||
41 00 80 00 e8 3a 40 23 40 00 00 3a 40 00 00 00
|
||||
41 00 60 00 c8 32 80 24 00 01 00 32 00 01 00 00
|
||||
41 00 60 00 08 02 20 21 c0 0a 8d 06 04 00 00 00
|
||||
41 00 60 00 00 02 00 24 20 02 8d 16 ab aa ab aa
|
||||
41 00 60 00 20 0a 00 24 20 02 8d 16 56 55 56 55
|
||||
41 00 60 00 28 0a a0 22 80 02 8d 0e 03 00 00 00
|
||||
41 10 60 00 00 02 00 24 e0 04 8d 16 ab aa ab aa
|
||||
41 00 80 00 28 0a a0 25 60 05 8d 0e 03 00 00 00
|
||||
41 10 60 00 20 0a 00 24 e0 04 8d 16 56 55 56 55
|
||||
41 00 60 01 e8 3a 40 21 a0 00 00 3a 20 01 8d 00
|
||||
41 10 60 00 c8 32 e0 24 78 00 00 32 78 00 00 00
|
||||
41 00 80 01 e8 3a c0 20 40 00 00 3a 80 00 8d 00
|
||||
41 00 60 80 e8 3a 20 22 80 00 8d 3a 00 02 8d 00
|
||||
41 00 80 80 e8 3a 20 21 60 00 8d 3a e0 00 8d 00
|
||||
41 00 60 05 e0 3a 00 20 c0 00 00 3a bc 00 00 00
|
||||
41 00 60 80 c8 32 00 21 40 04 69 32 a0 00 69 00
|
||||
41 00 60 00 08 03 80 20 00 01 69 02 80 01 69 00
|
||||
41 10 60 00 08 03 80 22 a0 00 69 02 a0 01 69 00
|
||||
41 00 60 00 28 0b a0 20 20 01 69 0a a0 01 69 00
|
||||
41 10 60 80 c8 32 40 21 40 01 69 32 00 02 69 00
|
||||
41 00 60 05 e8 3a 80 22 40 00 8d 3e 00 00 70 42
|
||||
41 00 80 05 e8 3a 00 24 40 00 8d 3e 00 00 70 42
|
||||
41 00 00 00 0c 02 c0 20 80 01 00 06 01 01 00 00
|
||||
41 10 60 00 28 0b a0 22 c0 00 69 0a c0 01 69 00
|
||||
41 00 80 05 e0 3a 00 20 48 00 00 3a 44 00 00 00
|
||||
41 00 60 00 48 12 c0 20 c0 00 8d 16 08 08 08 08
|
||||
41 00 80 00 48 12 e0 21 c0 01 b1 16 08 08 08 08
|
||||
41 00 60 02 e8 3a c0 20 80 01 8d 3e 00 80 80 3f
|
||||
41 00 80 02 e8 3a 20 21 e0 00 8d 3e 00 80 80 3f
|
||||
41 10 00 00 0c 02 80 20 80 00 00 06 01 01 00 00
|
||||
|
|
@ -1 +0,0 @@
|
|||
nop ;
|
||||
|
|
@ -1 +0,0 @@
|
|||
7e 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
not(16) g3<1>D g1.2<0,1,0>D { align1 1H };
|
||||
not(8) g4<1>D g8<8,8,1>D { align1 1Q };
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
04 00 80 00 28 0a 60 20 28 00 00 00 00 00 00 00
|
||||
04 00 60 00 28 0a 80 20 00 01 8d 00 00 00 00 00
|
||||
|
|
@ -1,23 +0,0 @@
|
|||
or(8) g53<1>UD g49<8,8,1>UD g21<8,8,1>UD { align1 1Q };
|
||||
or.nz.f0.0(8) null<1>UD g21<8,8,1>UD g2<8,8,1>UD { align1 1Q };
|
||||
or.nz.f0.0(8) g5<1>UD g62<8,8,1>UD g67<8,8,1>UD { align1 1Q };
|
||||
or(8) g5<1>UD g106.1<8,4,2>UD 0x7ff00000UD { align1 2Q };
|
||||
or.nz.f0.0(16) null<1>UD g35<8,8,1>UD g32<8,8,1>UD { align1 1H };
|
||||
or(16) g36<1>UD g34<8,8,1>UD g20<8,8,1>UD { align1 1H };
|
||||
or.nz.f0.0(16) g53<1>UD g51<8,8,1>UD g49<8,8,1>UD { align1 1H };
|
||||
or(1) g8<1>UD g8<0,1,0>UD g4<0,1,0>UD { align1 WE_all 1N };
|
||||
or(1) a0<1>UD g8<0,1,0>UD 0x060ba000UD { align1 WE_all 1N };
|
||||
(+f0.0) or(8) g3<1>UD g3<8,8,1>UD 0x3f800000UD { align1 1Q };
|
||||
(+f0.0) or(16) g3<1>UD g3<8,8,1>UD 0x3f800000UD { align1 1H };
|
||||
or(1) a0<1>UD a0<0,1,0>UD 0x02280300UD { align1 WE_all 1N };
|
||||
or(1) a0<1>UD g4<0,1,0>UD 0x04036000UD { align1 WE_all 3N };
|
||||
(+f0.0) or(8) g17.1<2>UD g17.1<8,4,2>UD 0x3ff00000UD { align1 2Q };
|
||||
or(8) g4<1>UW g4<8,8,1>UW g6<8,8,1>UW { align1 1Q };
|
||||
or(16) g16<1>UW g14<16,16,1>UW g15<16,16,1>UW { align1 1H };
|
||||
or(8) g22<1>UD ~g2.2<0,1,0>D g21<8,8,1>UD { align1 1Q };
|
||||
or(16) g37<1>UD ~g2.2<0,1,0>D g35<8,8,1>UD { align1 1H };
|
||||
or(8) g9<1>D ~g8<8,8,1>D ~g7<8,8,1>D { align1 1Q };
|
||||
or(16) g13<1>D ~g11<8,8,1>D ~g9<8,8,1>D { align1 1H };
|
||||
or(1) g14<1>UD g14<0,1,0>UD g19<0,1,0>UD { align1 WE_all 3N };
|
||||
or.z.f0.0(8) null<1>UD g5<8,8,1>UD g6<8,8,1>UD { align1 1Q };
|
||||
or.z.f0.0(16) null<1>UD g17<8,8,1>UD g19<8,8,1>UD { align1 1H };
|
||||
|
|
@ -1,23 +0,0 @@
|
|||
06 00 60 00 08 02 a0 26 20 06 8d 02 a0 02 8d 00
|
||||
06 00 60 02 00 02 00 20 a0 02 8d 02 40 00 8d 00
|
||||
06 00 60 02 08 02 a0 20 c0 07 8d 02 60 08 8d 00
|
||||
06 10 60 00 08 02 a0 20 44 0d 8a 06 00 00 f0 7f
|
||||
06 00 80 02 00 02 00 20 60 04 8d 02 00 04 8d 00
|
||||
06 00 80 00 08 02 80 24 40 04 8d 02 80 02 8d 00
|
||||
06 00 80 02 08 02 a0 26 60 06 8d 02 20 06 8d 00
|
||||
06 00 00 00 0c 02 00 21 00 01 00 02 80 00 00 00
|
||||
06 00 00 00 04 02 00 22 00 01 00 06 00 a0 0b 06
|
||||
06 00 61 00 08 02 60 20 60 00 8d 06 00 00 80 3f
|
||||
06 00 81 00 08 02 60 20 60 00 8d 06 00 00 80 3f
|
||||
06 00 00 00 04 00 00 22 00 02 00 06 00 03 28 02
|
||||
06 10 00 00 04 02 00 22 80 00 00 06 00 60 03 04
|
||||
06 10 61 00 08 02 24 42 24 02 8a 06 00 00 f0 3f
|
||||
06 00 60 00 48 12 80 20 80 00 8d 12 c0 00 8d 00
|
||||
06 00 80 00 48 12 00 22 c0 01 b1 12 e0 01 b1 00
|
||||
06 00 60 00 08 0a c0 22 48 40 00 02 a0 02 8d 00
|
||||
06 00 80 00 08 0a a0 24 48 40 00 02 60 04 8d 00
|
||||
06 00 60 00 28 0a 20 21 00 41 8d 0a e0 40 8d 00
|
||||
06 00 80 00 28 0a a0 21 60 41 8d 0a 20 41 8d 00
|
||||
06 10 00 00 0c 02 c0 21 c0 01 00 02 60 02 00 00
|
||||
06 00 60 01 00 02 00 20 a0 00 8d 02 c0 00 8d 00
|
||||
06 00 80 01 00 02 00 20 20 02 8d 02 60 02 8d 00
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
pln(8) g124<1>F g4<0,1,0>F g2<8,8,1>F { align1 1Q };
|
||||
pln(16) g120<1>F g6<0,1,0>F g2<8,8,1>F { align1 1H };
|
||||
pln.sat(8) g9<1>F g5<0,1,0>F g2<8,8,1>F { align1 1Q };
|
||||
pln.sat(16) g12<1>F g7<0,1,0>F g2<8,8,1>F { align1 1H };
|
||||
pln.g.f0.0(8) g7<1>F g4<0,1,0>F g2<8,8,1>F { align1 1Q };
|
||||
pln.g.f0.0(16) g11<1>F g6<0,1,0>F g2<8,8,1>F { align1 1H };
|
||||
pln.l.f0.0(8) g8<1>F g4<0,1,0>F g2<8,8,1>F { align1 1Q };
|
||||
pln.l.f0.0(16) g11<1>F g6<0,1,0>F g2<8,8,1>F { align1 1H };
|
||||
pln.nz.f0.0(8) g18<1>F g5<0,1,0>F g2<8,8,1>F { align1 1Q };
|
||||
pln.nz.f0.0(16) g14<1>F g7<0,1,0>F g2<8,8,1>F { align1 1H };
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
5a 00 60 00 e8 3a 80 2f 80 00 00 3a 40 00 8d 00
|
||||
5a 00 80 00 e8 3a 00 2f c0 00 00 3a 40 00 8d 00
|
||||
5a 00 60 80 e8 3a 20 21 a0 00 00 3a 40 00 8d 00
|
||||
5a 00 80 80 e8 3a 80 21 e0 00 00 3a 40 00 8d 00
|
||||
5a 00 60 03 e8 3a e0 20 80 00 00 3a 40 00 8d 00
|
||||
5a 00 80 03 e8 3a 60 21 c0 00 00 3a 40 00 8d 00
|
||||
5a 00 60 05 e8 3a 00 21 80 00 00 3a 40 00 8d 00
|
||||
5a 00 80 05 e8 3a 60 21 c0 00 00 3a 40 00 8d 00
|
||||
5a 00 60 02 e8 3a 40 22 a0 00 00 3a 40 00 8d 00
|
||||
5a 00 80 02 e8 3a c0 21 e0 00 00 3a 40 00 8d 00
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue