diff --git a/src/mesa/main/shader_types.h b/src/mesa/main/shader_types.h index c832ca1a095..4ab4600e72a 100644 --- a/src/mesa/main/shader_types.h +++ b/src/mesa/main/shader_types.h @@ -567,14 +567,12 @@ struct gl_program GLubyte SamplerUnits[MAX_SAMPLERS]; struct pipe_shader_state state; - struct glsl_to_tgsi_visitor* glsl_to_tgsi; struct ati_fragment_shader *ati_fs; uint64_t affected_states; /**< ST_NEW_* flags to mark dirty when binding */ void *serialized_nir; unsigned serialized_nir_size; - /* used when bypassing glsl_to_tgsi: */ struct gl_shader_program *shader_program; struct st_variant *variants; diff --git a/src/mesa/meson.build b/src/mesa/meson.build index 08b0cac4974..cccc1618e36 100644 --- a/src/mesa/meson.build +++ b/src/mesa/meson.build @@ -363,14 +363,6 @@ files_libmesa = files( 'state_tracker/st_glsl_to_ir.cpp', 'state_tracker/st_glsl_to_ir.h', 'state_tracker/st_glsl_to_nir.cpp', - 'state_tracker/st_glsl_to_tgsi.cpp', - 'state_tracker/st_glsl_to_tgsi.h', - 'state_tracker/st_glsl_to_tgsi_array_merge.cpp', - 'state_tracker/st_glsl_to_tgsi_array_merge.h', - 'state_tracker/st_glsl_to_tgsi_private.cpp', - 'state_tracker/st_glsl_to_tgsi_private.h', - 'state_tracker/st_glsl_to_tgsi_temprename.cpp', - 'state_tracker/st_glsl_to_tgsi_temprename.h', 'state_tracker/st_manager.c', 'state_tracker/st_manager.h', 'state_tracker/st_nir.h', diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index cd3b4668877..1cf8ae48004 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -45,7 +45,6 @@ #include "util/u_atomic.h" #include "state_tracker/st_program.h" -#include "state_tracker/st_glsl_to_tgsi.h" #include "state_tracker/st_context.h" /** @@ -250,9 +249,6 @@ _mesa_delete_program(struct gl_context *ctx, struct gl_program *prog) st_release_variants(st, prog); - if (prog->glsl_to_tgsi) - free_glsl_to_tgsi_visitor(prog->glsl_to_tgsi); - free(prog->serialized_nir); if (prog == &_mesa_DummyProgram) diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index ac3e99ff336..38f306afaf7 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -173,15 +173,6 @@ st_nir_lookup_parameter_index(struct gl_program *prog, nir_variable *var) * fails. In this case just find the first matching "color.*".. * * Note for arrays you could end up w/ color[n].f, for example. - * - * glsl_to_tgsi works slightly differently in this regard. It is - * emitting something more low level, so it just translates the - * params list 1:1 to CONST[] regs. Going from GLSL IR to TGSI, - * it just calculates the additional offset of struct field members - * in glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) or - * glsl_to_tgsi_visitor::visit(ir_dereference_array *ir). It never - * needs to work backwards to get base var loc from the param-list - * which already has them separated out. */ if (!prog->sh.data->spirv) { int namelen = strlen(var->name); diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp deleted file mode 100644 index 59b342824b8..00000000000 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ /dev/null @@ -1,7527 +0,0 @@ -/* - * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. - * Copyright (C) 2008 VMware, Inc. All Rights Reserved. - * Copyright © 2010 Intel Corporation - * Copyright © 2011 Bryan Cain - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file glsl_to_tgsi.cpp - * - * Translate GLSL IR to TGSI. - */ - -#include "st_glsl_to_tgsi.h" -#include "st_program.h" - -#include "compiler/glsl/glsl_parser_extras.h" -#include "compiler/glsl/ir_optimization.h" -#include "compiler/glsl/linker.h" -#include "compiler/glsl/program.h" -#include "compiler/glsl/string_to_uint_map.h" - -#include "main/errors.h" -#include "main/shaderobj.h" -#include "main/uniforms.h" -#include "main/shaderapi.h" -#include "program/prog_instruction.h" - -#include "pipe/p_context.h" -#include "pipe/p_screen.h" -#include "tgsi/tgsi_ureg.h" -#include "tgsi/tgsi_info.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "st_program.h" -#include "st_format.h" -#include "st_glsl_to_tgsi_temprename.h" - -#include "util/hash_table.h" -#include - -#define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) | \ - (1 << PROGRAM_CONSTANT) | \ - (1 << PROGRAM_UNIFORM)) - -#define MAX_GLSL_TEXTURE_OFFSET 4 - -#ifndef NDEBUG -#include "util/u_atomic.h" -#include "util/simple_mtx.h" -#include -#include - -/* Prepare to make it possible to specify log file */ -static std::ofstream stats_log; - -/* Helper function to check whether we want to write some statistics - * of the shader conversion. - */ - -static simple_mtx_t print_stats_mutex = _SIMPLE_MTX_INITIALIZER_NP; - -static inline bool print_stats_enabled () -{ - static int stats_enabled = 0; - - if (!stats_enabled) { - simple_mtx_lock(&print_stats_mutex); - if (!stats_enabled) { - const char *stats_filename = getenv("GLSL_TO_TGSI_PRINT_STATS"); - if (stats_filename) { - bool write_header = std::ifstream(stats_filename).fail(); - stats_log.open(stats_filename, std::ios_base::out | std::ios_base::app); - stats_enabled = stats_log.good() ? 1 : -1; - if (write_header) - stats_log << "arrays,temps,temps in arrays,total,instructions\n"; - } else { - stats_enabled = -1; - } - } - simple_mtx_unlock(&print_stats_mutex); - } - return stats_enabled > 0; -} -#define PRINT_STATS(X) if (print_stats_enabled()) do { X; } while (false); -#else -#define PRINT_STATS(X) -#endif - - -namespace { - -class add_uniform_to_shader : public program_resource_visitor { -public: - add_uniform_to_shader(struct gl_context *ctx, - struct gl_shader_program *shader_program, - struct gl_program_parameter_list *params) - : ctx(ctx), shader_program(shader_program), params(params), idx(-1), - var(NULL) - { - /* empty */ - } - - void process(ir_variable *var) - { - this->idx = -1; - this->var = var; - this->program_resource_visitor::process(var, - ctx->Const.UseSTD430AsDefaultPacking); - var->data.param_index = this->idx; - } - -private: - virtual void visit_field(const glsl_type *type, const char *name, - bool row_major, const glsl_type *record_type, - const enum glsl_interface_packing packing, - bool last_field); - - struct gl_context *ctx; - struct gl_shader_program *shader_program; - struct gl_program_parameter_list *params; - int idx; - ir_variable *var; -}; - -} /* anonymous namespace */ - -void -add_uniform_to_shader::visit_field(const glsl_type *type, const char *name, - bool /* row_major */, - const glsl_type * /* record_type */, - const enum glsl_interface_packing, - bool /* last_field */) -{ - /* opaque types don't use storage in the param list unless they are - * bindless samplers or images. - */ - if (type->contains_opaque() && !var->data.bindless) - return; - - /* Add the uniform to the param list */ - assert(_mesa_lookup_parameter_index(params, name) < 0); - int index = _mesa_lookup_parameter_index(params, name); - - unsigned num_params = type->arrays_of_arrays_size(); - num_params = MAX2(num_params, 1); - num_params *= type->without_array()->matrix_columns; - - bool is_dual_slot = type->without_array()->is_dual_slot(); - if (is_dual_slot) - num_params *= 2; - - _mesa_reserve_parameter_storage(params, num_params, num_params); - index = params->NumParameters; - - if (ctx->Const.PackedDriverUniformStorage) { - for (unsigned i = 0; i < num_params; i++) { - unsigned dmul = type->without_array()->is_64bit() ? 2 : 1; - unsigned comps = type->without_array()->vector_elements * dmul; - if (is_dual_slot) { - if (i & 0x1) - comps -= 4; - else - comps = 4; - } - - _mesa_add_parameter(params, PROGRAM_UNIFORM, name, comps, - type->gl_type, NULL, NULL, false); - } - } else { - for (unsigned i = 0; i < num_params; i++) { - _mesa_add_parameter(params, PROGRAM_UNIFORM, name, 4, - type->gl_type, NULL, NULL, true); - } - } - - /* The first part of the uniform that's processed determines the base - * location of the whole uniform (for structures). - */ - if (this->idx < 0) - this->idx = index; - - /* Each Parameter will hold the index to the backing uniform storage. - * This avoids relying on names to match parameters and uniform - * storages later when associating uniform storage. - */ - unsigned location = -1; - ASSERTED const bool found = - shader_program->UniformHash->get(location, params->Parameters[index].Name); - assert(found); - - for (unsigned i = 0; i < num_params; i++) { - struct gl_program_parameter *param = ¶ms->Parameters[index + i]; - param->UniformStorageIndex = location; - param->MainUniformStorageIndex = params->Parameters[this->idx].UniformStorageIndex; - } -} - -/** - * Generate the program parameters list for the user uniforms in a shader - * - * \param shader_program Linked shader program. This is only used to - * emit possible link errors to the info log. - * \param sh Shader whose uniforms are to be processed. - * \param params Parameter list to be filled in. - */ -static void -generate_parameters_list_for_uniforms(struct gl_context *ctx, - struct gl_shader_program *shader_program, - struct gl_linked_shader *sh, - struct gl_program_parameter_list *params) -{ - add_uniform_to_shader add(ctx, shader_program, params); - - foreach_in_list(ir_instruction, node, sh->ir) { - ir_variable *var = node->as_variable(); - - if ((var == NULL) || (var->data.mode != ir_var_uniform) - || var->is_in_buffer_block() || (strncmp(var->name, "gl_", 3) == 0)) - continue; - - add.process(var); - } -} - -static unsigned is_precise(const ir_variable *ir) -{ - if (!ir) - return 0; - return ir->data.precise || ir->data.invariant; -} - -class variable_storage { - DECLARE_RZALLOC_CXX_OPERATORS(variable_storage) - -public: - variable_storage(ir_variable *var, gl_register_file file, int index, - unsigned array_id = 0) - : file(file), index(index), component(0), var(var), array_id(array_id) - { - assert(file != PROGRAM_ARRAY || array_id != 0); - } - - gl_register_file file; - int index; - - /* Explicit component location. This is given in terms of the GLSL-style - * swizzles where each double is a single component, i.e. for 64-bit types - * it can only be 0 or 1. - */ - int component; - ir_variable *var; /* variable that maps to this, if any */ - unsigned array_id; -}; - -class immediate_storage : public exec_node { -public: - immediate_storage(gl_constant_value *values, int size32, GLenum type) - { - memcpy(this->values, values, size32 * sizeof(gl_constant_value)); - this->size32 = size32; - this->type = type; - } - - /* doubles are stored across 2 gl_constant_values */ - gl_constant_value values[4]; - int size32; /**< Number of 32-bit components (1-4) */ - GLenum type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ -}; - -static const st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); -static const st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); - -struct inout_decl { - unsigned mesa_index; - unsigned array_id; /* TGSI ArrayID; 1-based: 0 means not an array */ - unsigned size; - unsigned interp_loc; - unsigned gs_out_streams; - enum glsl_interp_mode interp; - enum glsl_base_type base_type; - ubyte usage_mask; /* GLSL-style usage-mask, i.e. single bit per double */ - bool invariant; -}; - -static struct inout_decl * -find_inout_array(struct inout_decl *decls, unsigned count, unsigned array_id) -{ - assert(array_id != 0); - - for (unsigned i = 0; i < count; i++) { - struct inout_decl *decl = &decls[i]; - - if (array_id == decl->array_id) { - return decl; - } - } - - return NULL; -} - -static enum glsl_base_type -find_array_type(struct inout_decl *decls, unsigned count, unsigned array_id) -{ - if (!array_id) - return GLSL_TYPE_ERROR; - struct inout_decl *decl = find_inout_array(decls, count, array_id); - if (decl) - return decl->base_type; - return GLSL_TYPE_ERROR; -} - -struct hwatomic_decl { - unsigned location; - unsigned binding; - unsigned size; - unsigned array_id; -}; - -struct glsl_to_tgsi_visitor : public ir_visitor { -public: - glsl_to_tgsi_visitor(); - ~glsl_to_tgsi_visitor(); - - struct gl_context *ctx; - struct gl_program *prog; - struct gl_shader_program *shader_program; - struct gl_linked_shader *shader; - struct gl_shader_compiler_options *options; - - int next_temp; - - unsigned *array_sizes; - unsigned max_num_arrays; - unsigned next_array; - - struct inout_decl inputs[4 * PIPE_MAX_SHADER_INPUTS]; - unsigned num_inputs; - unsigned num_input_arrays; - struct inout_decl outputs[4 * PIPE_MAX_SHADER_OUTPUTS]; - unsigned num_outputs; - unsigned num_output_arrays; - - struct hwatomic_decl atomic_info[PIPE_MAX_HW_ATOMIC_BUFFERS]; - unsigned num_atomics; - unsigned num_atomic_arrays; - int num_address_regs; - uint32_t samplers_used; - glsl_base_type sampler_types[PIPE_MAX_SAMPLERS]; - enum tgsi_texture_type sampler_targets[PIPE_MAX_SAMPLERS]; - int images_used; - enum tgsi_texture_type image_targets[PIPE_MAX_SHADER_IMAGES]; - enum pipe_format image_formats[PIPE_MAX_SHADER_IMAGES]; - bool image_wr[PIPE_MAX_SHADER_IMAGES]; - bool indirect_addr_consts; - int wpos_transform_const; - - bool native_integers; - bool have_sqrt; - bool have_fma; - bool use_shared_memory; - bool has_tex_txf_lz; - bool precise; - bool tg4_component_in_swizzle; - - variable_storage *find_variable_storage(ir_variable *var); - - int add_constant(gl_register_file file, gl_constant_value values[8], - int size, GLenum datatype, uint16_t *swizzle_out); - - st_src_reg get_temp(const glsl_type *type); - void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); - - st_src_reg st_src_reg_for_double(double val); - st_src_reg st_src_reg_for_float(float val); - st_src_reg st_src_reg_for_int(int val); - st_src_reg st_src_reg_for_int64(int64_t val); - st_src_reg st_src_reg_for_type(enum glsl_base_type type, int val); - - /** - * \name Visit methods - * - * As typical for the visitor pattern, there must be one \c visit method for - * each concrete subclass of \c ir_instruction. Virtual base classes within - * the hierarchy should not have \c visit methods. - */ - /*@{*/ - virtual void visit(ir_variable *); - virtual void visit(ir_loop *); - virtual void visit(ir_loop_jump *); - virtual void visit(ir_function_signature *); - virtual void visit(ir_function *); - virtual void visit(ir_expression *); - virtual void visit(ir_swizzle *); - virtual void visit(ir_dereference_variable *); - virtual void visit(ir_dereference_array *); - virtual void visit(ir_dereference_record *); - virtual void visit(ir_assignment *); - virtual void visit(ir_constant *); - virtual void visit(ir_call *); - virtual void visit(ir_return *); - virtual void visit(ir_discard *); - virtual void visit(ir_demote *); - virtual void visit(ir_texture *); - virtual void visit(ir_if *); - virtual void visit(ir_emit_vertex *); - virtual void visit(ir_end_primitive *); - virtual void visit(ir_barrier *); - /*@}*/ - - void ATTRIBUTE_NOINLINE visit_expression(ir_expression *, st_src_reg *); - - void visit_atomic_counter_intrinsic(ir_call *); - void visit_ssbo_intrinsic(ir_call *); - void visit_membar_intrinsic(ir_call *); - void visit_shared_intrinsic(ir_call *); - void visit_image_intrinsic(ir_call *); - void visit_generic_intrinsic(ir_call *, enum tgsi_opcode op); - - st_src_reg result; - - /** List of variable_storage */ - struct hash_table *variables; - - /** List of immediate_storage */ - exec_list immediates; - unsigned num_immediates; - - /** List of glsl_to_tgsi_instruction */ - exec_list instructions; - - glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, enum tgsi_opcode op, - st_dst_reg dst = undef_dst, - st_src_reg src0 = undef_src, - st_src_reg src1 = undef_src, - st_src_reg src2 = undef_src, - st_src_reg src3 = undef_src); - - glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, enum tgsi_opcode op, - st_dst_reg dst, st_dst_reg dst1, - st_src_reg src0 = undef_src, - st_src_reg src1 = undef_src, - st_src_reg src2 = undef_src, - st_src_reg src3 = undef_src); - - enum tgsi_opcode get_opcode(enum tgsi_opcode op, - st_dst_reg dst, - st_src_reg src0, st_src_reg src1); - - /** - * Emit the correct dot-product instruction for the type of arguments - */ - glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir, - st_dst_reg dst, - st_src_reg src0, - st_src_reg src1, - unsigned elements); - - void emit_scalar(ir_instruction *ir, enum tgsi_opcode op, - st_dst_reg dst, st_src_reg src0); - - void emit_scalar(ir_instruction *ir, enum tgsi_opcode op, - st_dst_reg dst, st_src_reg src0, st_src_reg src1); - - void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0); - - void get_deref_offsets(ir_dereference *ir, - unsigned *array_size, - unsigned *base, - uint16_t *index, - st_src_reg *reladdr, - bool opaque); - void calc_deref_offsets(ir_dereference *tail, - unsigned *array_elements, - uint16_t *index, - st_src_reg *indirect, - unsigned *location); - st_src_reg canonicalize_gather_offset(st_src_reg offset); - bool handle_bound_deref(ir_dereference *ir); - - bool try_emit_mad(ir_expression *ir, - int mul_operand); - bool try_emit_mad_for_and_not(ir_expression *ir, - int mul_operand); - - void emit_swz(ir_expression *ir); - - bool process_move_condition(ir_rvalue *ir); - - void simplify_cmp(void); - - void rename_temp_registers(struct rename_reg_pair *renames); - void get_first_temp_read(int *first_reads); - void get_first_temp_write(int *first_writes); - void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes); - void get_last_temp_write(int *last_writes); - - void copy_propagate(void); - int eliminate_dead_code(void); - - void split_arrays(void); - void merge_two_dsts(void); - void merge_registers(void); - void renumber_registers(void); - - void emit_block_mov(ir_assignment *ir, const struct glsl_type *type, - st_dst_reg *l, st_src_reg *r, - st_src_reg *cond, bool cond_swap); - - void print_stats(); - - void *mem_ctx; -}; - -static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, - GLSL_TYPE_FLOAT, 0); -static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, - GLSL_TYPE_FLOAT, 1); -static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, - GLSL_TYPE_FLOAT, 2); - -static void -fail_link(struct gl_shader_program *prog, const char *fmt, ...) - PRINTFLIKE(2, 3); - -static void -fail_link(struct gl_shader_program *prog, const char *fmt, ...) -{ - va_list args; - va_start(args, fmt); - ralloc_vasprintf_append(&prog->data->InfoLog, fmt, args); - va_end(args); - - prog->data->LinkStatus = LINKING_FAILURE; -} - -int -swizzle_for_size(int size) -{ - static const int size_swizzles[4] = { - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), - }; - - assert((size >= 1) && (size <= 4)); - return size_swizzles[size - 1]; -} - - -/** - * Map mesa texture target to TGSI texture target. - */ -static enum tgsi_texture_type -st_translate_texture_target(gl_texture_index textarget, GLboolean shadow) -{ - if (shadow) { - switch (textarget) { - case TEXTURE_1D_INDEX: - return TGSI_TEXTURE_SHADOW1D; - case TEXTURE_2D_INDEX: - return TGSI_TEXTURE_SHADOW2D; - case TEXTURE_RECT_INDEX: - return TGSI_TEXTURE_SHADOWRECT; - case TEXTURE_1D_ARRAY_INDEX: - return TGSI_TEXTURE_SHADOW1D_ARRAY; - case TEXTURE_2D_ARRAY_INDEX: - return TGSI_TEXTURE_SHADOW2D_ARRAY; - case TEXTURE_CUBE_INDEX: - return TGSI_TEXTURE_SHADOWCUBE; - case TEXTURE_CUBE_ARRAY_INDEX: - return TGSI_TEXTURE_SHADOWCUBE_ARRAY; - default: - break; - } - } - - switch (textarget) { - case TEXTURE_2D_MULTISAMPLE_INDEX: - return TGSI_TEXTURE_2D_MSAA; - case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX: - return TGSI_TEXTURE_2D_ARRAY_MSAA; - case TEXTURE_BUFFER_INDEX: - return TGSI_TEXTURE_BUFFER; - case TEXTURE_1D_INDEX: - return TGSI_TEXTURE_1D; - case TEXTURE_2D_INDEX: - return TGSI_TEXTURE_2D; - case TEXTURE_3D_INDEX: - return TGSI_TEXTURE_3D; - case TEXTURE_CUBE_INDEX: - return TGSI_TEXTURE_CUBE; - case TEXTURE_CUBE_ARRAY_INDEX: - return TGSI_TEXTURE_CUBE_ARRAY; - case TEXTURE_RECT_INDEX: - return TGSI_TEXTURE_RECT; - case TEXTURE_1D_ARRAY_INDEX: - return TGSI_TEXTURE_1D_ARRAY; - case TEXTURE_2D_ARRAY_INDEX: - return TGSI_TEXTURE_2D_ARRAY; - case TEXTURE_EXTERNAL_INDEX: - return TGSI_TEXTURE_2D; - default: - debug_assert(!"unexpected texture target index"); - return TGSI_TEXTURE_1D; - } -} - - -/** - * Map GLSL base type to TGSI return type. - */ -static enum tgsi_return_type -st_translate_texture_type(enum glsl_base_type type) -{ - switch (type) { - case GLSL_TYPE_INT: - return TGSI_RETURN_TYPE_SINT; - case GLSL_TYPE_UINT: - return TGSI_RETURN_TYPE_UINT; - case GLSL_TYPE_FLOAT: - return TGSI_RETURN_TYPE_FLOAT; - default: - assert(!"unexpected texture type"); - return TGSI_RETURN_TYPE_UNKNOWN; - } -} - - -glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, enum tgsi_opcode op, - st_dst_reg dst, st_dst_reg dst1, - st_src_reg src0, st_src_reg src1, - st_src_reg src2, st_src_reg src3) -{ - glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); - int num_reladdr = 0, i, j; - bool dst_is_64bit[2]; - - op = get_opcode(op, dst, src0, src1); - - /* If we have to do relative addressing, we want to load the ARL - * reg directly for one of the regs, and preload the other reladdr - * sources into temps. - */ - num_reladdr += dst.reladdr != NULL || dst.reladdr2; - assert(!dst1.reladdr); /* should be lowered in earlier passes */ - num_reladdr += src0.reladdr != NULL || src0.reladdr2 != NULL; - num_reladdr += src1.reladdr != NULL || src1.reladdr2 != NULL; - num_reladdr += src2.reladdr != NULL || src2.reladdr2 != NULL; - num_reladdr += src3.reladdr != NULL || src3.reladdr2 != NULL; - - reladdr_to_temp(ir, &src3, &num_reladdr); - reladdr_to_temp(ir, &src2, &num_reladdr); - reladdr_to_temp(ir, &src1, &num_reladdr); - reladdr_to_temp(ir, &src0, &num_reladdr); - - if (dst.reladdr || dst.reladdr2) { - if (dst.reladdr) - emit_arl(ir, address_reg, *dst.reladdr); - if (dst.reladdr2) - emit_arl(ir, address_reg2, *dst.reladdr2); - num_reladdr--; - } - - assert(num_reladdr == 0); - - /* inst->op has only 8 bits. */ - STATIC_ASSERT(TGSI_OPCODE_LAST <= 255); - - inst->op = op; - inst->precise = this->precise; - inst->info = tgsi_get_opcode_info(op); - inst->dst[0] = dst; - inst->dst[1] = dst1; - inst->src[0] = src0; - inst->src[1] = src1; - inst->src[2] = src2; - inst->src[3] = src3; - inst->is_64bit_expanded = false; - inst->ir = ir; - inst->dead_mask = 0; - inst->tex_offsets = NULL; - inst->tex_offset_num_offset = 0; - inst->saturate = 0; - inst->tex_shadow = 0; - /* default to float, for paths where this is not initialized - * (since 0==UINT which is likely wrong): - */ - inst->tex_type = GLSL_TYPE_FLOAT; - - /* Update indirect addressing status used by TGSI */ - if (dst.reladdr || dst.reladdr2) { - switch (dst.file) { - case PROGRAM_STATE_VAR: - case PROGRAM_CONSTANT: - case PROGRAM_UNIFORM: - this->indirect_addr_consts = true; - break; - case PROGRAM_IMMEDIATE: - assert(!"immediates should not have indirect addressing"); - break; - default: - break; - } - } - else { - for (i = 0; i < 4; i++) { - if (inst->src[i].reladdr) { - switch (inst->src[i].file) { - case PROGRAM_STATE_VAR: - case PROGRAM_CONSTANT: - case PROGRAM_UNIFORM: - this->indirect_addr_consts = true; - break; - case PROGRAM_IMMEDIATE: - assert(!"immediates should not have indirect addressing"); - break; - default: - break; - } - } - } - } - - /* - * This section contains the double processing. - * GLSL just represents doubles as single channel values, - * however most HW and TGSI represent doubles as pairs of register channels. - * - * so we have to fixup destination writemask/index and src swizzle/indexes. - * dest writemasks need to translate from single channel write mask - * to a dual-channel writemask, but also need to modify the index, - * if we are touching the Z,W fields in the pre-translated writemask. - * - * src channels have similiar index modifications along with swizzle - * changes to we pick the XY, ZW pairs from the correct index. - * - * GLSL [0].x -> TGSI [0].xy - * GLSL [0].y -> TGSI [0].zw - * GLSL [0].z -> TGSI [1].xy - * GLSL [0].w -> TGSI [1].zw - */ - for (j = 0; j < 2; j++) { - dst_is_64bit[j] = glsl_base_type_is_64bit(inst->dst[j].type); - if (!dst_is_64bit[j] && inst->dst[j].file == PROGRAM_OUTPUT && - inst->dst[j].type == GLSL_TYPE_ARRAY) { - enum glsl_base_type type = find_array_type(this->outputs, - this->num_outputs, - inst->dst[j].array_id); - if (glsl_base_type_is_64bit(type)) - dst_is_64bit[j] = true; - } - } - - if (dst_is_64bit[0] || dst_is_64bit[1] || - glsl_base_type_is_64bit(inst->src[0].type)) { - glsl_to_tgsi_instruction *dinst = NULL; - int initial_src_swz[4], initial_src_idx[4]; - int initial_dst_idx[2], initial_dst_writemask[2]; - /* select the writemask for dst0 or dst1 */ - unsigned writemask = inst->dst[1].file == PROGRAM_UNDEFINED - ? inst->dst[0].writemask : inst->dst[1].writemask; - - /* copy out the writemask, index and swizzles for all src/dsts. */ - for (j = 0; j < 2; j++) { - initial_dst_writemask[j] = inst->dst[j].writemask; - initial_dst_idx[j] = inst->dst[j].index; - } - - for (j = 0; j < 4; j++) { - initial_src_swz[j] = inst->src[j].swizzle; - initial_src_idx[j] = inst->src[j].index; - } - - /* - * scan all the components in the dst writemask - * generate an instruction for each of them if required. - */ - st_src_reg addr; - while (writemask) { - - int i = u_bit_scan(&writemask); - - /* before emitting the instruction, see if we have to adjust - * load / store address */ - if (i > 1 && (inst->op == TGSI_OPCODE_LOAD || - inst->op == TGSI_OPCODE_STORE) && - addr.file == PROGRAM_UNDEFINED) { - /* We have to advance the buffer address by 16 */ - addr = get_temp(glsl_type::uint_type); - emit_asm(ir, TGSI_OPCODE_UADD, st_dst_reg(addr), - inst->src[0], st_src_reg_for_int(16)); - } - - /* first time use previous instruction */ - if (dinst == NULL) { - dinst = inst; - } else { - /* create a new instructions for subsequent attempts */ - dinst = new(mem_ctx) glsl_to_tgsi_instruction(); - *dinst = *inst; - dinst->next = NULL; - dinst->prev = NULL; - } - this->instructions.push_tail(dinst); - dinst->is_64bit_expanded = true; - - /* modify the destination if we are splitting */ - for (j = 0; j < 2; j++) { - if (dst_is_64bit[j]) { - dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY; - dinst->dst[j].index = initial_dst_idx[j]; - if (i > 1) { - if (dinst->op == TGSI_OPCODE_LOAD || - dinst->op == TGSI_OPCODE_STORE) - dinst->src[0] = addr; - if (dinst->op != TGSI_OPCODE_STORE) - dinst->dst[j].index++; - } - } else { - /* if we aren't writing to a double, just get the bit of the - * initial writemask for this channel - */ - dinst->dst[j].writemask = initial_dst_writemask[j] & (1 << i); - } - } - - /* modify the src registers */ - for (j = 0; j < 4; j++) { - int swz = GET_SWZ(initial_src_swz[j], i); - - if (glsl_base_type_is_64bit(dinst->src[j].type)) { - dinst->src[j].index = initial_src_idx[j]; - if (swz > 1) { - dinst->src[j].double_reg2 = true; - dinst->src[j].index++; - } - - if (swz & 1) - dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, - SWIZZLE_Z, SWIZZLE_W); - else - dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, - SWIZZLE_X, SWIZZLE_Y); - - } else { - /* some opcodes are special case in what they use as sources - * - [FUI]2D/[UI]2I64 is a float/[u]int src0, (D)LDEXP is - * integer src1 - */ - if (op == TGSI_OPCODE_F2D || op == TGSI_OPCODE_U2D || - op == TGSI_OPCODE_I2D || - op == TGSI_OPCODE_I2I64 || op == TGSI_OPCODE_U2I64 || - op == TGSI_OPCODE_DLDEXP || op == TGSI_OPCODE_LDEXP || - (op == TGSI_OPCODE_UCMP && dst_is_64bit[0])) { - dinst->src[j].swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz); - } - } - } - } - inst = dinst; - } else { - this->instructions.push_tail(inst); - } - - - return inst; -} - -glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, enum tgsi_opcode op, - st_dst_reg dst, - st_src_reg src0, st_src_reg src1, - st_src_reg src2, st_src_reg src3) -{ - return emit_asm(ir, op, dst, undef_dst, src0, src1, src2, src3); -} - -/** - * Determines whether to use an integer, unsigned integer, or float opcode - * based on the operands and input opcode, then emits the result. - */ -enum tgsi_opcode -glsl_to_tgsi_visitor::get_opcode(enum tgsi_opcode op, - st_dst_reg dst, - st_src_reg src0, st_src_reg src1) -{ - enum glsl_base_type type = GLSL_TYPE_FLOAT; - - if (op == TGSI_OPCODE_MOV) - return op; - - assert(src0.type != GLSL_TYPE_ARRAY); - assert(src0.type != GLSL_TYPE_STRUCT); - assert(src1.type != GLSL_TYPE_ARRAY); - assert(src1.type != GLSL_TYPE_STRUCT); - - if (is_resource_instruction(op)) - type = src1.type; - else if (src0.type == GLSL_TYPE_INT64 || src1.type == GLSL_TYPE_INT64) - type = GLSL_TYPE_INT64; - else if (src0.type == GLSL_TYPE_UINT64 || src1.type == GLSL_TYPE_UINT64) - type = GLSL_TYPE_UINT64; - else if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE) - type = GLSL_TYPE_DOUBLE; - else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) - type = GLSL_TYPE_FLOAT; - else if (native_integers) - type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type; - -#define case7(c, f, i, u, d, i64, ui64) \ - case TGSI_OPCODE_##c: \ - if (type == GLSL_TYPE_UINT64) \ - op = TGSI_OPCODE_##ui64; \ - else if (type == GLSL_TYPE_INT64) \ - op = TGSI_OPCODE_##i64; \ - else if (type == GLSL_TYPE_DOUBLE) \ - op = TGSI_OPCODE_##d; \ - else if (type == GLSL_TYPE_INT) \ - op = TGSI_OPCODE_##i; \ - else if (type == GLSL_TYPE_UINT) \ - op = TGSI_OPCODE_##u; \ - else \ - op = TGSI_OPCODE_##f; \ - break; - -#define casecomp(c, f, i, u, d, i64, ui64) \ - case TGSI_OPCODE_##c: \ - if (type == GLSL_TYPE_INT64) \ - op = TGSI_OPCODE_##i64; \ - else if (type == GLSL_TYPE_UINT64) \ - op = TGSI_OPCODE_##ui64; \ - else if (type == GLSL_TYPE_DOUBLE) \ - op = TGSI_OPCODE_##d; \ - else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE) \ - op = TGSI_OPCODE_##i; \ - else if (type == GLSL_TYPE_UINT) \ - op = TGSI_OPCODE_##u; \ - else if (native_integers) \ - op = TGSI_OPCODE_##f; \ - else \ - op = TGSI_OPCODE_##c; \ - break; - - switch (op) { - /* Some instructions are initially selected without considering the type. - * This fixes the type: - * - * INIT FLOAT SINT UINT DOUBLE SINT64 UINT64 - */ - case7(ADD, ADD, UADD, UADD, DADD, U64ADD, U64ADD); - case7(CEIL, CEIL, LAST, LAST, DCEIL, LAST, LAST); - case7(DIV, DIV, IDIV, UDIV, DDIV, I64DIV, U64DIV); - case7(FMA, FMA, UMAD, UMAD, DFMA, LAST, LAST); - case7(FLR, FLR, LAST, LAST, DFLR, LAST, LAST); - case7(FRC, FRC, LAST, LAST, DFRAC, LAST, LAST); - case7(MUL, MUL, UMUL, UMUL, DMUL, U64MUL, U64MUL); - case7(MAD, MAD, UMAD, UMAD, DMAD, LAST, LAST); - case7(MAX, MAX, IMAX, UMAX, DMAX, I64MAX, U64MAX); - case7(MIN, MIN, IMIN, UMIN, DMIN, I64MIN, U64MIN); - case7(RCP, RCP, LAST, LAST, DRCP, LAST, LAST); - case7(ROUND, ROUND,LAST, LAST, DROUND, LAST, LAST); - case7(RSQ, RSQ, LAST, LAST, DRSQ, LAST, LAST); - case7(SQRT, SQRT, LAST, LAST, DSQRT, LAST, LAST); - case7(SSG, SSG, ISSG, ISSG, DSSG, I64SSG, I64SSG); - case7(TRUNC, TRUNC,LAST, LAST, DTRUNC, LAST, LAST); - - case7(MOD, LAST, MOD, UMOD, LAST, I64MOD, U64MOD); - case7(SHL, LAST, SHL, SHL, LAST, U64SHL, U64SHL); - case7(IBFE, LAST, IBFE, UBFE, LAST, LAST, LAST); - case7(IMSB, LAST, IMSB, UMSB, LAST, LAST, LAST); - case7(IMUL_HI, LAST, IMUL_HI, UMUL_HI, LAST, LAST, LAST); - case7(ISHR, LAST, ISHR, USHR, LAST, I64SHR, U64SHR); - case7(ATOMIMAX,LAST, ATOMIMAX,ATOMUMAX,LAST, LAST, LAST); - case7(ATOMIMIN,LAST, ATOMIMIN,ATOMUMIN,LAST, LAST, LAST); - case7(ATOMUADD,ATOMFADD,ATOMUADD,ATOMUADD,LAST, LAST, LAST); - - casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ, U64SEQ, U64SEQ); - casecomp(SNE, FSNE, USNE, USNE, DSNE, U64SNE, U64SNE); - casecomp(SGE, FSGE, ISGE, USGE, DSGE, I64SGE, U64SGE); - casecomp(SLT, FSLT, ISLT, USLT, DSLT, I64SLT, U64SLT); - - default: - break; - } - - assert(op != TGSI_OPCODE_LAST); - return op; -} - -glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, - st_dst_reg dst, st_src_reg src0, st_src_reg src1, - unsigned elements) -{ - static const enum tgsi_opcode dot_opcodes[] = { - TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4 - }; - - return emit_asm(ir, dot_opcodes[elements - 2], dst, src0, src1); -} - -/** - * Emits TGSI scalar opcodes to produce unique answers across channels. - * - * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X - * channel determines the result across all channels. So to do a vec4 - * of this operation, we want to emit a scalar per source channel used - * to produce dest channels. - */ -void -glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum tgsi_opcode op, - st_dst_reg dst, - st_src_reg orig_src0, st_src_reg orig_src1) -{ - int i, j; - int done_mask = ~dst.writemask; - - /* TGSI RCP is a scalar operation splatting results to all channels, - * like ARB_fp/vp. So emit as many RCPs as necessary to cover our - * dst channels. - */ - for (i = 0; i < 4; i++) { - GLuint this_mask = (1 << i); - st_src_reg src0 = orig_src0; - st_src_reg src1 = orig_src1; - - if (done_mask & this_mask) - continue; - - GLuint src0_swiz = GET_SWZ(src0.swizzle, i); - GLuint src1_swiz = GET_SWZ(src1.swizzle, i); - for (j = i + 1; j < 4; j++) { - /* If there is another enabled component in the destination that is - * derived from the same inputs, generate its value on this pass as - * well. - */ - if (!(done_mask & (1 << j)) && - GET_SWZ(src0.swizzle, j) == src0_swiz && - GET_SWZ(src1.swizzle, j) == src1_swiz) { - this_mask |= (1 << j); - } - } - src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, - src0_swiz, src0_swiz); - src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, - src1_swiz, src1_swiz); - - dst.writemask = this_mask; - emit_asm(ir, op, dst, src0, src1); - done_mask |= this_mask; - } -} - -void -glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum tgsi_opcode op, - st_dst_reg dst, st_src_reg src0) -{ - st_src_reg undef = undef_src; - - undef.swizzle = SWIZZLE_XXXX; - - emit_scalar(ir, op, dst, src0, undef); -} - -void -glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, - st_dst_reg dst, st_src_reg src0) -{ - enum tgsi_opcode op = TGSI_OPCODE_ARL; - - if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT) { - op = TGSI_OPCODE_UARL; - } - - assert(dst.file == PROGRAM_ADDRESS); - if (dst.index >= this->num_address_regs) - this->num_address_regs = dst.index + 1; - - emit_asm(NULL, op, dst, src0); -} - -int -glsl_to_tgsi_visitor::add_constant(gl_register_file file, - gl_constant_value values[8], int size, - GLenum datatype, - uint16_t *swizzle_out) -{ - if (file == PROGRAM_CONSTANT) { - GLuint swizzle = swizzle_out ? *swizzle_out : 0; - int result = _mesa_add_typed_unnamed_constant(this->prog->Parameters, - values, size, datatype, - &swizzle); - if (swizzle_out) - *swizzle_out = swizzle; - return result; - } - - assert(file == PROGRAM_IMMEDIATE); - - int index = 0; - immediate_storage *entry; - int size32 = size * ((datatype == GL_DOUBLE || - datatype == GL_INT64_ARB || - datatype == GL_UNSIGNED_INT64_ARB) ? 2 : 1); - int i; - - /* Search immediate storage to see if we already have an identical - * immediate that we can use instead of adding a duplicate entry. - */ - foreach_in_list(immediate_storage, entry, &this->immediates) { - immediate_storage *tmp = entry; - - for (i = 0; i * 4 < size32; i++) { - int slot_size = MIN2(size32 - (i * 4), 4); - if (tmp->type != datatype || tmp->size32 != slot_size) - break; - if (memcmp(tmp->values, &values[i * 4], - slot_size * sizeof(gl_constant_value))) - break; - - /* Everything matches, keep going until the full size is matched */ - tmp = (immediate_storage *)tmp->next; - } - - /* The full value matched */ - if (i * 4 >= size32) - return index; - - index++; - } - - for (i = 0; i * 4 < size32; i++) { - int slot_size = MIN2(size32 - (i * 4), 4); - /* Add this immediate to the list. */ - entry = new(mem_ctx) immediate_storage(&values[i * 4], - slot_size, datatype); - this->immediates.push_tail(entry); - this->num_immediates++; - } - return index; -} - -st_src_reg -glsl_to_tgsi_visitor::st_src_reg_for_float(float val) -{ - st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT); - union gl_constant_value uval; - - uval.f = val; - src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle); - - return src; -} - -st_src_reg -glsl_to_tgsi_visitor::st_src_reg_for_double(double val) -{ - st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_DOUBLE); - union gl_constant_value uval[2]; - - memcpy(uval, &val, sizeof(uval)); - src.index = add_constant(src.file, uval, 1, GL_DOUBLE, &src.swizzle); - src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y); - return src; -} - -st_src_reg -glsl_to_tgsi_visitor::st_src_reg_for_int(int val) -{ - st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT); - union gl_constant_value uval; - - assert(native_integers); - - uval.i = val; - src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle); - - return src; -} - -st_src_reg -glsl_to_tgsi_visitor::st_src_reg_for_int64(int64_t val) -{ - st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT64); - union gl_constant_value uval[2]; - - memcpy(uval, &val, sizeof(uval)); - src.index = add_constant(src.file, uval, 1, GL_DOUBLE, &src.swizzle); - src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y); - - return src; -} - -st_src_reg -glsl_to_tgsi_visitor::st_src_reg_for_type(enum glsl_base_type type, int val) -{ - if (native_integers) - return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : - st_src_reg_for_int(val); - else - return st_src_reg_for_float(val); -} - -static int -attrib_type_size(const struct glsl_type *type, bool is_vs_input) -{ - return type->count_attribute_slots(is_vs_input); -} - -static int -type_size(const struct glsl_type *type) -{ - return type->count_attribute_slots(false); -} - -static void -add_buffer_to_load_and_stores(glsl_to_tgsi_instruction *inst, st_src_reg *buf, - exec_list *instructions, ir_constant *access) -{ - /** - * emit_asm() might have actually split the op into pieces, e.g. for - * double stores. We have to go back and fix up all the generated ops. - */ - enum tgsi_opcode op = inst->op; - do { - inst->resource = *buf; - if (access) - inst->buffer_access = access->value.u[0]; - - if (inst == instructions->get_head_raw()) - break; - inst = (glsl_to_tgsi_instruction *)inst->get_prev(); - - if (inst->op == TGSI_OPCODE_UADD) { - if (inst == instructions->get_head_raw()) - break; - inst = (glsl_to_tgsi_instruction *)inst->get_prev(); - } - } while (inst->op == op && inst->resource.file == PROGRAM_UNDEFINED); -} - -/** - * If the given GLSL type is an array or matrix or a structure containing - * an array/matrix member, return true. Else return false. - * - * This is used to determine which kind of temp storage (PROGRAM_TEMPORARY - * or PROGRAM_ARRAY) should be used for variables of this type. Anytime - * we have an array that might be indexed with a variable, we need to use - * the later storage type. - */ -static bool -type_has_array_or_matrix(const glsl_type *type) -{ - if (type->is_array() || type->is_matrix()) - return true; - - if (type->is_struct()) { - for (unsigned i = 0; i < type->length; i++) { - if (type_has_array_or_matrix(type->fields.structure[i].type)) { - return true; - } - } - } - - return false; -} - - -/** - * In the initial pass of codegen, we assign temporary numbers to - * intermediate results. (not SSA -- variable assignments will reuse - * storage). - */ -st_src_reg -glsl_to_tgsi_visitor::get_temp(const glsl_type *type) -{ - st_src_reg src; - - src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT; - src.reladdr = NULL; - src.negate = 0; - src.abs = 0; - - if (!options->EmitNoIndirectTemp && type_has_array_or_matrix(type)) { - if (next_array >= max_num_arrays) { - max_num_arrays += 32; - array_sizes = (unsigned*) - realloc(array_sizes, sizeof(array_sizes[0]) * max_num_arrays); - } - - src.file = PROGRAM_ARRAY; - src.index = 0; - src.array_id = next_array + 1; - array_sizes[next_array] = type_size(type); - ++next_array; - - } else { - src.file = PROGRAM_TEMPORARY; - src.index = next_temp; - next_temp += type_size(type); - } - - if (type->is_array() || type->is_struct()) { - src.swizzle = SWIZZLE_NOOP; - } else { - src.swizzle = swizzle_for_size(type->vector_elements); - } - - return src; -} - -variable_storage * -glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var) -{ - struct hash_entry *entry; - - entry = _mesa_hash_table_search(this->variables, var); - if (!entry) - return NULL; - - return (variable_storage *)entry->data; -} - -void -glsl_to_tgsi_visitor::visit(ir_variable *ir) -{ - if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { - unsigned int i; - const ir_state_slot *const slots = ir->get_state_slots(); - assert(slots != NULL); - - /* Check if this statevar's setup in the STATE file exactly - * matches how we'll want to reference it as a - * struct/array/whatever. If not, then we need to move it into - * temporary storage and hope that it'll get copy-propagated - * out. - */ - for (i = 0; i < ir->get_num_state_slots(); i++) { - if (slots[i].swizzle != SWIZZLE_XYZW) { - break; - } - } - - variable_storage *storage; - st_dst_reg dst; - if (i == ir->get_num_state_slots()) { - /* We'll set the index later. */ - storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); - - _mesa_hash_table_insert(this->variables, ir, storage); - - dst = undef_dst; - } else { - /* The variable_storage constructor allocates slots based on the size - * of the type. However, this had better match the number of state - * elements that we're going to copy into the new temporary. - */ - assert((int) ir->get_num_state_slots() == type_size(ir->type)); - - dst = st_dst_reg(get_temp(ir->type)); - - storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index, - dst.array_id); - - _mesa_hash_table_insert(this->variables, ir, storage); - } - - - for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) { - int index = _mesa_add_state_reference(this->prog->Parameters, - slots[i].tokens); - - if (storage->file == PROGRAM_STATE_VAR) { - if (storage->index == -1) { - storage->index = index; - } else { - assert(index == storage->index + (int)i); - } - } else { - /* We use GLSL_TYPE_FLOAT here regardless of the actual type of - * the data being moved since MOV does not care about the type of - * data it is moving, and we don't want to declare registers with - * array or struct types. - */ - st_src_reg src(PROGRAM_STATE_VAR, index, GLSL_TYPE_FLOAT); - src.swizzle = slots[i].swizzle; - emit_asm(ir, TGSI_OPCODE_MOV, dst, src); - /* even a float takes up a whole vec4 reg in a struct/array. */ - dst.index++; - } - } - - if (storage->file == PROGRAM_TEMPORARY && - dst.index != storage->index + (int) ir->get_num_state_slots()) { - fail_link(this->shader_program, - "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", - ir->name, dst.index - storage->index, - type_size(ir->type)); - } - } -} - -void -glsl_to_tgsi_visitor::visit(ir_loop *ir) -{ - emit_asm(NULL, TGSI_OPCODE_BGNLOOP); - - visit_exec_list(&ir->body_instructions, this); - - emit_asm(NULL, TGSI_OPCODE_ENDLOOP); -} - -void -glsl_to_tgsi_visitor::visit(ir_loop_jump *ir) -{ - switch (ir->mode) { - case ir_loop_jump::jump_break: - emit_asm(NULL, TGSI_OPCODE_BRK); - break; - case ir_loop_jump::jump_continue: - emit_asm(NULL, TGSI_OPCODE_CONT); - break; - } -} - - -void -glsl_to_tgsi_visitor::visit(ir_function_signature *ir) -{ - assert(0); - (void)ir; -} - -void -glsl_to_tgsi_visitor::visit(ir_function *ir) -{ - /* Ignore function bodies other than main() -- we shouldn't see calls to - * them since they should all be inlined before we get to glsl_to_tgsi. - */ - if (strcmp(ir->name, "main") == 0) { - const ir_function_signature *sig; - exec_list empty; - - sig = ir->matching_signature(NULL, &empty, false); - - assert(sig); - - foreach_in_list(ir_instruction, ir, &sig->body) { - ir->accept(this); - } - } -} - -bool -glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) -{ - int nonmul_operand = 1 - mul_operand; - st_src_reg a, b, c; - st_dst_reg result_dst; - - // there is no TGSI opcode for this - if (ir->type->is_integer_64()) - return false; - - ir_expression *expr = ir->operands[mul_operand]->as_expression(); - if (!expr || expr->operation != ir_binop_mul) - return false; - - expr->operands[0]->accept(this); - a = this->result; - expr->operands[1]->accept(this); - b = this->result; - ir->operands[nonmul_operand]->accept(this); - c = this->result; - - this->result = get_temp(ir->type); - result_dst = st_dst_reg(this->result); - result_dst.writemask = (1 << ir->type->vector_elements) - 1; - emit_asm(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); - - return true; -} - -/** - * Emit MAD(a, -b, a) instead of AND(a, NOT(b)) - * - * The logic values are 1.0 for true and 0.0 for false. Logical-and is - * implemented using multiplication, and logical-or is implemented using - * addition. Logical-not can be implemented as (true - x), or (1.0 - x). - * As result, the logical expression (a & !b) can be rewritten as: - * - * - a * !b - * - a * (1 - b) - * - (a * 1) - (a * b) - * - a + -(a * b) - * - a + (a * -b) - * - * This final expression can be implemented as a single MAD(a, -b, a) - * instruction. - */ -bool -glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, - int try_operand) -{ - const int other_operand = 1 - try_operand; - st_src_reg a, b; - - ir_expression *expr = ir->operands[try_operand]->as_expression(); - if (!expr || expr->operation != ir_unop_logic_not) - return false; - - ir->operands[other_operand]->accept(this); - a = this->result; - expr->operands[0]->accept(this); - b = this->result; - - b.negate = ~b.negate; - - this->result = get_temp(ir->type); - emit_asm(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a); - - return true; -} - -void -glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, - st_src_reg *reg, int *num_reladdr) -{ - if (!reg->reladdr && !reg->reladdr2) - return; - - if (reg->reladdr) - emit_arl(ir, address_reg, *reg->reladdr); - if (reg->reladdr2) - emit_arl(ir, address_reg2, *reg->reladdr2); - - if (*num_reladdr != 1) { - st_src_reg temp = get_temp(glsl_type::get_instance(reg->type, 4, 1)); - - emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg); - *reg = temp; - } - - (*num_reladdr)--; -} - -void -glsl_to_tgsi_visitor::visit(ir_expression *ir) -{ - st_src_reg op[ARRAY_SIZE(ir->operands)]; - - /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c) - */ - if (!this->precise && ir->operation == ir_binop_add) { - if (try_emit_mad(ir, 1)) - return; - if (try_emit_mad(ir, 0)) - return; - } - - /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) - */ - if (!native_integers && ir->operation == ir_binop_logic_and) { - if (try_emit_mad_for_and_not(ir, 1)) - return; - if (try_emit_mad_for_and_not(ir, 0)) - return; - } - - if (ir->operation == ir_quadop_vector) - assert(!"ir_quadop_vector should have been lowered"); - - for (unsigned int operand = 0; operand < ir->num_operands; operand++) { - this->result.file = PROGRAM_UNDEFINED; - ir->operands[operand]->accept(this); - if (this->result.file == PROGRAM_UNDEFINED) { - printf("Failed to get tree for expression operand:\n"); - ir->operands[operand]->print(); - printf("\n"); - exit(1); - } - op[operand] = this->result; - - /* Matrix expression operands should have been broken down to vector - * operations already. - */ - assert(!ir->operands[operand]->type->is_matrix()); - } - - visit_expression(ir, op); -} - -/* The non-recursive part of the expression visitor lives in a separate - * function and should be prevented from being inlined, to avoid a stack - * explosion when deeply nested expressions are visited. - */ -void -glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) -{ - st_src_reg result_src; - st_dst_reg result_dst; - - int vector_elements = ir->operands[0]->type->vector_elements; - if (ir->operands[1] && - ir->operation != ir_binop_interpolate_at_offset && - ir->operation != ir_binop_interpolate_at_sample) { - st_src_reg *swz_op = NULL; - if (vector_elements > ir->operands[1]->type->vector_elements) { - assert(ir->operands[1]->type->vector_elements == 1); - swz_op = &op[1]; - } else if (vector_elements < ir->operands[1]->type->vector_elements) { - assert(ir->operands[0]->type->vector_elements == 1); - swz_op = &op[0]; - } - if (swz_op) { - uint16_t swizzle_x = GET_SWZ(swz_op->swizzle, 0); - swz_op->swizzle = MAKE_SWIZZLE4(swizzle_x, swizzle_x, - swizzle_x, swizzle_x); - } - vector_elements = MAX2(vector_elements, - ir->operands[1]->type->vector_elements); - } - /* Swizzle the single scalar argument of an otherwise vector 3-operand instr - * (lrp for mix(), csel, etc.). - */ - if (ir->operands[2] && - ir->operands[2]->type->vector_elements != vector_elements) { - int i; - if (ir->operands[0]->type->vector_elements == 1) { - i = 0; - } else { - assert(ir->operands[2]->type->vector_elements == 1); - i = 2; - } - uint16_t swizzle_x = GET_SWZ(op[i].swizzle, 0); - op[i].swizzle = MAKE_SWIZZLE4(swizzle_x, swizzle_x, - swizzle_x, swizzle_x); - } - - this->result.file = PROGRAM_UNDEFINED; - - /* Storage for our result. Ideally for an assignment we'd be using - * the actual storage for the result here, instead. - */ - result_src = get_temp(ir->type); - /* convenience for the emit functions below. */ - result_dst = st_dst_reg(result_src); - /* Limit writes to the channels that will be used by result_src later. - * This does limit this temp's use as a temporary for multi-instruction - * sequences. - */ - result_dst.writemask = (1 << ir->type->vector_elements) - 1; - - switch (ir->operation) { - case ir_unop_logic_not: - if (result_dst.type != GLSL_TYPE_FLOAT) - emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]); - else { - /* Previously 'SEQ dst, src, 0.0' was used for this. However, many - * older GPUs implement SEQ using multiple instructions (i915 uses two - * SGE instructions and a MUL instruction). Since our logic values are - * 0.0 and 1.0, 1-x also implements !x. - */ - op[0].negate = ~op[0].negate; - emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], - st_src_reg_for_float(1.0)); - } - break; - case ir_unop_neg: - if (result_dst.type == GLSL_TYPE_INT64 || - result_dst.type == GLSL_TYPE_UINT64) - emit_asm(ir, TGSI_OPCODE_I64NEG, result_dst, op[0]); - else if (result_dst.type == GLSL_TYPE_INT || - result_dst.type == GLSL_TYPE_UINT) - emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]); - else if (result_dst.type == GLSL_TYPE_DOUBLE) - emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]); - else { - op[0].negate = ~op[0].negate; - result_src = op[0]; - } - break; - case ir_unop_subroutine_to_int: - emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); - break; - case ir_unop_abs: - if (result_dst.type == GLSL_TYPE_FLOAT) - emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0].get_abs()); - else if (result_dst.type == GLSL_TYPE_DOUBLE) - emit_asm(ir, TGSI_OPCODE_DABS, result_dst, op[0]); - else if (result_dst.type == GLSL_TYPE_INT64 || - result_dst.type == GLSL_TYPE_UINT64) - emit_asm(ir, TGSI_OPCODE_I64ABS, result_dst, op[0]); - else - emit_asm(ir, TGSI_OPCODE_IABS, result_dst, op[0]); - break; - case ir_unop_sign: - emit_asm(ir, TGSI_OPCODE_SSG, result_dst, op[0]); - break; - case ir_unop_rcp: - emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]); - break; - - case ir_unop_exp2: - emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]); - break; - case ir_unop_exp: - assert(!"not reached: should be handled by exp_to_exp2"); - break; - case ir_unop_log: - assert(!"not reached: should be handled by log_to_log2"); - break; - case ir_unop_log2: - emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]); - break; - case ir_unop_sin: - emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]); - break; - case ir_unop_cos: - emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]); - break; - case ir_unop_saturate: { - glsl_to_tgsi_instruction *inst; - inst = emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); - inst->saturate = true; - break; - } - - case ir_unop_dFdx: - case ir_unop_dFdx_coarse: - emit_asm(ir, TGSI_OPCODE_DDX, result_dst, op[0]); - break; - case ir_unop_dFdx_fine: - emit_asm(ir, TGSI_OPCODE_DDX_FINE, result_dst, op[0]); - break; - case ir_unop_dFdy: - case ir_unop_dFdy_coarse: - case ir_unop_dFdy_fine: - { - /* The X component contains 1 or -1 depending on whether the framebuffer - * is a FBO or the window system buffer, respectively. - * It is then multiplied with the source operand of DDY. - */ - static const gl_state_index16 transform_y_state[STATE_LENGTH] - = { STATE_FB_WPOS_Y_TRANSFORM }; - - unsigned transform_y_index = - _mesa_add_state_reference(this->prog->Parameters, - transform_y_state); - - st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR, - transform_y_index, - glsl_type::vec4_type); - transform_y.swizzle = SWIZZLE_XXXX; - - st_src_reg temp = get_temp(glsl_type::vec4_type); - - emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]); - emit_asm(ir, ir->operation == ir_unop_dFdy_fine ? - TGSI_OPCODE_DDY_FINE : TGSI_OPCODE_DDY, result_dst, temp); - break; - } - - case ir_unop_frexp_sig: - emit_asm(ir, TGSI_OPCODE_DFRACEXP, result_dst, undef_dst, op[0]); - break; - - case ir_unop_frexp_exp: - emit_asm(ir, TGSI_OPCODE_DFRACEXP, undef_dst, result_dst, op[0]); - break; - - case ir_binop_add: - emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); - break; - case ir_binop_sub: - op[1].negate = ~op[1].negate; - emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); - break; - - case ir_binop_mul: - emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); - break; - case ir_binop_div: - emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); - break; - case ir_binop_mod: - if (result_dst.type == GLSL_TYPE_FLOAT) - assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); - else - emit_asm(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]); - break; - - case ir_binop_less: - emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); - break; - case ir_binop_gequal: - emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); - break; - case ir_binop_equal: - emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); - break; - case ir_binop_nequal: - emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); - break; - case ir_binop_all_equal: - /* "==" operator producing a scalar boolean. */ - if (ir->operands[0]->type->is_vector() || - ir->operands[1]->type->is_vector()) { - st_src_reg temp = get_temp(native_integers ? - glsl_type::uvec4_type : - glsl_type::vec4_type); - - if (native_integers) { - st_dst_reg temp_dst = st_dst_reg(temp); - st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); - - if (ir->operands[0]->type->is_boolean() && - ir->operands[1]->as_constant() && - ir->operands[1]->as_constant()->is_one()) { - emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), op[0]); - } else { - emit_asm(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]); - } - - /* Emit 1-3 AND operations to combine the SEQ results. */ - switch (ir->operands[0]->type->vector_elements) { - case 2: - break; - case 3: - temp_dst.writemask = WRITEMASK_Y; - temp1.swizzle = SWIZZLE_YYYY; - temp2.swizzle = SWIZZLE_ZZZZ; - emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); - break; - case 4: - temp_dst.writemask = WRITEMASK_X; - temp1.swizzle = SWIZZLE_XXXX; - temp2.swizzle = SWIZZLE_YYYY; - emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); - temp_dst.writemask = WRITEMASK_Y; - temp1.swizzle = SWIZZLE_ZZZZ; - temp2.swizzle = SWIZZLE_WWWW; - emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); - } - - temp1.swizzle = SWIZZLE_XXXX; - temp2.swizzle = SWIZZLE_YYYY; - emit_asm(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2); - } else { - emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); - - /* After the dot-product, the value will be an integer on the - * range [0,4]. Zero becomes 1.0, and positive values become zero. - */ - emit_dp(ir, result_dst, temp, temp, vector_elements); - - /* Negating the result of the dot-product gives values on the range - * [-4, 0]. Zero becomes 1.0, and negative values become zero. - * This is achieved using SGE. - */ - st_src_reg sge_src = result_src; - sge_src.negate = ~sge_src.negate; - emit_asm(ir, TGSI_OPCODE_SGE, result_dst, sge_src, - st_src_reg_for_float(0.0)); - } - } else { - emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); - } - break; - case ir_binop_any_nequal: - /* "!=" operator producing a scalar boolean. */ - if (ir->operands[0]->type->is_vector() || - ir->operands[1]->type->is_vector()) { - st_src_reg temp = get_temp(native_integers ? - glsl_type::uvec4_type : - glsl_type::vec4_type); - if (ir->operands[0]->type->is_boolean() && - ir->operands[1]->as_constant() && - ir->operands[1]->as_constant()->is_zero()) { - emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), op[0]); - } else { - emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); - } - - if (native_integers) { - st_dst_reg temp_dst = st_dst_reg(temp); - st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); - - /* Emit 1-3 OR operations to combine the SNE results. */ - switch (ir->operands[0]->type->vector_elements) { - case 2: - break; - case 3: - temp_dst.writemask = WRITEMASK_Y; - temp1.swizzle = SWIZZLE_YYYY; - temp2.swizzle = SWIZZLE_ZZZZ; - emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); - break; - case 4: - temp_dst.writemask = WRITEMASK_X; - temp1.swizzle = SWIZZLE_XXXX; - temp2.swizzle = SWIZZLE_YYYY; - emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); - temp_dst.writemask = WRITEMASK_Y; - temp1.swizzle = SWIZZLE_ZZZZ; - temp2.swizzle = SWIZZLE_WWWW; - emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); - } - - temp1.swizzle = SWIZZLE_XXXX; - temp2.swizzle = SWIZZLE_YYYY; - emit_asm(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2); - } else { - /* After the dot-product, the value will be an integer on the - * range [0,4]. Zero stays zero, and positive values become 1.0. - */ - glsl_to_tgsi_instruction *const dp = - emit_dp(ir, result_dst, temp, temp, vector_elements); - if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { - /* The clamping to [0,1] can be done for free in the fragment - * shader with a saturate. - */ - dp->saturate = true; - } else { - /* Negating the result of the dot-product gives values on the - * range [-4, 0]. Zero stays zero, and negative values become - * 1.0. This achieved using SLT. - */ - st_src_reg slt_src = result_src; - slt_src.negate = ~slt_src.negate; - emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, - st_src_reg_for_float(0.0)); - } - } - } else { - emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); - } - break; - - case ir_binop_logic_xor: - if (native_integers) - emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); - else - emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); - break; - - case ir_binop_logic_or: { - if (native_integers) { - /* If integers are used as booleans, we can use an actual "or" - * instruction. - */ - assert(native_integers); - emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); - } else { - /* After the addition, the value will be an integer on the - * range [0,2]. Zero stays zero, and positive values become 1.0. - */ - glsl_to_tgsi_instruction *add = - emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); - if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { - /* The clamping to [0,1] can be done for free in the fragment - * shader with a saturate if floats are being used as boolean - * values. - */ - add->saturate = true; - } else { - /* Negating the result of the addition gives values on the range - * [-2, 0]. Zero stays zero, and negative values become 1.0 - * This is achieved using SLT. - */ - st_src_reg slt_src = result_src; - slt_src.negate = ~slt_src.negate; - emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, - st_src_reg_for_float(0.0)); - } - } - break; - } - - case ir_binop_logic_and: - /* If native integers are disabled, the bool args are stored as float 0.0 - * or 1.0, so "mul" gives us "and". If they're enabled, just use the - * actual AND opcode. - */ - if (native_integers) - emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); - else - emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); - break; - - case ir_binop_dot: - assert(ir->operands[0]->type->is_vector()); - assert(ir->operands[0]->type == ir->operands[1]->type); - emit_dp(ir, result_dst, op[0], op[1], - ir->operands[0]->type->vector_elements); - break; - - case ir_unop_sqrt: - if (have_sqrt) { - emit_scalar(ir, TGSI_OPCODE_SQRT, result_dst, op[0]); - } else { - /* This is the only instruction sequence that makes the game "Risen" - * render correctly. ABS is not required for the game, but since GLSL - * declares negative values as "undefined", allowing us to do whatever - * we want, I choose to use ABS to match DX9 and pre-GLSL RSQ - * behavior. - */ - emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0].get_abs()); - emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, result_src); - } - break; - case ir_unop_rsq: - emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); - break; - case ir_unop_i2f: - if (native_integers) { - emit_asm(ir, TGSI_OPCODE_I2F, result_dst, op[0]); - break; - } - FALLTHROUGH; - case ir_unop_b2f: - if (native_integers) { - emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], - st_src_reg_for_float(1.0)); - break; - } - FALLTHROUGH; - case ir_unop_i2u: - case ir_unop_u2i: - case ir_unop_i642u64: - case ir_unop_u642i64: - /* Converting between signed and unsigned integers is a no-op. */ - result_src = op[0]; - result_src.type = result_dst.type; - break; - case ir_unop_b2i: - if (native_integers) { - /* Booleans are stored as integers using ~0 for true and 0 for false. - * GLSL requires that int(bool) return 1 for true and 0 for false. - * This conversion is done with AND, but it could be done with NEG. - */ - emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], - st_src_reg_for_int(1)); - } else { - /* Booleans and integers are both stored as floats when native - * integers are disabled. - */ - result_src = op[0]; - } - break; - case ir_unop_f2i: - if (native_integers) - emit_asm(ir, TGSI_OPCODE_F2I, result_dst, op[0]); - else - emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); - break; - case ir_unop_f2u: - if (native_integers) - emit_asm(ir, TGSI_OPCODE_F2U, result_dst, op[0]); - else - emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); - break; - case ir_unop_bitcast_f2i: - case ir_unop_bitcast_f2u: - /* Make sure we don't propagate the negate modifier to integer opcodes. */ - if (op[0].negate || op[0].abs) - emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); - else - result_src = op[0]; - result_src.type = ir->operation == ir_unop_bitcast_f2i ? GLSL_TYPE_INT : - GLSL_TYPE_UINT; - break; - case ir_unop_bitcast_i2f: - case ir_unop_bitcast_u2f: - result_src = op[0]; - result_src.type = GLSL_TYPE_FLOAT; - break; - case ir_unop_f2b: - emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], - st_src_reg_for_float(0.0)); - break; - case ir_unop_d2b: - emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], - st_src_reg_for_double(0.0)); - break; - case ir_unop_i2b: - if (native_integers) - emit_asm(ir, TGSI_OPCODE_USNE, result_dst, op[0], - st_src_reg_for_int(0)); - else - emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], - st_src_reg_for_float(0.0)); - break; - case ir_unop_bitcast_u642d: - case ir_unop_bitcast_i642d: - result_src = op[0]; - result_src.type = GLSL_TYPE_DOUBLE; - break; - case ir_unop_bitcast_d2i64: - result_src = op[0]; - result_src.type = GLSL_TYPE_INT64; - break; - case ir_unop_bitcast_d2u64: - result_src = op[0]; - result_src.type = GLSL_TYPE_UINT64; - break; - case ir_unop_trunc: - emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); - break; - case ir_unop_ceil: - emit_asm(ir, TGSI_OPCODE_CEIL, result_dst, op[0]); - break; - case ir_unop_floor: - emit_asm(ir, TGSI_OPCODE_FLR, result_dst, op[0]); - break; - case ir_unop_round_even: - emit_asm(ir, TGSI_OPCODE_ROUND, result_dst, op[0]); - break; - case ir_unop_fract: - emit_asm(ir, TGSI_OPCODE_FRC, result_dst, op[0]); - break; - - case ir_binop_min: - emit_asm(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]); - break; - case ir_binop_max: - emit_asm(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]); - break; - case ir_binop_pow: - emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]); - break; - - case ir_unop_bit_not: - if (native_integers) { - emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]); - break; - } - FALLTHROUGH; - case ir_unop_u2f: - if (native_integers) { - emit_asm(ir, TGSI_OPCODE_U2F, result_dst, op[0]); - break; - } - FALLTHROUGH; - case ir_binop_lshift: - case ir_binop_rshift: - if (native_integers) { - enum tgsi_opcode opcode = ir->operation == ir_binop_lshift - ? TGSI_OPCODE_SHL : TGSI_OPCODE_ISHR; - st_src_reg count; - - if (glsl_base_type_is_64bit(op[0].type)) { - /* GLSL shift operations have 32-bit shift counts, but TGSI uses - * 64 bits. - */ - count = get_temp(glsl_type::u64vec(ir->operands[1] - ->type->components())); - emit_asm(ir, TGSI_OPCODE_U2I64, st_dst_reg(count), op[1]); - } else { - count = op[1]; - } - - emit_asm(ir, opcode, result_dst, op[0], count); - break; - } - FALLTHROUGH; - case ir_binop_bit_and: - if (native_integers) { - emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); - break; - } - FALLTHROUGH; - case ir_binop_bit_xor: - if (native_integers) { - emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); - break; - } - FALLTHROUGH; - case ir_binop_bit_or: - if (native_integers) { - emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); - break; - } - - assert(!"GLSL 1.30 features unsupported"); - break; - - case ir_binop_ubo_load: { - if (ctx->Const.UseSTD430AsDefaultPacking) { - ir_rvalue *block = ir->operands[0]; - ir_rvalue *offset = ir->operands[1]; - ir_constant *const_block = block->as_constant(); - - st_src_reg cbuf(PROGRAM_CONSTANT, - (const_block ? const_block->value.u[0] + 1 : 1), - ir->type->base_type); - - cbuf.has_index2 = true; - - if (!const_block) { - block->accept(this); - cbuf.reladdr = ralloc(mem_ctx, st_src_reg); - *cbuf.reladdr = this->result; - emit_arl(ir, sampler_reladdr, this->result); - } - - /* Calculate the surface offset */ - offset->accept(this); - st_src_reg off = this->result; - - glsl_to_tgsi_instruction *inst = - emit_asm(ir, TGSI_OPCODE_LOAD, result_dst, off); - - if (result_dst.type == GLSL_TYPE_BOOL) - emit_asm(ir, TGSI_OPCODE_USNE, result_dst, st_src_reg(result_dst), - st_src_reg_for_int(0)); - - add_buffer_to_load_and_stores(inst, &cbuf, &this->instructions, - NULL); - } else { - ir_constant *const_uniform_block = ir->operands[0]->as_constant(); - ir_constant *const_offset_ir = ir->operands[1]->as_constant(); - unsigned const_offset = const_offset_ir ? - const_offset_ir->value.u[0] : 0; - unsigned const_block = const_uniform_block ? - const_uniform_block->value.u[0] + 1 : 1; - st_src_reg index_reg = get_temp(glsl_type::uint_type); - st_src_reg cbuf; - - cbuf.type = ir->type->base_type; - cbuf.file = PROGRAM_CONSTANT; - cbuf.index = 0; - cbuf.reladdr = NULL; - cbuf.negate = 0; - cbuf.abs = 0; - cbuf.index2D = const_block; - - assert(ir->type->is_vector() || ir->type->is_scalar()); - - if (const_offset_ir) { - /* Constant index into constant buffer */ - cbuf.reladdr = NULL; - cbuf.index = const_offset / 16; - } else { - ir_expression *offset_expr = ir->operands[1]->as_expression(); - st_src_reg offset = op[1]; - - /* The OpenGL spec is written in such a way that accesses with - * non-constant offset are almost always vec4-aligned. The only - * exception to this are members of structs in arrays of structs: - * each struct in an array of structs is at least vec4-aligned, - * but single-element and [ui]vec2 members of the struct may be at - * an offset that is not a multiple of 16 bytes. - * - * Here, we extract that offset, relying on previous passes to - * always generate offset expressions of the form - * (+ expr constant_offset). - * - * Note that the std430 layout, which allows more cases of - * alignment less than vec4 in arrays, is not supported for - * uniform blocks, so we do not have to deal with it here. - */ - if (offset_expr && offset_expr->operation == ir_binop_add) { - const_offset_ir = offset_expr->operands[1]->as_constant(); - if (const_offset_ir) { - const_offset = const_offset_ir->value.u[0]; - cbuf.index = const_offset / 16; - offset_expr->operands[0]->accept(this); - offset = this->result; - } - } - - /* Relative/variable index into constant buffer */ - emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), offset, - st_src_reg_for_int(4)); - cbuf.reladdr = ralloc(mem_ctx, st_src_reg); - *cbuf.reladdr = index_reg; - } - - if (const_uniform_block) { - /* Constant constant buffer */ - cbuf.reladdr2 = NULL; - } else { - /* Relative/variable constant buffer */ - cbuf.reladdr2 = ralloc(mem_ctx, st_src_reg); - *cbuf.reladdr2 = op[0]; - } - cbuf.has_index2 = true; - - cbuf.swizzle = swizzle_for_size(ir->type->vector_elements); - if (glsl_base_type_is_64bit(cbuf.type)) - cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 8, - const_offset % 16 / 8, - const_offset % 16 / 8, - const_offset % 16 / 8); - else - cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4, - const_offset % 16 / 4, - const_offset % 16 / 4, - const_offset % 16 / 4); - - if (ir->type->is_boolean()) { - emit_asm(ir, TGSI_OPCODE_USNE, result_dst, cbuf, - st_src_reg_for_int(0)); - } else { - emit_asm(ir, TGSI_OPCODE_MOV, result_dst, cbuf); - } - } - break; - } - case ir_triop_lrp: - /* note: we have to reorder the three args here */ - emit_asm(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]); - break; - case ir_triop_csel: - if (this->ctx->Const.NativeIntegers) - emit_asm(ir, TGSI_OPCODE_UCMP, result_dst, op[0], op[1], op[2]); - else { - op[0].negate = ~op[0].negate; - emit_asm(ir, TGSI_OPCODE_CMP, result_dst, op[0], op[1], op[2]); - } - break; - case ir_triop_bitfield_extract: - emit_asm(ir, TGSI_OPCODE_IBFE, result_dst, op[0], op[1], op[2]); - break; - case ir_quadop_bitfield_insert: - emit_asm(ir, TGSI_OPCODE_BFI, result_dst, op[0], op[1], op[2], op[3]); - break; - case ir_unop_bitfield_reverse: - emit_asm(ir, TGSI_OPCODE_BREV, result_dst, op[0]); - break; - case ir_unop_bit_count: - emit_asm(ir, TGSI_OPCODE_POPC, result_dst, op[0]); - break; - case ir_unop_find_msb: - emit_asm(ir, TGSI_OPCODE_IMSB, result_dst, op[0]); - break; - case ir_unop_find_lsb: - emit_asm(ir, TGSI_OPCODE_LSB, result_dst, op[0]); - break; - case ir_binop_imul_high: - emit_asm(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]); - break; - case ir_triop_fma: - /* In theory, MAD is incorrect here. */ - if (have_fma) - emit_asm(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]); - else - emit_asm(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]); - break; - case ir_unop_interpolate_at_centroid: - emit_asm(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]); - break; - case ir_binop_interpolate_at_offset: { - /* The y coordinate needs to be flipped for the default fb */ - static const gl_state_index16 transform_y_state[STATE_LENGTH] - = { STATE_FB_WPOS_Y_TRANSFORM }; - - unsigned transform_y_index = - _mesa_add_state_reference(this->prog->Parameters, - transform_y_state); - - st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR, - transform_y_index, - glsl_type::vec4_type); - transform_y.swizzle = SWIZZLE_XXXX; - - st_src_reg temp = get_temp(glsl_type::vec2_type); - st_dst_reg temp_dst = st_dst_reg(temp); - - emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[1]); - temp_dst.writemask = WRITEMASK_Y; - emit_asm(ir, TGSI_OPCODE_MUL, temp_dst, transform_y, op[1]); - emit_asm(ir, TGSI_OPCODE_INTERP_OFFSET, result_dst, op[0], temp); - break; - } - case ir_binop_interpolate_at_sample: - emit_asm(ir, TGSI_OPCODE_INTERP_SAMPLE, result_dst, op[0], op[1]); - break; - - case ir_unop_d2f: - emit_asm(ir, TGSI_OPCODE_D2F, result_dst, op[0]); - break; - case ir_unop_f2d: - emit_asm(ir, TGSI_OPCODE_F2D, result_dst, op[0]); - break; - case ir_unop_d2i: - emit_asm(ir, TGSI_OPCODE_D2I, result_dst, op[0]); - break; - case ir_unop_i2d: - emit_asm(ir, TGSI_OPCODE_I2D, result_dst, op[0]); - break; - case ir_unop_d2u: - emit_asm(ir, TGSI_OPCODE_D2U, result_dst, op[0]); - break; - case ir_unop_u2d: - emit_asm(ir, TGSI_OPCODE_U2D, result_dst, op[0]); - break; - case ir_unop_unpack_double_2x32: - case ir_unop_pack_double_2x32: - case ir_unop_unpack_int_2x32: - case ir_unop_pack_int_2x32: - case ir_unop_unpack_uint_2x32: - case ir_unop_pack_uint_2x32: - case ir_unop_unpack_sampler_2x32: - case ir_unop_pack_sampler_2x32: - case ir_unop_unpack_image_2x32: - case ir_unop_pack_image_2x32: - emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); - break; - - case ir_binop_ldexp: - if (ir->operands[0]->type->is_double()) { - emit_asm(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]); - } else if (ir->operands[0]->type->is_float()) { - emit_asm(ir, TGSI_OPCODE_LDEXP, result_dst, op[0], op[1]); - } else { - assert(!"Invalid ldexp for non-double opcode in glsl_to_tgsi_visitor::visit()"); - } - break; - - case ir_unop_pack_half_2x16: - emit_asm(ir, TGSI_OPCODE_PK2H, result_dst, op[0]); - break; - case ir_unop_unpack_half_2x16: - emit_asm(ir, TGSI_OPCODE_UP2H, result_dst, op[0]); - break; - - case ir_unop_get_buffer_size: { - ir_constant *const_offset = ir->operands[0]->as_constant(); - st_src_reg buffer( - PROGRAM_BUFFER, - const_offset ? const_offset->value.u[0] : 0, - GLSL_TYPE_UINT); - if (!const_offset) { - buffer.reladdr = ralloc(mem_ctx, st_src_reg); - *buffer.reladdr = op[0]; - emit_arl(ir, sampler_reladdr, op[0]); - } - emit_asm(ir, TGSI_OPCODE_RESQ, result_dst)->resource = buffer; - break; - } - - case ir_unop_u2i64: - case ir_unop_u2u64: - case ir_unop_b2i64: { - st_src_reg temp = get_temp(glsl_type::uvec4_type); - st_dst_reg temp_dst = st_dst_reg(temp); - unsigned orig_swz = op[0].swizzle; - /* - * To convert unsigned to 64-bit: - * zero Y channel, copy X channel. - */ - temp_dst.writemask = WRITEMASK_Y; - if (vector_elements > 1) - temp_dst.writemask |= WRITEMASK_W; - emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, st_src_reg_for_int(0)); - temp_dst.writemask = WRITEMASK_X; - if (vector_elements > 1) - temp_dst.writemask |= WRITEMASK_Z; - op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 0), GET_SWZ(orig_swz, 0), - GET_SWZ(orig_swz, 1), GET_SWZ(orig_swz, 1)); - if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64) - emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]); - else - emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], st_src_reg_for_int(1)); - result_src = temp; - result_src.type = GLSL_TYPE_UINT64; - if (vector_elements > 2) { - /* Subtle: We rely on the fact that get_temp here returns the next - * TGSI temporary register directly after the temp register used for - * the first two components, so that the result gets picked up - * automatically. - */ - st_src_reg temp = get_temp(glsl_type::uvec4_type); - st_dst_reg temp_dst = st_dst_reg(temp); - temp_dst.writemask = WRITEMASK_Y; - if (vector_elements > 3) - temp_dst.writemask |= WRITEMASK_W; - emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, st_src_reg_for_int(0)); - - temp_dst.writemask = WRITEMASK_X; - if (vector_elements > 3) - temp_dst.writemask |= WRITEMASK_Z; - op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 2), - GET_SWZ(orig_swz, 2), - GET_SWZ(orig_swz, 3), - GET_SWZ(orig_swz, 3)); - if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64) - emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]); - else - emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], - st_src_reg_for_int(1)); - } - break; - } - case ir_unop_i642i: - case ir_unop_u642i: - case ir_unop_u642u: - case ir_unop_i642u: { - st_src_reg temp = get_temp(glsl_type::uvec4_type); - st_dst_reg temp_dst = st_dst_reg(temp); - unsigned orig_swz = op[0].swizzle; - unsigned orig_idx = op[0].index; - int el; - temp_dst.writemask = WRITEMASK_X; - - for (el = 0; el < vector_elements; el++) { - unsigned swz = GET_SWZ(orig_swz, el); - if (swz & 1) - op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, - SWIZZLE_Z, SWIZZLE_Z); - else - op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, - SWIZZLE_X, SWIZZLE_X); - if (swz > 2) - op[0].index = orig_idx + 1; - op[0].type = GLSL_TYPE_UINT; - temp_dst.writemask = WRITEMASK_X << el; - emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]); - } - result_src = temp; - if (ir->operation == ir_unop_u642u || ir->operation == ir_unop_i642u) - result_src.type = GLSL_TYPE_UINT; - else - result_src.type = GLSL_TYPE_INT; - break; - } - case ir_unop_i642b: - emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0], - st_src_reg_for_int64(0)); - break; - case ir_unop_i642f: - emit_asm(ir, TGSI_OPCODE_I642F, result_dst, op[0]); - break; - case ir_unop_u642f: - emit_asm(ir, TGSI_OPCODE_U642F, result_dst, op[0]); - break; - case ir_unop_i642d: - emit_asm(ir, TGSI_OPCODE_I642D, result_dst, op[0]); - break; - case ir_unop_u642d: - emit_asm(ir, TGSI_OPCODE_U642D, result_dst, op[0]); - break; - case ir_unop_i2i64: - emit_asm(ir, TGSI_OPCODE_I2I64, result_dst, op[0]); - break; - case ir_unop_f2i64: - emit_asm(ir, TGSI_OPCODE_F2I64, result_dst, op[0]); - break; - case ir_unop_d2i64: - emit_asm(ir, TGSI_OPCODE_D2I64, result_dst, op[0]); - break; - case ir_unop_i2u64: - emit_asm(ir, TGSI_OPCODE_I2I64, result_dst, op[0]); - break; - case ir_unop_f2u64: - emit_asm(ir, TGSI_OPCODE_F2U64, result_dst, op[0]); - break; - case ir_unop_d2u64: - emit_asm(ir, TGSI_OPCODE_D2U64, result_dst, op[0]); - break; - /* these might be needed */ - case ir_unop_pack_snorm_2x16: - case ir_unop_pack_unorm_2x16: - case ir_unop_pack_snorm_4x8: - case ir_unop_pack_unorm_4x8: - - case ir_unop_unpack_snorm_2x16: - case ir_unop_unpack_unorm_2x16: - case ir_unop_unpack_snorm_4x8: - case ir_unop_unpack_unorm_4x8: - - case ir_quadop_vector: - case ir_binop_vector_extract: - case ir_triop_vector_insert: - case ir_binop_carry: - case ir_binop_borrow: - case ir_unop_ssbo_unsized_array_length: - case ir_unop_implicitly_sized_array_length: - case ir_unop_atan: - case ir_binop_atan2: - case ir_unop_clz: - case ir_binop_add_sat: - case ir_binop_sub_sat: - case ir_binop_abs_sub: - case ir_binop_avg: - case ir_binop_avg_round: - case ir_binop_mul_32x16: - case ir_unop_f162f: - case ir_unop_f2f16: - case ir_unop_f2fmp: - case ir_unop_f162b: - case ir_unop_b2f16: - case ir_unop_i2i: - case ir_unop_i2imp: - case ir_unop_u2u: - case ir_unop_u2ump: - /* This operation is not supported, or should have already been handled. - */ - assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()"); - break; - } - - this->result = result_src; -} - - -void -glsl_to_tgsi_visitor::visit(ir_swizzle *ir) -{ - st_src_reg src; - int i; - int swizzle[4] = {0}; - - /* Note that this is only swizzles in expressions, not those on the left - * hand side of an assignment, which do write masking. See ir_assignment - * for that. - */ - - ir->val->accept(this); - src = this->result; - assert(src.file != PROGRAM_UNDEFINED); - assert(ir->type->vector_elements > 0); - - for (i = 0; i < 4; i++) { - if (i < ir->type->vector_elements) { - switch (i) { - case 0: - swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); - break; - case 1: - swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); - break; - case 2: - swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); - break; - case 3: - swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); - break; - } - } else { - /* If the type is smaller than a vec4, replicate the last - * channel out. - */ - swizzle[i] = swizzle[ir->type->vector_elements - 1]; - } - } - - src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); - - this->result = src; -} - -/* Test if the variable is an array. Note that geometry and - * tessellation shader inputs are outputs are always arrays (except - * for patch inputs), so only the array element type is considered. - */ -static bool -is_inout_array(unsigned stage, ir_variable *var, bool *remove_array) -{ - const glsl_type *type = var->type; - - *remove_array = false; - - if ((stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in) || - (stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out)) - return false; - - if (((stage == MESA_SHADER_GEOMETRY && var->data.mode == ir_var_shader_in) || - (stage == MESA_SHADER_TESS_EVAL && var->data.mode == ir_var_shader_in) || - stage == MESA_SHADER_TESS_CTRL) && - !var->data.patch) { - if (!var->type->is_array()) - return false; /* a system value probably */ - - type = var->type->fields.array; - *remove_array = true; - } - - return type->is_array() || type->is_matrix(); -} - -static unsigned -st_translate_interp_loc(ir_variable *var) -{ - if (var->data.centroid) - return TGSI_INTERPOLATE_LOC_CENTROID; - else if (var->data.sample) - return TGSI_INTERPOLATE_LOC_SAMPLE; - else - return TGSI_INTERPOLATE_LOC_CENTER; -} - -void -glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) -{ - variable_storage *entry; - ir_variable *var = ir->var; - bool remove_array; - - if (handle_bound_deref(ir->as_dereference())) - return; - - entry = find_variable_storage(ir->var); - - if (!entry) { - switch (var->data.mode) { - case ir_var_uniform: - entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, - var->data.param_index); - _mesa_hash_table_insert(this->variables, var, entry); - break; - case ir_var_shader_in: { - /* The linker assigns locations for varyings and attributes, - * including deprecated builtins (like gl_Color), user-assign - * generic attributes (glBindVertexLocation), and - * user-defined varyings. - */ - assert(var->data.location != -1); - - const glsl_type *type_without_array = var->type->without_array(); - struct inout_decl *decl = &inputs[num_inputs]; - unsigned component = var->data.location_frac; - unsigned num_components; - num_inputs++; - - if (type_without_array->is_64bit()) - component = component / 2; - if (type_without_array->vector_elements) - num_components = type_without_array->vector_elements; - else - num_components = 4; - - decl->mesa_index = var->data.location; - decl->interp = (glsl_interp_mode) var->data.interpolation; - decl->interp_loc = st_translate_interp_loc(var); - decl->base_type = type_without_array->base_type; - decl->usage_mask = u_bit_consecutive(component, num_components); - - if (is_inout_array(shader->Stage, var, &remove_array)) { - decl->array_id = num_input_arrays + 1; - num_input_arrays++; - } else { - decl->array_id = 0; - } - - if (remove_array) - decl->size = type_size(var->type->fields.array); - else - decl->size = type_size(var->type); - - entry = new(mem_ctx) variable_storage(var, - PROGRAM_INPUT, - decl->mesa_index, - decl->array_id); - entry->component = component; - - _mesa_hash_table_insert(this->variables, var, entry); - - break; - } - case ir_var_shader_out: { - assert(var->data.location != -1); - - const glsl_type *type_without_array = var->type->without_array(); - struct inout_decl *decl = &outputs[num_outputs]; - unsigned component = var->data.location_frac; - unsigned num_components; - num_outputs++; - - decl->invariant = var->data.invariant; - - if (type_without_array->is_64bit()) - component = component / 2; - if (type_without_array->vector_elements) - num_components = type_without_array->vector_elements; - else - num_components = 4; - - decl->mesa_index = var->data.location + FRAG_RESULT_MAX * var->data.index; - decl->base_type = type_without_array->base_type; - decl->usage_mask = u_bit_consecutive(component, num_components); - if (var->data.stream & (1u << 31)) { - decl->gs_out_streams = var->data.stream & ~(1u << 31); - } else { - assert(var->data.stream < 4); - decl->gs_out_streams = 0; - for (unsigned i = 0; i < num_components; ++i) - decl->gs_out_streams |= var->data.stream << (2 * (component + i)); - } - - if (is_inout_array(shader->Stage, var, &remove_array)) { - decl->array_id = num_output_arrays + 1; - num_output_arrays++; - } else { - decl->array_id = 0; - } - - if (remove_array) - decl->size = type_size(var->type->fields.array); - else - decl->size = type_size(var->type); - - if (var->data.fb_fetch_output) { - st_dst_reg dst = st_dst_reg(get_temp(var->type)); - st_src_reg src = st_src_reg(PROGRAM_OUTPUT, decl->mesa_index, - var->type, component, decl->array_id); - emit_asm(NULL, TGSI_OPCODE_FBFETCH, dst, src); - entry = new(mem_ctx) variable_storage(var, dst.file, dst.index, - dst.array_id); - } else { - entry = new(mem_ctx) variable_storage(var, - PROGRAM_OUTPUT, - decl->mesa_index, - decl->array_id); - } - entry->component = component; - - _mesa_hash_table_insert(this->variables, var, entry); - - break; - } - case ir_var_system_value: - entry = new(mem_ctx) variable_storage(var, - PROGRAM_SYSTEM_VALUE, - var->data.location); - break; - case ir_var_auto: - case ir_var_temporary: - st_src_reg src = get_temp(var->type); - - entry = new(mem_ctx) variable_storage(var, src.file, src.index, - src.array_id); - _mesa_hash_table_insert(this->variables, var, entry); - - break; - } - - if (!entry) { - printf("Failed to make storage for %s\n", var->name); - exit(1); - } - } - - this->result = st_src_reg(entry->file, entry->index, var->type, - entry->component, entry->array_id); - if (this->shader->Stage == MESA_SHADER_VERTEX && - var->data.mode == ir_var_shader_in && - var->type->without_array()->is_double()) - this->result.is_double_vertex_input = true; - if (!native_integers) - this->result.type = GLSL_TYPE_FLOAT; -} - -static void -shrink_array_declarations(struct inout_decl *decls, unsigned count, - GLbitfield64* usage_mask, - GLbitfield64 double_usage_mask, - GLbitfield* patch_usage_mask) -{ - unsigned i; - int j; - - /* Fix array declarations by removing unused array elements at both ends - * of the arrays. For example, mat4[3] where only mat[1] is used. - */ - for (i = 0; i < count; i++) { - struct inout_decl *decl = &decls[i]; - if (!decl->array_id) - continue; - - /* Shrink the beginning. */ - for (j = 0; j < (int)decl->size; j++) { - if (decl->mesa_index >= VARYING_SLOT_PATCH0) { - if (*patch_usage_mask & - BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j)) - break; - } - else { - if (*usage_mask & BITFIELD64_BIT(decl->mesa_index+j)) - break; - if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1)) - break; - } - - decl->mesa_index++; - decl->size--; - j--; - } - - /* Shrink the end. */ - for (j = decl->size-1; j >= 0; j--) { - if (decl->mesa_index >= VARYING_SLOT_PATCH0) { - if (*patch_usage_mask & - BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j)) - break; - } - else { - if (*usage_mask & BITFIELD64_BIT(decl->mesa_index+j)) - break; - if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1)) - break; - } - - decl->size--; - } - - /* When not all entries of an array are accessed, we mark them as used - * here anyway, to ensure that the input/output mapping logic doesn't get - * confused. - * - * TODO This happens when an array isn't used via indirect access, which - * some game ports do (at least eON-based). There is an optimization - * opportunity here by replacing the array declaration with non-array - * declarations of those slots that are actually used. - */ - for (j = 1; j < (int)decl->size; ++j) { - if (decl->mesa_index >= VARYING_SLOT_PATCH0) - *patch_usage_mask |= BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j); - else - *usage_mask |= BITFIELD64_BIT(decl->mesa_index + j); - } - } -} - - -static void -mark_array_io(struct inout_decl *decls, unsigned count, - GLbitfield64* usage_mask, - GLbitfield64 double_usage_mask, - GLbitfield* patch_usage_mask) -{ - unsigned i; - int j; - - /* Fix array declarations by removing unused array elements at both ends - * of the arrays. For example, mat4[3] where only mat[1] is used. - */ - for (i = 0; i < count; i++) { - struct inout_decl *decl = &decls[i]; - if (!decl->array_id) - continue; - - /* When not all entries of an array are accessed, we mark them as used - * here anyway, to ensure that the input/output mapping logic doesn't get - * confused. - * - * TODO This happens when an array isn't used via indirect access, which - * some game ports do (at least eON-based). There is an optimization - * opportunity here by replacing the array declaration with non-array - * declarations of those slots that are actually used. - */ - for (j = 0; j < (int)decl->size; ++j) { - if (decl->mesa_index >= VARYING_SLOT_PATCH0) - *patch_usage_mask |= BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j); - else - *usage_mask |= BITFIELD64_BIT(decl->mesa_index + j); - } - } -} - -void -glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) -{ - ir_constant *index; - st_src_reg src; - bool is_2D = false; - ir_variable *var = ir->variable_referenced(); - - if (handle_bound_deref(ir->as_dereference())) - return; - - /* We only need the logic provided by count_vec4_slots() - * for arrays of structs. Indirect sampler and image indexing is handled - * elsewhere. - */ - int element_size = ir->type->without_array()->is_struct() ? - ir->type->count_vec4_slots(false, var->data.bindless) : - type_size(ir->type); - - index = ir->array_index->constant_expression_value(ralloc_parent(ir)); - - ir->array->accept(this); - src = this->result; - - if (!src.has_index2) { - switch (this->prog->Target) { - case GL_TESS_CONTROL_PROGRAM_NV: - is_2D = (src.file == PROGRAM_INPUT || src.file == PROGRAM_OUTPUT) && - !ir->variable_referenced()->data.patch; - break; - case GL_TESS_EVALUATION_PROGRAM_NV: - is_2D = src.file == PROGRAM_INPUT && - !ir->variable_referenced()->data.patch; - break; - case GL_GEOMETRY_PROGRAM_NV: - is_2D = src.file == PROGRAM_INPUT; - break; - } - } - - if (is_2D) - element_size = 1; - - if (index) { - - if (this->prog->Target == GL_VERTEX_PROGRAM_ARB && - src.file == PROGRAM_INPUT) - element_size = attrib_type_size(ir->type, true); - if (is_2D) { - src.index2D = index->value.i[0]; - src.has_index2 = true; - } else - src.index += index->value.i[0] * element_size; - } else { - /* Variable index array dereference. It eats the "vec4" of the - * base of the array and an index that offsets the TGSI register - * index. - */ - ir->array_index->accept(this); - - st_src_reg index_reg; - - if (element_size == 1) { - index_reg = this->result; - } else { - index_reg = get_temp(native_integers ? - glsl_type::int_type : glsl_type::float_type); - - emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), - this->result, st_src_reg_for_type(index_reg.type, element_size)); - } - - /* If there was already a relative address register involved, add the - * new and the old together to get the new offset. - */ - if (!is_2D && src.reladdr != NULL) { - st_src_reg accum_reg = get_temp(native_integers ? - glsl_type::int_type : glsl_type::float_type); - - emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), - index_reg, *src.reladdr); - - index_reg = accum_reg; - } - - if (is_2D) { - src.reladdr2 = ralloc(mem_ctx, st_src_reg); - *src.reladdr2 = index_reg; - src.index2D = 0; - src.has_index2 = true; - } else { - src.reladdr = ralloc(mem_ctx, st_src_reg); - *src.reladdr = index_reg; - } - } - - /* Change the register type to the element type of the array. */ - src.type = ir->type->base_type; - - this->result = src; -} - -void -glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) -{ - unsigned int i; - const glsl_type *struct_type = ir->record->type; - ir_variable *var = ir->record->variable_referenced(); - int offset = 0; - - if (handle_bound_deref(ir->as_dereference())) - return; - - ir->record->accept(this); - - assert(ir->field_idx >= 0); - assert(var); - for (i = 0; i < struct_type->length; i++) { - if (i == (unsigned) ir->field_idx) - break; - const glsl_type *member_type = struct_type->fields.structure[i].type; - offset += member_type->count_vec4_slots(false, var->data.bindless); - } - - /* If the type is smaller than a vec4, replicate the last channel out. */ - if (ir->type->is_scalar() || ir->type->is_vector()) - this->result.swizzle = swizzle_for_size(ir->type->vector_elements); - else - this->result.swizzle = SWIZZLE_NOOP; - - this->result.index += offset; - this->result.type = ir->type->base_type; -} - -/** - * We want to be careful in assignment setup to hit the actual storage - * instead of potentially using a temporary like we might with the - * ir_dereference handler. - */ -static st_dst_reg -get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v, int *component) -{ - /* The LHS must be a dereference. If the LHS is a variable indexed array - * access of a vector, it must be separated into a series conditional moves - * before reaching this point (see ir_vec_index_to_cond_assign). - */ - assert(ir->as_dereference()); - ir_dereference_array *deref_array = ir->as_dereference_array(); - if (deref_array) { - assert(!deref_array->array->type->is_vector()); - } - - /* Use the rvalue deref handler for the most part. We write swizzles using - * the writemask, but we do extract the base component for enhanced layouts - * from the source swizzle. - */ - ir->accept(v); - *component = GET_SWZ(v->result.swizzle, 0); - return st_dst_reg(v->result); -} - -/** - * Process the condition of a conditional assignment - * - * Examines the condition of a conditional assignment to generate the optimal - * first operand of a \c CMP instruction. If the condition is a relational - * operator with 0 (e.g., \c ir_binop_less), the value being compared will be - * used as the source for the \c CMP instruction. Otherwise the comparison - * is processed to a boolean result, and the boolean result is used as the - * operand to the CMP instruction. - */ -bool -glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) -{ - ir_rvalue *src_ir = ir; - bool negate = true; - bool switch_order = false; - - ir_expression *const expr = ir->as_expression(); - - if (native_integers) { - if ((expr != NULL) && (expr->num_operands == 2)) { - enum glsl_base_type type = expr->operands[0]->type->base_type; - if (type == GLSL_TYPE_INT || type == GLSL_TYPE_UINT || - type == GLSL_TYPE_BOOL) { - if (expr->operation == ir_binop_equal) { - if (expr->operands[0]->is_zero()) { - src_ir = expr->operands[1]; - switch_order = true; - } - else if (expr->operands[1]->is_zero()) { - src_ir = expr->operands[0]; - switch_order = true; - } - } - else if (expr->operation == ir_binop_nequal) { - if (expr->operands[0]->is_zero()) { - src_ir = expr->operands[1]; - } - else if (expr->operands[1]->is_zero()) { - src_ir = expr->operands[0]; - } - } - } - } - - src_ir->accept(this); - return switch_order; - } - - if ((expr != NULL) && (expr->num_operands == 2)) { - bool zero_on_left = false; - - if (expr->operands[0]->is_zero()) { - src_ir = expr->operands[1]; - zero_on_left = true; - } else if (expr->operands[1]->is_zero()) { - src_ir = expr->operands[0]; - zero_on_left = false; - } - - /* a is - 0 + - 0 + - * (a < 0) T F F ( a < 0) T F F - * (0 < a) F F T (-a < 0) F F T - * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) - * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) - * - * Note that exchanging the order of 0 and 'a' in the comparison simply - * means that the value of 'a' should be negated. - */ - if (src_ir != ir) { - switch (expr->operation) { - case ir_binop_less: - switch_order = false; - negate = zero_on_left; - break; - - case ir_binop_gequal: - switch_order = true; - negate = zero_on_left; - break; - - default: - /* This isn't the right kind of comparison afterall, so make sure - * the whole condition is visited. - */ - src_ir = ir; - break; - } - } - } - - src_ir->accept(this); - - /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the - * condition we produced is 0.0 or 1.0. By flipping the sign, we can - * choose which value TGSI_OPCODE_CMP produces without an extra instruction - * computing the condition. - */ - if (negate) - this->result.negate = ~this->result.negate; - - return switch_order; -} - -void -glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *type, - st_dst_reg *l, st_src_reg *r, - st_src_reg *cond, bool cond_swap) -{ - if (type->is_struct()) { - for (unsigned int i = 0; i < type->length; i++) { - emit_block_mov(ir, type->fields.structure[i].type, l, r, - cond, cond_swap); - } - return; - } - - if (type->is_array()) { - for (unsigned int i = 0; i < type->length; i++) { - emit_block_mov(ir, type->fields.array, l, r, cond, cond_swap); - } - return; - } - - if (type->is_matrix()) { - const struct glsl_type *vec_type; - - vec_type = glsl_type::get_instance(type->is_double() - ? GLSL_TYPE_DOUBLE : GLSL_TYPE_FLOAT, - type->vector_elements, 1); - - for (int i = 0; i < type->matrix_columns; i++) { - emit_block_mov(ir, vec_type, l, r, cond, cond_swap); - } - return; - } - - assert(type->is_scalar() || type->is_vector()); - - l->type = type->base_type; - r->type = type->base_type; - if (cond) { - st_src_reg l_src = st_src_reg(*l); - - if (l_src.file == PROGRAM_OUTPUT && - this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && - (l_src.index == FRAG_RESULT_DEPTH || - l_src.index == FRAG_RESULT_STENCIL)) { - /* This is a special case because the source swizzles will be shifted - * later to account for the difference between GLSL (where they're - * plain floats) and TGSI (where they're Z and Y components). */ - l_src.swizzle = SWIZZLE_XXXX; - } - - if (native_integers) { - emit_asm(ir, TGSI_OPCODE_UCMP, *l, *cond, - cond_swap ? l_src : *r, - cond_swap ? *r : l_src); - } else { - emit_asm(ir, TGSI_OPCODE_CMP, *l, *cond, - cond_swap ? l_src : *r, - cond_swap ? *r : l_src); - } - } else { - emit_asm(ir, TGSI_OPCODE_MOV, *l, *r); - } - l->index++; - r->index++; - if (type->is_dual_slot()) { - l->index++; - if (r->is_double_vertex_input == false) - r->index++; - } -} - -void -glsl_to_tgsi_visitor::visit(ir_assignment *ir) -{ - int dst_component; - st_dst_reg l; - st_src_reg r; - - /* all generated instructions need to be flaged as precise */ - this->precise = is_precise(ir->lhs->variable_referenced()); - ir->rhs->accept(this); - r = this->result; - - l = get_assignment_lhs(ir->lhs, this, &dst_component); - - { - int swizzles[4]; - int first_enabled_chan = 0; - int rhs_chan = 0; - ir_variable *variable = ir->lhs->variable_referenced(); - - if (shader->Stage == MESA_SHADER_FRAGMENT && - variable->data.mode == ir_var_shader_out && - (variable->data.location == FRAG_RESULT_DEPTH || - variable->data.location == FRAG_RESULT_STENCIL)) { - assert(ir->lhs->type->is_scalar()); - assert(ir->write_mask == WRITEMASK_X); - - if (variable->data.location == FRAG_RESULT_DEPTH) - l.writemask = WRITEMASK_Z; - else { - assert(variable->data.location == FRAG_RESULT_STENCIL); - l.writemask = WRITEMASK_Y; - } - } else if (ir->write_mask == 0) { - assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); - - unsigned num_elements = - ir->lhs->type->without_array()->vector_elements; - - if (num_elements) { - l.writemask = u_bit_consecutive(0, num_elements); - } else { - /* The type is a struct or an array of (array of) structs. */ - l.writemask = WRITEMASK_XYZW; - } - } else { - l.writemask = ir->write_mask; - } - - for (int i = 0; i < 4; i++) { - if (l.writemask & (1 << i)) { - first_enabled_chan = GET_SWZ(r.swizzle, i); - break; - } - } - - l.writemask = l.writemask << dst_component; - - /* Swizzle a small RHS vector into the channels being written. - * - * glsl ir treats write_mask as dictating how many channels are - * present on the RHS while TGSI treats write_mask as just - * showing which channels of the vec4 RHS get written. - */ - for (int i = 0; i < 4; i++) { - if (l.writemask & (1 << i)) - swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); - else - swizzles[i] = first_enabled_chan; - } - r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], - swizzles[2], swizzles[3]); - } - - assert(l.file != PROGRAM_UNDEFINED); - assert(r.file != PROGRAM_UNDEFINED); - - if (ir->rhs->as_expression() && - this->instructions.get_tail() && - ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && - !((glsl_to_tgsi_instruction *)this->instructions.get_tail())->is_64bit_expanded && - type_size(ir->lhs->type) == 1 && - !ir->lhs->type->is_64bit() && - l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst[0].writemask) { - /* To avoid emitting an extra MOV when assigning an expression to a - * variable, emit the last instruction of the expression again, but - * replace the destination register with the target of the assignment. - * Dead code elimination will remove the original instruction. - */ - glsl_to_tgsi_instruction *inst, *new_inst; - inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); - new_inst = emit_asm(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2], inst->src[3]); - new_inst->saturate = inst->saturate; - new_inst->resource = inst->resource; - inst->dead_mask = inst->dst[0].writemask; - } else { - emit_block_mov(ir, ir->rhs->type, &l, &r, NULL, false); - } - this->precise = 0; -} - - -void -glsl_to_tgsi_visitor::visit(ir_constant *ir) -{ - st_src_reg src; - GLdouble stack_vals[4] = { 0 }; - gl_constant_value *values = (gl_constant_value *) stack_vals; - GLenum gl_type = GL_NONE; - unsigned int i, elements; - static int in_array = 0; - gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; - - /* Unfortunately, 4 floats is all we can get into - * _mesa_add_typed_unnamed_constant. So, make a temp to store an - * aggregate constant and move each constant value into it. If we - * get lucky, copy propagation will eliminate the extra moves. - */ - if (ir->type->is_struct()) { - st_src_reg temp_base = get_temp(ir->type); - st_dst_reg temp = st_dst_reg(temp_base); - - for (i = 0; i < ir->type->length; i++) { - ir_constant *const field_value = ir->get_record_field(i); - int size = type_size(field_value->type); - - assert(size > 0); - - field_value->accept(this); - src = this->result; - - for (unsigned j = 0; j < (unsigned int)size; j++) { - emit_asm(ir, TGSI_OPCODE_MOV, temp, src); - - src.index++; - temp.index++; - } - } - this->result = temp_base; - return; - } - - if (ir->type->is_array()) { - st_src_reg temp_base = get_temp(ir->type); - st_dst_reg temp = st_dst_reg(temp_base); - int size = type_size(ir->type->fields.array); - - assert(size > 0); - in_array++; - - for (i = 0; i < ir->type->length; i++) { - ir->const_elements[i]->accept(this); - src = this->result; - for (int j = 0; j < size; j++) { - emit_asm(ir, TGSI_OPCODE_MOV, temp, src); - - src.index++; - temp.index++; - } - } - this->result = temp_base; - in_array--; - return; - } - - if (ir->type->is_matrix()) { - st_src_reg mat = get_temp(ir->type); - st_dst_reg mat_column = st_dst_reg(mat); - - for (i = 0; i < ir->type->matrix_columns; i++) { - switch (ir->type->base_type) { - case GLSL_TYPE_FLOAT: - values = (gl_constant_value *) - &ir->value.f[i * ir->type->vector_elements]; - - src = st_src_reg(file, -1, ir->type->base_type); - src.index = add_constant(file, - values, - ir->type->vector_elements, - GL_FLOAT, - &src.swizzle); - emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); - break; - case GLSL_TYPE_DOUBLE: - values = (gl_constant_value *) - &ir->value.d[i * ir->type->vector_elements]; - src = st_src_reg(file, -1, ir->type->base_type); - src.index = add_constant(file, - values, - ir->type->vector_elements, - GL_DOUBLE, - &src.swizzle); - if (ir->type->vector_elements >= 2) { - mat_column.writemask = WRITEMASK_XY; - src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, - SWIZZLE_X, SWIZZLE_Y); - emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); - } else { - mat_column.writemask = WRITEMASK_X; - src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, - SWIZZLE_X, SWIZZLE_X); - emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); - } - src.index++; - if (ir->type->vector_elements > 2) { - if (ir->type->vector_elements == 4) { - mat_column.writemask = WRITEMASK_ZW; - src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, - SWIZZLE_X, SWIZZLE_Y); - emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); - } else { - mat_column.writemask = WRITEMASK_Z; - src.swizzle = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Y, - SWIZZLE_Y, SWIZZLE_Y); - emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); - mat_column.writemask = WRITEMASK_XYZW; - src.swizzle = SWIZZLE_XYZW; - } - mat_column.index++; - } - break; - default: - unreachable("Illegal matrix constant type.\n"); - break; - } - mat_column.index++; - } - this->result = mat; - return; - } - - elements = ir->type->vector_elements; - switch (ir->type->base_type) { - case GLSL_TYPE_FLOAT: - gl_type = GL_FLOAT; - for (i = 0; i < ir->type->vector_elements; i++) { - values[i].f = ir->value.f[i]; - } - break; - case GLSL_TYPE_DOUBLE: - gl_type = GL_DOUBLE; - for (i = 0; i < ir->type->vector_elements; i++) { - memcpy(&values[i * 2], &ir->value.d[i], sizeof(double)); - } - break; - case GLSL_TYPE_INT64: - gl_type = GL_INT64_ARB; - for (i = 0; i < ir->type->vector_elements; i++) { - memcpy(&values[i * 2], &ir->value.d[i], sizeof(int64_t)); - } - break; - case GLSL_TYPE_UINT64: - gl_type = GL_UNSIGNED_INT64_ARB; - for (i = 0; i < ir->type->vector_elements; i++) { - memcpy(&values[i * 2], &ir->value.d[i], sizeof(uint64_t)); - } - break; - case GLSL_TYPE_UINT: - gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT; - for (i = 0; i < ir->type->vector_elements; i++) { - if (native_integers) - values[i].u = ir->value.u[i]; - else - values[i].f = ir->value.u[i]; - } - break; - case GLSL_TYPE_INT: - gl_type = native_integers ? GL_INT : GL_FLOAT; - for (i = 0; i < ir->type->vector_elements; i++) { - if (native_integers) - values[i].i = ir->value.i[i]; - else - values[i].f = ir->value.i[i]; - } - break; - case GLSL_TYPE_BOOL: - gl_type = native_integers ? GL_BOOL : GL_FLOAT; - for (i = 0; i < ir->type->vector_elements; i++) { - values[i].u = ir->value.b[i] ? ctx->Const.UniformBooleanTrue : 0; - } - break; - case GLSL_TYPE_SAMPLER: - case GLSL_TYPE_IMAGE: - gl_type = GL_UNSIGNED_INT; - elements = 2; - values[0].u = ir->value.u64[0] & 0xffffffff; - values[1].u = ir->value.u64[0] >> 32; - break; - default: - assert(!"Non-float/uint/int/bool/sampler/image constant"); - } - - this->result = st_src_reg(file, -1, ir->type); - this->result.index = add_constant(file, - values, - elements, - gl_type, - &this->result.swizzle); -} - -void -glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) -{ - exec_node *param = ir->actual_parameters.get_head(); - ir_dereference *deref = static_cast(param); - ir_variable *location = deref->variable_referenced(); - bool has_hw_atomics = st_context(ctx)->has_hw_atomics; - /* Calculate the surface offset */ - st_src_reg offset; - unsigned array_size = 0, base = 0; - uint16_t index = 0; - st_src_reg resource; - - get_deref_offsets(deref, &array_size, &base, &index, &offset, false); - - if (has_hw_atomics) { - variable_storage *entry = find_variable_storage(location); - st_src_reg buffer(PROGRAM_HW_ATOMIC, 0, GLSL_TYPE_ATOMIC_UINT, - location->data.binding); - - if (!entry) { - entry = new(mem_ctx) variable_storage(location, PROGRAM_HW_ATOMIC, - num_atomics); - _mesa_hash_table_insert(this->variables, location, entry); - - atomic_info[num_atomics].location = location->data.location; - atomic_info[num_atomics].binding = location->data.binding; - atomic_info[num_atomics].size = location->type->arrays_of_arrays_size(); - if (atomic_info[num_atomics].size == 0) - atomic_info[num_atomics].size = 1; - atomic_info[num_atomics].array_id = 0; - num_atomics++; - } - - if (offset.file != PROGRAM_UNDEFINED) { - if (atomic_info[entry->index].array_id == 0) { - num_atomic_arrays++; - atomic_info[entry->index].array_id = num_atomic_arrays; - } - buffer.array_id = atomic_info[entry->index].array_id; - } - - buffer.index = index; - buffer.index += location->data.offset / ATOMIC_COUNTER_SIZE; - buffer.has_index2 = true; - - if (offset.file != PROGRAM_UNDEFINED) { - buffer.reladdr = ralloc(mem_ctx, st_src_reg); - *buffer.reladdr = offset; - emit_arl(ir, sampler_reladdr, offset); - } - offset = st_src_reg_for_int(0); - - resource = buffer; - } else { - st_src_reg buffer(PROGRAM_BUFFER, - prog->info.num_ssbos + - location->data.binding, - GLSL_TYPE_ATOMIC_UINT); - - if (offset.file != PROGRAM_UNDEFINED) { - emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset), - offset, st_src_reg_for_int(ATOMIC_COUNTER_SIZE)); - emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset), - offset, st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE)); - } else { - offset = st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE); - } - resource = buffer; - } - - ir->return_deref->accept(this); - st_dst_reg dst(this->result); - dst.writemask = WRITEMASK_X; - - glsl_to_tgsi_instruction *inst; - - if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_read) { - inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, offset); - } else if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_increment) { - inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset, - st_src_reg_for_int(1)); - } else if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_predecrement) { - inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset, - st_src_reg_for_int(-1)); - emit_asm(ir, TGSI_OPCODE_ADD, dst, this->result, st_src_reg_for_int(-1)); - } else { - param = param->get_next(); - ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); - val->accept(this); - - st_src_reg data = this->result, data2 = undef_src; - enum tgsi_opcode opcode; - switch (ir->callee->intrinsic_id) { - case ir_intrinsic_atomic_counter_add: - opcode = TGSI_OPCODE_ATOMUADD; - break; - case ir_intrinsic_atomic_counter_min: - opcode = TGSI_OPCODE_ATOMIMIN; - break; - case ir_intrinsic_atomic_counter_max: - opcode = TGSI_OPCODE_ATOMIMAX; - break; - case ir_intrinsic_atomic_counter_and: - opcode = TGSI_OPCODE_ATOMAND; - break; - case ir_intrinsic_atomic_counter_or: - opcode = TGSI_OPCODE_ATOMOR; - break; - case ir_intrinsic_atomic_counter_xor: - opcode = TGSI_OPCODE_ATOMXOR; - break; - case ir_intrinsic_atomic_counter_exchange: - opcode = TGSI_OPCODE_ATOMXCHG; - break; - case ir_intrinsic_atomic_counter_comp_swap: { - opcode = TGSI_OPCODE_ATOMCAS; - param = param->get_next(); - val = ((ir_instruction *)param)->as_rvalue(); - val->accept(this); - data2 = this->result; - break; - } - default: - assert(!"Unexpected intrinsic"); - return; - } - - inst = emit_asm(ir, opcode, dst, offset, data, data2); - } - - inst->resource = resource; -} - -void -glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir) -{ - exec_node *param = ir->actual_parameters.get_head(); - - ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); - - param = param->get_next(); - ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); - - ir_constant *const_block = block->as_constant(); - st_src_reg buffer( - PROGRAM_BUFFER, - const_block ? const_block->value.u[0] : 0, - GLSL_TYPE_UINT); - - if (!const_block) { - block->accept(this); - buffer.reladdr = ralloc(mem_ctx, st_src_reg); - *buffer.reladdr = this->result; - emit_arl(ir, sampler_reladdr, this->result); - } - - /* Calculate the surface offset */ - offset->accept(this); - st_src_reg off = this->result; - - st_dst_reg dst = undef_dst; - if (ir->return_deref) { - ir->return_deref->accept(this); - dst = st_dst_reg(this->result); - dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1; - } - - glsl_to_tgsi_instruction *inst; - - if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_load) { - inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off); - if (dst.type == GLSL_TYPE_BOOL) - emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst), - st_src_reg_for_int(0)); - } else if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_store) { - param = param->get_next(); - ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); - val->accept(this); - - param = param->get_next(); - ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); - assert(write_mask); - dst.writemask = write_mask->value.u[0]; - - dst.type = this->result.type; - inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result); - } else { - param = param->get_next(); - ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); - val->accept(this); - - st_src_reg data = this->result, data2 = undef_src; - enum tgsi_opcode opcode; - switch (ir->callee->intrinsic_id) { - case ir_intrinsic_ssbo_atomic_add: - opcode = TGSI_OPCODE_ATOMUADD; - break; - case ir_intrinsic_ssbo_atomic_min: - opcode = TGSI_OPCODE_ATOMIMIN; - break; - case ir_intrinsic_ssbo_atomic_max: - opcode = TGSI_OPCODE_ATOMIMAX; - break; - case ir_intrinsic_ssbo_atomic_and: - opcode = TGSI_OPCODE_ATOMAND; - break; - case ir_intrinsic_ssbo_atomic_or: - opcode = TGSI_OPCODE_ATOMOR; - break; - case ir_intrinsic_ssbo_atomic_xor: - opcode = TGSI_OPCODE_ATOMXOR; - break; - case ir_intrinsic_ssbo_atomic_exchange: - opcode = TGSI_OPCODE_ATOMXCHG; - break; - case ir_intrinsic_ssbo_atomic_comp_swap: - opcode = TGSI_OPCODE_ATOMCAS; - param = param->get_next(); - val = ((ir_instruction *)param)->as_rvalue(); - val->accept(this); - data2 = this->result; - break; - default: - assert(!"Unexpected intrinsic"); - return; - } - - inst = emit_asm(ir, opcode, dst, off, data, data2); - } - - param = param->get_next(); - ir_constant *access = NULL; - if (!param->is_tail_sentinel()) { - access = ((ir_instruction *)param)->as_constant(); - assert(access); - } - - add_buffer_to_load_and_stores(inst, &buffer, &this->instructions, access); -} - -void -glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir) -{ - switch (ir->callee->intrinsic_id) { - case ir_intrinsic_memory_barrier: - emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, - st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER | - TGSI_MEMBAR_ATOMIC_BUFFER | - TGSI_MEMBAR_SHADER_IMAGE | - TGSI_MEMBAR_SHARED)); - break; - case ir_intrinsic_memory_barrier_atomic_counter: - emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, - st_src_reg_for_int(TGSI_MEMBAR_ATOMIC_BUFFER)); - break; - case ir_intrinsic_memory_barrier_buffer: - emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, - st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER)); - break; - case ir_intrinsic_memory_barrier_image: - emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, - st_src_reg_for_int(TGSI_MEMBAR_SHADER_IMAGE)); - break; - case ir_intrinsic_memory_barrier_shared: - emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, - st_src_reg_for_int(TGSI_MEMBAR_SHARED)); - break; - case ir_intrinsic_group_memory_barrier: - emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, - st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER | - TGSI_MEMBAR_ATOMIC_BUFFER | - TGSI_MEMBAR_SHADER_IMAGE | - TGSI_MEMBAR_SHARED | - TGSI_MEMBAR_THREAD_GROUP)); - break; - default: - assert(!"Unexpected memory barrier intrinsic"); - } -} - -void -glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir) -{ - exec_node *param = ir->actual_parameters.get_head(); - - ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); - - st_src_reg buffer(PROGRAM_MEMORY, 0, GLSL_TYPE_UINT); - - /* Calculate the surface offset */ - offset->accept(this); - st_src_reg off = this->result; - - st_dst_reg dst = undef_dst; - if (ir->return_deref) { - ir->return_deref->accept(this); - dst = st_dst_reg(this->result); - dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1; - } - - glsl_to_tgsi_instruction *inst; - - if (ir->callee->intrinsic_id == ir_intrinsic_shared_load) { - inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off); - inst->resource = buffer; - } else if (ir->callee->intrinsic_id == ir_intrinsic_shared_store) { - param = param->get_next(); - ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); - val->accept(this); - - param = param->get_next(); - ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); - assert(write_mask); - dst.writemask = write_mask->value.u[0]; - - dst.type = this->result.type; - inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result); - inst->resource = buffer; - } else { - param = param->get_next(); - ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); - val->accept(this); - - st_src_reg data = this->result, data2 = undef_src; - enum tgsi_opcode opcode; - switch (ir->callee->intrinsic_id) { - case ir_intrinsic_shared_atomic_add: - opcode = TGSI_OPCODE_ATOMUADD; - break; - case ir_intrinsic_shared_atomic_min: - opcode = TGSI_OPCODE_ATOMIMIN; - break; - case ir_intrinsic_shared_atomic_max: - opcode = TGSI_OPCODE_ATOMIMAX; - break; - case ir_intrinsic_shared_atomic_and: - opcode = TGSI_OPCODE_ATOMAND; - break; - case ir_intrinsic_shared_atomic_or: - opcode = TGSI_OPCODE_ATOMOR; - break; - case ir_intrinsic_shared_atomic_xor: - opcode = TGSI_OPCODE_ATOMXOR; - break; - case ir_intrinsic_shared_atomic_exchange: - opcode = TGSI_OPCODE_ATOMXCHG; - break; - case ir_intrinsic_shared_atomic_comp_swap: - opcode = TGSI_OPCODE_ATOMCAS; - param = param->get_next(); - val = ((ir_instruction *)param)->as_rvalue(); - val->accept(this); - data2 = this->result; - break; - default: - assert(!"Unexpected intrinsic"); - return; - } - - inst = emit_asm(ir, opcode, dst, off, data, data2); - inst->resource = buffer; - } -} - -static void -get_image_qualifiers(ir_dereference *ir, const glsl_type **type, - bool *memory_coherent, bool *memory_volatile, - bool *memory_restrict, bool *memory_read_only, - enum pipe_format *image_format) -{ - - switch (ir->ir_type) { - case ir_type_dereference_record: { - ir_dereference_record *deref_record = ir->as_dereference_record(); - const glsl_type *struct_type = deref_record->record->type; - int fild_idx = deref_record->field_idx; - - *type = struct_type->fields.structure[fild_idx].type->without_array(); - *memory_coherent = - struct_type->fields.structure[fild_idx].memory_coherent; - *memory_volatile = - struct_type->fields.structure[fild_idx].memory_volatile; - *memory_restrict = - struct_type->fields.structure[fild_idx].memory_restrict; - *memory_read_only = - struct_type->fields.structure[fild_idx].memory_read_only; - *image_format = - struct_type->fields.structure[fild_idx].image_format; - break; - } - - case ir_type_dereference_array: { - ir_dereference_array *deref_arr = ir->as_dereference_array(); - get_image_qualifiers((ir_dereference *)deref_arr->array, type, - memory_coherent, memory_volatile, memory_restrict, - memory_read_only, image_format); - break; - } - - case ir_type_dereference_variable: { - ir_variable *var = ir->variable_referenced(); - - *type = var->type->without_array(); - *memory_coherent = var->data.memory_coherent; - *memory_volatile = var->data.memory_volatile; - *memory_restrict = var->data.memory_restrict; - *memory_read_only = var->data.memory_read_only; - *image_format = var->data.image_format; - break; - } - - default: - break; - } -} - -void -glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) -{ - exec_node *param = ir->actual_parameters.get_head(); - - ir_dereference *img = (ir_dereference *)param; - const ir_variable *imgvar = img->variable_referenced(); - unsigned sampler_array_size = 1, sampler_base = 0; - bool memory_coherent = false, memory_volatile = false, - memory_restrict = false, memory_read_only = false; - enum pipe_format image_format = PIPE_FORMAT_NONE; - const glsl_type *type = NULL; - - get_image_qualifiers(img, &type, &memory_coherent, &memory_volatile, - &memory_restrict, &memory_read_only, &image_format); - - st_src_reg reladdr; - st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT); - uint16_t index = 0; - get_deref_offsets(img, &sampler_array_size, &sampler_base, - &index, &reladdr, !imgvar->contains_bindless()); - - image.index = index; - if (reladdr.file != PROGRAM_UNDEFINED) { - image.reladdr = ralloc(mem_ctx, st_src_reg); - *image.reladdr = reladdr; - emit_arl(ir, sampler_reladdr, reladdr); - } - - st_dst_reg dst = undef_dst; - if (ir->return_deref) { - ir->return_deref->accept(this); - dst = st_dst_reg(this->result); - dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1; - } - - glsl_to_tgsi_instruction *inst; - - st_src_reg bindless; - if (imgvar->contains_bindless()) { - img->accept(this); - bindless = this->result; - } - - if (ir->callee->intrinsic_id == ir_intrinsic_image_size) { - dst.writemask = WRITEMASK_XYZ; - inst = emit_asm(ir, TGSI_OPCODE_RESQ, dst); - } else if (ir->callee->intrinsic_id == ir_intrinsic_image_samples) { - st_src_reg res = get_temp(glsl_type::ivec4_type); - st_dst_reg dstres = st_dst_reg(res); - dstres.writemask = WRITEMASK_W; - inst = emit_asm(ir, TGSI_OPCODE_RESQ, dstres); - res.swizzle = SWIZZLE_WWWW; - emit_asm(ir, TGSI_OPCODE_MOV, dst, res); - } else { - st_src_reg arg1 = undef_src, arg2 = undef_src; - st_src_reg coord; - st_dst_reg coord_dst; - coord = get_temp(glsl_type::ivec4_type); - coord_dst = st_dst_reg(coord); - coord_dst.writemask = (1 << type->coordinate_components()) - 1; - param = param->get_next(); - ((ir_dereference *)param)->accept(this); - emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result); - coord.swizzle = SWIZZLE_XXXX; - switch (type->coordinate_components()) { - case 4: assert(!"unexpected coord count"); - FALLTHROUGH; - case 3: coord.swizzle |= SWIZZLE_Z << 6; - FALLTHROUGH; - case 2: coord.swizzle |= SWIZZLE_Y << 3; - } - - if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) { - param = param->get_next(); - ((ir_dereference *)param)->accept(this); - st_src_reg sample = this->result; - sample.swizzle = SWIZZLE_XXXX; - coord_dst.writemask = WRITEMASK_W; - emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample); - coord.swizzle |= SWIZZLE_W << 9; - } - - param = param->get_next(); - if (!param->is_tail_sentinel()) { - ((ir_dereference *)param)->accept(this); - arg1 = this->result; - param = param->get_next(); - } - - if (!param->is_tail_sentinel()) { - ((ir_dereference *)param)->accept(this); - arg2 = this->result; - param = param->get_next(); - } - - assert(param->is_tail_sentinel()); - - enum tgsi_opcode opcode; - switch (ir->callee->intrinsic_id) { - case ir_intrinsic_image_load: - opcode = TGSI_OPCODE_LOAD; - break; - case ir_intrinsic_image_store: - opcode = TGSI_OPCODE_STORE; - break; - case ir_intrinsic_image_atomic_add: - opcode = TGSI_OPCODE_ATOMUADD; - break; - case ir_intrinsic_image_atomic_min: - opcode = TGSI_OPCODE_ATOMIMIN; - break; - case ir_intrinsic_image_atomic_max: - opcode = TGSI_OPCODE_ATOMIMAX; - break; - case ir_intrinsic_image_atomic_and: - opcode = TGSI_OPCODE_ATOMAND; - break; - case ir_intrinsic_image_atomic_or: - opcode = TGSI_OPCODE_ATOMOR; - break; - case ir_intrinsic_image_atomic_xor: - opcode = TGSI_OPCODE_ATOMXOR; - break; - case ir_intrinsic_image_atomic_exchange: - opcode = TGSI_OPCODE_ATOMXCHG; - break; - case ir_intrinsic_image_atomic_comp_swap: - opcode = TGSI_OPCODE_ATOMCAS; - break; - case ir_intrinsic_image_atomic_inc_wrap: { - /* There's a bit of disagreement between GLSL and the hardware. The - * hardware wants to wrap after the given wrap value, while GLSL - * wants to wrap at the value. Subtract 1 to make up the difference. - */ - st_src_reg wrap = get_temp(glsl_type::uint_type); - emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(wrap), - arg1, st_src_reg_for_int(-1)); - arg1 = wrap; - opcode = TGSI_OPCODE_ATOMINC_WRAP; - break; - } - case ir_intrinsic_image_atomic_dec_wrap: - opcode = TGSI_OPCODE_ATOMDEC_WRAP; - break; - default: - assert(!"Unexpected intrinsic"); - return; - } - - inst = emit_asm(ir, opcode, dst, coord, arg1, arg2); - if (opcode == TGSI_OPCODE_STORE) - inst->dst[0].writemask = WRITEMASK_XYZW; - } - - if (imgvar->contains_bindless()) { - inst->resource = bindless; - inst->resource.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, - SWIZZLE_X, SWIZZLE_Y); - } else { - inst->resource = image; - inst->sampler_array_size = sampler_array_size; - inst->sampler_base = sampler_base; - } - - inst->tex_target = type->sampler_index(); - inst->image_format = image_format; - inst->read_only = memory_read_only; - - if (memory_coherent) - inst->buffer_access |= TGSI_MEMORY_COHERENT; - if (memory_restrict) - inst->buffer_access |= TGSI_MEMORY_RESTRICT; - if (memory_volatile) - inst->buffer_access |= TGSI_MEMORY_VOLATILE; -} - -void -glsl_to_tgsi_visitor::visit_generic_intrinsic(ir_call *ir, enum tgsi_opcode op) -{ - ir->return_deref->accept(this); - st_dst_reg dst = st_dst_reg(this->result); - - dst.writemask = u_bit_consecutive(0, ir->return_deref->var->type->vector_elements); - - st_src_reg src[4] = { undef_src, undef_src, undef_src, undef_src }; - unsigned num_src = 0; - foreach_in_list(ir_rvalue, param, &ir->actual_parameters) { - assert(num_src < ARRAY_SIZE(src)); - - this->result.file = PROGRAM_UNDEFINED; - param->accept(this); - assert(this->result.file != PROGRAM_UNDEFINED); - - src[num_src] = this->result; - num_src++; - } - - emit_asm(ir, op, dst, src[0], src[1], src[2], src[3]); -} - -void -glsl_to_tgsi_visitor::visit(ir_call *ir) -{ - ir_function_signature *sig = ir->callee; - - /* Filter out intrinsics */ - switch (sig->intrinsic_id) { - case ir_intrinsic_atomic_counter_read: - case ir_intrinsic_atomic_counter_increment: - case ir_intrinsic_atomic_counter_predecrement: - case ir_intrinsic_atomic_counter_add: - case ir_intrinsic_atomic_counter_min: - case ir_intrinsic_atomic_counter_max: - case ir_intrinsic_atomic_counter_and: - case ir_intrinsic_atomic_counter_or: - case ir_intrinsic_atomic_counter_xor: - case ir_intrinsic_atomic_counter_exchange: - case ir_intrinsic_atomic_counter_comp_swap: - visit_atomic_counter_intrinsic(ir); - return; - - case ir_intrinsic_ssbo_load: - case ir_intrinsic_ssbo_store: - case ir_intrinsic_ssbo_atomic_add: - case ir_intrinsic_ssbo_atomic_min: - case ir_intrinsic_ssbo_atomic_max: - case ir_intrinsic_ssbo_atomic_and: - case ir_intrinsic_ssbo_atomic_or: - case ir_intrinsic_ssbo_atomic_xor: - case ir_intrinsic_ssbo_atomic_exchange: - case ir_intrinsic_ssbo_atomic_comp_swap: - visit_ssbo_intrinsic(ir); - return; - - case ir_intrinsic_memory_barrier: - case ir_intrinsic_memory_barrier_atomic_counter: - case ir_intrinsic_memory_barrier_buffer: - case ir_intrinsic_memory_barrier_image: - case ir_intrinsic_memory_barrier_shared: - case ir_intrinsic_group_memory_barrier: - visit_membar_intrinsic(ir); - return; - - case ir_intrinsic_shared_load: - case ir_intrinsic_shared_store: - case ir_intrinsic_shared_atomic_add: - case ir_intrinsic_shared_atomic_min: - case ir_intrinsic_shared_atomic_max: - case ir_intrinsic_shared_atomic_and: - case ir_intrinsic_shared_atomic_or: - case ir_intrinsic_shared_atomic_xor: - case ir_intrinsic_shared_atomic_exchange: - case ir_intrinsic_shared_atomic_comp_swap: - visit_shared_intrinsic(ir); - return; - - case ir_intrinsic_image_load: - case ir_intrinsic_image_store: - case ir_intrinsic_image_atomic_add: - case ir_intrinsic_image_atomic_min: - case ir_intrinsic_image_atomic_max: - case ir_intrinsic_image_atomic_and: - case ir_intrinsic_image_atomic_or: - case ir_intrinsic_image_atomic_xor: - case ir_intrinsic_image_atomic_exchange: - case ir_intrinsic_image_atomic_comp_swap: - case ir_intrinsic_image_size: - case ir_intrinsic_image_samples: - case ir_intrinsic_image_atomic_inc_wrap: - case ir_intrinsic_image_atomic_dec_wrap: - visit_image_intrinsic(ir); - return; - - case ir_intrinsic_shader_clock: - visit_generic_intrinsic(ir, TGSI_OPCODE_CLOCK); - return; - - case ir_intrinsic_vote_all: - visit_generic_intrinsic(ir, TGSI_OPCODE_VOTE_ALL); - return; - case ir_intrinsic_vote_any: - visit_generic_intrinsic(ir, TGSI_OPCODE_VOTE_ANY); - return; - case ir_intrinsic_vote_eq: - visit_generic_intrinsic(ir, TGSI_OPCODE_VOTE_EQ); - return; - case ir_intrinsic_ballot: - visit_generic_intrinsic(ir, TGSI_OPCODE_BALLOT); - return; - case ir_intrinsic_read_first_invocation: - visit_generic_intrinsic(ir, TGSI_OPCODE_READ_FIRST); - return; - case ir_intrinsic_read_invocation: - visit_generic_intrinsic(ir, TGSI_OPCODE_READ_INVOC); - return; - - case ir_intrinsic_helper_invocation: - visit_generic_intrinsic(ir, TGSI_OPCODE_READ_HELPER); - return; - - case ir_intrinsic_invalid: - case ir_intrinsic_generic_load: - case ir_intrinsic_generic_store: - case ir_intrinsic_generic_atomic_add: - case ir_intrinsic_generic_atomic_and: - case ir_intrinsic_generic_atomic_or: - case ir_intrinsic_generic_atomic_xor: - case ir_intrinsic_generic_atomic_min: - case ir_intrinsic_generic_atomic_max: - case ir_intrinsic_generic_atomic_exchange: - case ir_intrinsic_generic_atomic_comp_swap: - case ir_intrinsic_begin_invocation_interlock: - case ir_intrinsic_end_invocation_interlock: - case ir_intrinsic_image_sparse_load: - case ir_intrinsic_is_sparse_texels_resident: - unreachable("Invalid intrinsic"); - } -} - -void -glsl_to_tgsi_visitor::calc_deref_offsets(ir_dereference *tail, - unsigned *array_elements, - uint16_t *index, - st_src_reg *indirect, - unsigned *location) -{ - switch (tail->ir_type) { - case ir_type_dereference_record: { - ir_dereference_record *deref_record = tail->as_dereference_record(); - const glsl_type *struct_type = deref_record->record->type; - int field_index = deref_record->field_idx; - - calc_deref_offsets(deref_record->record->as_dereference(), array_elements, index, indirect, location); - - assert(field_index >= 0); - *location += struct_type->struct_location_offset(field_index); - break; - } - - case ir_type_dereference_array: { - ir_dereference_array *deref_arr = tail->as_dereference_array(); - - void *mem_ctx = ralloc_parent(deref_arr); - ir_constant *array_index = - deref_arr->array_index->constant_expression_value(mem_ctx); - - if (!array_index) { - st_src_reg temp_reg; - st_dst_reg temp_dst; - - temp_reg = get_temp(glsl_type::uint_type); - temp_dst = st_dst_reg(temp_reg); - temp_dst.writemask = 1; - - deref_arr->array_index->accept(this); - if (*array_elements != 1) - emit_asm(NULL, TGSI_OPCODE_MUL, temp_dst, this->result, st_src_reg_for_int(*array_elements)); - else - emit_asm(NULL, TGSI_OPCODE_MOV, temp_dst, this->result); - - if (indirect->file == PROGRAM_UNDEFINED) - *indirect = temp_reg; - else { - temp_dst = st_dst_reg(*indirect); - temp_dst.writemask = 1; - emit_asm(NULL, TGSI_OPCODE_ADD, temp_dst, *indirect, temp_reg); - } - } else - *index += array_index->value.u[0] * *array_elements; - - *array_elements *= deref_arr->array->type->length; - - calc_deref_offsets(deref_arr->array->as_dereference(), array_elements, index, indirect, location); - break; - } - default: - break; - } -} - -void -glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir, - unsigned *array_size, - unsigned *base, - uint16_t *index, - st_src_reg *reladdr, - bool opaque) -{ - GLuint shader = _mesa_program_enum_to_shader_stage(this->prog->Target); - unsigned location = 0; - ir_variable *var = ir->variable_referenced(); - - reladdr->reset(); - - *base = 0; - *array_size = 1; - - assert(var); - location = var->data.location; - calc_deref_offsets(ir, array_size, index, reladdr, &location); - - /* - * If we end up with no indirect then adjust the base to the index, - * and set the array size to 1. - */ - if (reladdr->file == PROGRAM_UNDEFINED) { - *base = *index; - *array_size = 1; - } - - if (opaque) { - assert(location != 0xffffffff); - *base += this->shader_program->data->UniformStorage[location].opaque[shader].index; - *index += this->shader_program->data->UniformStorage[location].opaque[shader].index; - } -} - -st_src_reg -glsl_to_tgsi_visitor::canonicalize_gather_offset(st_src_reg offset) -{ - if (offset.reladdr || offset.reladdr2 || - offset.has_index2 || - offset.file == PROGRAM_UNIFORM || - offset.file == PROGRAM_CONSTANT || - offset.file == PROGRAM_STATE_VAR) { - st_src_reg tmp = get_temp(glsl_type::ivec2_type); - st_dst_reg tmp_dst = st_dst_reg(tmp); - tmp_dst.writemask = WRITEMASK_XY; - emit_asm(NULL, TGSI_OPCODE_MOV, tmp_dst, offset); - return tmp; - } - - return offset; -} - -bool -glsl_to_tgsi_visitor::handle_bound_deref(ir_dereference *ir) -{ - ir_variable *var = ir->variable_referenced(); - - if (!var || var->data.mode != ir_var_uniform || var->data.bindless || - !(ir->type->is_image() || ir->type->is_sampler())) - return false; - - /* Convert from bound sampler/image to bindless handle. */ - bool is_image = ir->type->is_image(); - st_src_reg resource(is_image ? PROGRAM_IMAGE : PROGRAM_SAMPLER, 0, GLSL_TYPE_UINT); - uint16_t index = 0; - unsigned array_size = 1, base = 0; - st_src_reg reladdr; - get_deref_offsets(ir, &array_size, &base, &index, &reladdr, true); - - resource.index = index; - if (reladdr.file != PROGRAM_UNDEFINED) { - resource.reladdr = ralloc(mem_ctx, st_src_reg); - *resource.reladdr = reladdr; - emit_arl(ir, sampler_reladdr, reladdr); - } - - this->result = get_temp(glsl_type::uvec2_type); - st_dst_reg dst(this->result); - dst.writemask = WRITEMASK_XY; - - glsl_to_tgsi_instruction *inst = emit_asm( - ir, is_image ? TGSI_OPCODE_IMG2HND : TGSI_OPCODE_SAMP2HND, dst); - - inst->tex_target = ir->type->sampler_index(); - inst->resource = resource; - inst->sampler_array_size = array_size; - inst->sampler_base = base; - - return true; -} - -void -glsl_to_tgsi_visitor::visit(ir_texture *ir) -{ - st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy; - st_src_reg offset[MAX_GLSL_TEXTURE_OFFSET], sample_index, component; - st_src_reg levels_src, reladdr; - st_dst_reg result_dst, coord_dst, cube_sc_dst; - glsl_to_tgsi_instruction *inst = NULL; - enum tgsi_opcode opcode = TGSI_OPCODE_NOP; - const glsl_type *sampler_type = ir->sampler->type; - unsigned sampler_array_size = 1, sampler_base = 0; - bool is_cube_array = false; - ir_variable *var = ir->sampler->variable_referenced(); - unsigned i; - - /* if we are a cube array sampler or a cube shadow */ - if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) { - is_cube_array = sampler_type->sampler_array; - } - - if (ir->coordinate) { - ir->coordinate->accept(this); - - /* Put our coords in a temp. We'll need to modify them for shadow, - * projection, or LOD, so the only case we'd use it as-is is if - * we're doing plain old texturing. The optimization passes on - * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. - */ - coord = get_temp(glsl_type::vec4_type); - coord_dst = st_dst_reg(coord); - coord_dst.writemask = (1 << ir->coordinate->type->vector_elements) - 1; - emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result); - } - - if (ir->projector) { - ir->projector->accept(this); - projector = this->result; - } - - /* Storage for our result. Ideally for an assignment we'd be using - * the actual storage for the result here, instead. - */ - result_src = get_temp(ir->type); - result_dst = st_dst_reg(result_src); - result_dst.writemask = (1 << ir->type->vector_elements) - 1; - - switch (ir->op) { - case ir_tex: - opcode = (is_cube_array && ir->shadow_comparator) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX; - if (ir->offset) { - ir->offset->accept(this); - offset[0] = this->result; - } - break; - case ir_txb: - if (is_cube_array || - (sampler_type->sampler_shadow && sampler_type->coordinate_components() >= 3)) { - opcode = TGSI_OPCODE_TXB2; - } - else { - opcode = TGSI_OPCODE_TXB; - } - ir->lod_info.bias->accept(this); - lod_info = this->result; - if (ir->offset) { - ir->offset->accept(this); - offset[0] = this->result; - } - break; - case ir_txl: - if (this->has_tex_txf_lz && ir->lod_info.lod->is_zero()) { - opcode = TGSI_OPCODE_TEX_LZ; - } else { - opcode = (is_cube_array || (sampler_type->sampler_shadow && sampler_type->coordinate_components() >= 3)) ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL; - ir->lod_info.lod->accept(this); - lod_info = this->result; - } - if (ir->offset) { - ir->offset->accept(this); - offset[0] = this->result; - } - break; - case ir_txd: - opcode = TGSI_OPCODE_TXD; - ir->lod_info.grad.dPdx->accept(this); - dx = this->result; - ir->lod_info.grad.dPdy->accept(this); - dy = this->result; - if (ir->offset) { - ir->offset->accept(this); - offset[0] = this->result; - } - break; - case ir_txs: - opcode = TGSI_OPCODE_TXQ; - ir->lod_info.lod->accept(this); - lod_info = this->result; - break; - case ir_query_levels: - opcode = TGSI_OPCODE_TXQ; - lod_info = undef_src; - levels_src = get_temp(ir->type); - break; - case ir_txf: - if (this->has_tex_txf_lz && ir->lod_info.lod->is_zero()) { - opcode = TGSI_OPCODE_TXF_LZ; - } else { - opcode = TGSI_OPCODE_TXF; - ir->lod_info.lod->accept(this); - lod_info = this->result; - } - if (ir->offset) { - ir->offset->accept(this); - offset[0] = this->result; - } - break; - case ir_txf_ms: - opcode = TGSI_OPCODE_TXF; - ir->lod_info.sample_index->accept(this); - sample_index = this->result; - break; - case ir_tg4: - opcode = TGSI_OPCODE_TG4; - ir->lod_info.component->accept(this); - component = this->result; - if (ir->offset) { - ir->offset->accept(this); - if (ir->offset->type->is_array()) { - const glsl_type *elt_type = ir->offset->type->fields.array; - for (i = 0; i < ir->offset->type->length; i++) { - offset[i] = this->result; - offset[i].index += i * type_size(elt_type); - offset[i].type = elt_type->base_type; - offset[i].swizzle = swizzle_for_size(elt_type->vector_elements); - offset[i] = canonicalize_gather_offset(offset[i]); - } - } else { - offset[0] = canonicalize_gather_offset(this->result); - } - } - break; - case ir_lod: - opcode = TGSI_OPCODE_LODQ; - break; - case ir_texture_samples: - opcode = TGSI_OPCODE_TXQS; - break; - case ir_samples_identical: - unreachable("Unexpected ir_samples_identical opcode"); - } - - if (ir->projector) { - if (opcode == TGSI_OPCODE_TEX) { - /* Slot the projector in as the last component of the coord. */ - coord_dst.writemask = WRITEMASK_W; - emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, projector); - coord_dst.writemask = WRITEMASK_XYZW; - opcode = TGSI_OPCODE_TXP; - } else { - st_src_reg coord_w = coord; - coord_w.swizzle = SWIZZLE_WWWW; - - /* For the other TEX opcodes there's no projective version - * since the last slot is taken up by LOD info. Do the - * projective divide now. - */ - coord_dst.writemask = WRITEMASK_W; - emit_asm(ir, TGSI_OPCODE_RCP, coord_dst, projector); - - /* In the case where we have to project the coordinates "by hand," - * the shadow comparator value must also be projected. - */ - st_src_reg tmp_src = coord; - if (ir->shadow_comparator) { - /* Slot the shadow value in as the second to last component of the - * coord. - */ - ir->shadow_comparator->accept(this); - - tmp_src = get_temp(glsl_type::vec4_type); - st_dst_reg tmp_dst = st_dst_reg(tmp_src); - - /* Projective division not allowed for array samplers. */ - assert(!sampler_type->sampler_array); - - tmp_dst.writemask = WRITEMASK_Z; - emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); - - tmp_dst.writemask = WRITEMASK_XY; - emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, coord); - } - - coord_dst.writemask = WRITEMASK_XYZ; - emit_asm(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); - - coord_dst.writemask = WRITEMASK_XYZW; - coord.swizzle = SWIZZLE_XYZW; - } - } - - /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the - * shadow comparator was put in the correct place (and projected) by the - * code, above, that handles by-hand projection. - */ - if (ir->shadow_comparator && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { - /* Slot the shadow value in as the second to last component of the - * coord. - */ - ir->shadow_comparator->accept(this); - - if (is_cube_array) { - if (lod_info.file != PROGRAM_UNDEFINED) { - // If we have both a cube array *and* a bias/lod, stick the - // comparator into the .Y of the second argument. - st_src_reg tmp = get_temp(glsl_type::vec2_type); - cube_sc_dst = st_dst_reg(tmp); - cube_sc_dst.writemask = WRITEMASK_X; - emit_asm(ir, TGSI_OPCODE_MOV, cube_sc_dst, lod_info); - lod_info = tmp; - cube_sc_dst.writemask = WRITEMASK_Y; - } else { - cube_sc = get_temp(glsl_type::float_type); - cube_sc_dst = st_dst_reg(cube_sc); - cube_sc_dst.writemask = WRITEMASK_X; - } - emit_asm(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result); - } - else { - if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && - sampler_type->sampler_array) || - sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) { - coord_dst.writemask = WRITEMASK_W; - } else { - coord_dst.writemask = WRITEMASK_Z; - } - emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result); - coord_dst.writemask = WRITEMASK_XYZW; - } - } - - if (ir->op == ir_txf_ms) { - coord_dst.writemask = WRITEMASK_W; - emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample_index); - coord_dst.writemask = WRITEMASK_XYZW; - } else if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB || - opcode == TGSI_OPCODE_TXF) { - /* TGSI stores LOD or LOD bias in the last channel of the coords. */ - coord_dst.writemask = WRITEMASK_W; - emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); - coord_dst.writemask = WRITEMASK_XYZW; - } - - st_src_reg sampler(PROGRAM_SAMPLER, 0, GLSL_TYPE_UINT); - - uint16_t index = 0; - get_deref_offsets(ir->sampler, &sampler_array_size, &sampler_base, - &index, &reladdr, !var->contains_bindless()); - - sampler.index = index; - if (reladdr.file != PROGRAM_UNDEFINED) { - sampler.reladdr = ralloc(mem_ctx, st_src_reg); - *sampler.reladdr = reladdr; - emit_arl(ir, sampler_reladdr, reladdr); - } - - st_src_reg bindless; - if (var->contains_bindless()) { - ir->sampler->accept(this); - bindless = this->result; - } - - if (opcode == TGSI_OPCODE_TXD) - inst = emit_asm(ir, opcode, result_dst, coord, dx, dy); - else if (opcode == TGSI_OPCODE_TXQ) { - if (ir->op == ir_query_levels) { - /* the level is stored in W */ - inst = emit_asm(ir, opcode, st_dst_reg(levels_src), lod_info); - result_dst.writemask = WRITEMASK_X; - levels_src.swizzle = SWIZZLE_WWWW; - emit_asm(ir, TGSI_OPCODE_MOV, result_dst, levels_src); - } else - inst = emit_asm(ir, opcode, result_dst, lod_info); - } else if (opcode == TGSI_OPCODE_TXQS) { - inst = emit_asm(ir, opcode, result_dst); - } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) { - inst = emit_asm(ir, opcode, result_dst, coord, lod_info); - } else if (opcode == TGSI_OPCODE_TEX2) { - inst = emit_asm(ir, opcode, result_dst, coord, cube_sc); - } else if (opcode == TGSI_OPCODE_TG4) { - if (is_cube_array && ir->shadow_comparator) { - inst = emit_asm(ir, opcode, result_dst, coord, cube_sc); - } else { - if (this->tg4_component_in_swizzle) { - inst = emit_asm(ir, opcode, result_dst, coord); - int idx = 0; - foreach_in_list(immediate_storage, entry, &this->immediates) { - if (component.index == idx) { - gl_constant_value value = entry->values[component.swizzle]; - inst->gather_component = value.i; - break; - } - idx++; - } - } else { - inst = emit_asm(ir, opcode, result_dst, coord, component); - } - } - } else - inst = emit_asm(ir, opcode, result_dst, coord); - - if (ir->shadow_comparator) - inst->tex_shadow = GL_TRUE; - - if (var->contains_bindless()) { - inst->resource = bindless; - inst->resource.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, - SWIZZLE_X, SWIZZLE_Y); - } else { - inst->resource = sampler; - inst->sampler_array_size = sampler_array_size; - inst->sampler_base = sampler_base; - } - - if (ir->offset) { - if (!inst->tex_offsets) - inst->tex_offsets = rzalloc_array(inst, st_src_reg, - MAX_GLSL_TEXTURE_OFFSET); - - for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET && - offset[i].file != PROGRAM_UNDEFINED; i++) - inst->tex_offsets[i] = offset[i]; - inst->tex_offset_num_offset = i; - } - - inst->tex_target = sampler_type->sampler_index(); - inst->tex_type = ir->type->base_type; - - this->result = result_src; -} - -void -glsl_to_tgsi_visitor::visit(ir_return *ir) -{ - assert(!ir->get_value()); - - emit_asm(ir, TGSI_OPCODE_RET); -} - -void -glsl_to_tgsi_visitor::visit(ir_discard *ir) -{ - if (ir->condition) { - ir->condition->accept(this); - st_src_reg condition = this->result; - - /* Convert the bool condition to a float so we can negate. */ - if (native_integers) { - st_src_reg temp = get_temp(ir->condition->type); - emit_asm(ir, TGSI_OPCODE_AND, st_dst_reg(temp), - condition, st_src_reg_for_float(1.0)); - condition = temp; - } - - condition.negate = ~condition.negate; - emit_asm(ir, TGSI_OPCODE_KILL_IF, undef_dst, condition); - } else { - /* unconditional kil */ - emit_asm(ir, TGSI_OPCODE_KILL); - } -} - -void -glsl_to_tgsi_visitor::visit(ir_demote *ir) -{ - emit_asm(ir, TGSI_OPCODE_DEMOTE); -} - -void -glsl_to_tgsi_visitor::visit(ir_if *ir) -{ - enum tgsi_opcode if_opcode; - glsl_to_tgsi_instruction *if_inst; - - ir->condition->accept(this); - assert(this->result.file != PROGRAM_UNDEFINED); - - if_opcode = native_integers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF; - - if_inst = emit_asm(ir->condition, if_opcode, undef_dst, this->result); - - this->instructions.push_tail(if_inst); - - visit_exec_list(&ir->then_instructions, this); - - if (!ir->else_instructions.is_empty()) { - emit_asm(ir->condition, TGSI_OPCODE_ELSE); - visit_exec_list(&ir->else_instructions, this); - } - - if_inst = emit_asm(ir->condition, TGSI_OPCODE_ENDIF); -} - - -void -glsl_to_tgsi_visitor::visit(ir_emit_vertex *ir) -{ - assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV); - - ir->stream->accept(this); - emit_asm(ir, TGSI_OPCODE_EMIT, undef_dst, this->result); -} - -void -glsl_to_tgsi_visitor::visit(ir_end_primitive *ir) -{ - assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV); - - ir->stream->accept(this); - emit_asm(ir, TGSI_OPCODE_ENDPRIM, undef_dst, this->result); -} - -void -glsl_to_tgsi_visitor::visit(ir_barrier *ir) -{ - assert(this->prog->Target == GL_TESS_CONTROL_PROGRAM_NV || - this->prog->Target == GL_COMPUTE_PROGRAM_NV); - - emit_asm(ir, TGSI_OPCODE_BARRIER); -} - -glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() -{ - STATIC_ASSERT(sizeof(samplers_used) * 8 >= PIPE_MAX_SAMPLERS); - - result.file = PROGRAM_UNDEFINED; - next_temp = 1; - array_sizes = NULL; - max_num_arrays = 0; - next_array = 0; - num_inputs = 0; - num_outputs = 0; - num_input_arrays = 0; - num_output_arrays = 0; - num_atomics = 0; - num_atomic_arrays = 0; - num_immediates = 0; - num_address_regs = 0; - samplers_used = 0; - images_used = 0; - indirect_addr_consts = false; - wpos_transform_const = -1; - native_integers = false; - mem_ctx = ralloc_context(NULL); - ctx = NULL; - prog = NULL; - precise = 0; - tg4_component_in_swizzle = false; - shader_program = NULL; - shader = NULL; - options = NULL; - have_sqrt = false; - have_fma = false; - use_shared_memory = false; - has_tex_txf_lz = false; - variables = NULL; -} - -static void var_destroy(struct hash_entry *entry) -{ - variable_storage *storage = (variable_storage *)entry->data; - - delete storage; -} - -glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() -{ - _mesa_hash_table_destroy(variables, var_destroy); - free(array_sizes); - ralloc_free(mem_ctx); -} - -extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) -{ - delete v; -} - - -/** - * Count resources used by the given gpu program (number of texture - * samplers, etc). - */ -static void -count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) -{ - v->samplers_used = 0; - v->images_used = 0; - BITSET_ZERO(prog->info.textures_used_by_txf); - - foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) { - if (inst->info->is_tex) { - for (int i = 0; i < inst->sampler_array_size; i++) { - unsigned idx = inst->sampler_base + i; - v->samplers_used |= 1u << idx; - - debug_assert(idx < (int)ARRAY_SIZE(v->sampler_types)); - v->sampler_types[idx] = inst->tex_type; - v->sampler_targets[idx] = - st_translate_texture_target(inst->tex_target, inst->tex_shadow); - - if (inst->op == TGSI_OPCODE_TXF || inst->op == TGSI_OPCODE_TXF_LZ) { - BITSET_SET(prog->info.textures_used_by_txf, idx); - } - } - } - - if (inst->tex_target == TEXTURE_EXTERNAL_INDEX) - prog->ExternalSamplersUsed |= 1 << inst->resource.index; - - if (inst->resource.file != PROGRAM_UNDEFINED && ( - is_resource_instruction(inst->op) || - inst->op == TGSI_OPCODE_STORE)) { - if (inst->resource.file == PROGRAM_MEMORY) { - v->use_shared_memory = true; - } else if (inst->resource.file == PROGRAM_IMAGE) { - for (int i = 0; i < inst->sampler_array_size; i++) { - unsigned idx = inst->sampler_base + i; - v->images_used |= 1 << idx; - v->image_targets[idx] = - st_translate_texture_target(inst->tex_target, false); - v->image_formats[idx] = inst->image_format; - v->image_wr[idx] = !inst->read_only; - } - } - } - } - prog->SamplersUsed = v->samplers_used; - - if (v->shader_program != NULL) - _mesa_update_shader_textures_used(v->shader_program, prog); -} - -/** - * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which - * are read from the given src in this instruction - */ -static int -get_src_arg_mask(st_dst_reg dst, st_src_reg src) -{ - int read_mask = 0, comp; - - /* Now, given the src swizzle and the written channels, find which - * components are actually read - */ - for (comp = 0; comp < 4; ++comp) { - const unsigned coord = GET_SWZ(src.swizzle, comp); - assert(coord < 4); - if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W) - read_mask |= 1 << coord; - } - - return read_mask; -} - -/** - * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP - * instruction is the first instruction to write to register T0. There are - * several lowering passes done in GLSL IR (e.g. branches and - * relative addressing) that create a large number of conditional assignments - * that glsl_to_tgsi converts to CMP instructions like the one mentioned above. - * - * Here is why this conversion is safe: - * CMP T0, T1 T2 T0 can be expanded to: - * if (T1 < 0.0) - * MOV T0, T2; - * else - * MOV T0, T0; - * - * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same - * as the original program. If (T1 < 0.0) evaluates to false, executing - * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. - * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 - * because any instruction that was going to read from T0 after this was going - * to read a garbage value anyway. - */ -void -glsl_to_tgsi_visitor::simplify_cmp(void) -{ - int tempWritesSize = 0; - unsigned *tempWrites = NULL; - unsigned outputWrites[VARYING_SLOT_TESS_MAX]; - - memset(outputWrites, 0, sizeof(outputWrites)); - - foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { - unsigned prevWriteMask = 0; - - /* Give up if we encounter relative addressing or flow control. */ - if (inst->dst[0].reladdr || inst->dst[0].reladdr2 || - inst->dst[1].reladdr || inst->dst[1].reladdr2 || - inst->info->is_branch || - inst->op == TGSI_OPCODE_CONT || - inst->op == TGSI_OPCODE_END || - inst->op == TGSI_OPCODE_RET) { - break; - } - - if (inst->dst[0].file == PROGRAM_OUTPUT) { - assert(inst->dst[0].index < (signed)ARRAY_SIZE(outputWrites)); - prevWriteMask = outputWrites[inst->dst[0].index]; - outputWrites[inst->dst[0].index] |= inst->dst[0].writemask; - } else if (inst->dst[0].file == PROGRAM_TEMPORARY) { - if (inst->dst[0].index >= tempWritesSize) { - const int inc = 4096; - - tempWrites = (unsigned*) - realloc(tempWrites, - (tempWritesSize + inc) * sizeof(unsigned)); - if (!tempWrites) - return; - - memset(tempWrites + tempWritesSize, 0, inc * sizeof(unsigned)); - tempWritesSize += inc; - } - - prevWriteMask = tempWrites[inst->dst[0].index]; - tempWrites[inst->dst[0].index] |= inst->dst[0].writemask; - } else - continue; - - /* For a CMP to be considered a conditional write, the destination - * register and source register two must be the same. */ - if (inst->op == TGSI_OPCODE_CMP - && !(inst->dst[0].writemask & prevWriteMask) - && inst->src[2].file == inst->dst[0].file - && inst->src[2].index == inst->dst[0].index - && inst->dst[0].writemask == - get_src_arg_mask(inst->dst[0], inst->src[2])) { - - inst->op = TGSI_OPCODE_MOV; - inst->info = tgsi_get_opcode_info(inst->op); - inst->src[0] = inst->src[1]; - } - } - - free(tempWrites); -} - -static void -rename_temp_handle_src(struct rename_reg_pair *renames, st_src_reg *src) -{ - if (src && src->file == PROGRAM_TEMPORARY) { - int old_idx = src->index; - if (renames[old_idx].valid) - src->index = renames[old_idx].new_reg; - } -} - -/* Replaces all references to a temporary register index with another index. */ -void -glsl_to_tgsi_visitor::rename_temp_registers(struct rename_reg_pair *renames) -{ - foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { - unsigned j; - for (j = 0; j < num_inst_src_regs(inst); j++) { - rename_temp_handle_src(renames, &inst->src[j]); - rename_temp_handle_src(renames, inst->src[j].reladdr); - rename_temp_handle_src(renames, inst->src[j].reladdr2); - } - - for (j = 0; j < inst->tex_offset_num_offset; j++) { - rename_temp_handle_src(renames, &inst->tex_offsets[j]); - rename_temp_handle_src(renames, inst->tex_offsets[j].reladdr); - rename_temp_handle_src(renames, inst->tex_offsets[j].reladdr2); - } - - rename_temp_handle_src(renames, &inst->resource); - rename_temp_handle_src(renames, inst->resource.reladdr); - rename_temp_handle_src(renames, inst->resource.reladdr2); - - for (j = 0; j < num_inst_dst_regs(inst); j++) { - if (inst->dst[j].file == PROGRAM_TEMPORARY) { - int old_idx = inst->dst[j].index; - if (renames[old_idx].valid) - inst->dst[j].index = renames[old_idx].new_reg; - } - rename_temp_handle_src(renames, inst->dst[j].reladdr); - rename_temp_handle_src(renames, inst->dst[j].reladdr2); - } - } -} - -void -glsl_to_tgsi_visitor::get_first_temp_write(int *first_writes) -{ - int depth = 0; /* loop depth */ - int loop_start = -1; /* index of the first active BGNLOOP (if any) */ - unsigned i = 0, j; - - foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { - for (j = 0; j < num_inst_dst_regs(inst); j++) { - if (inst->dst[j].file == PROGRAM_TEMPORARY) { - if (first_writes[inst->dst[j].index] == -1) - first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start; - } - } - - if (inst->op == TGSI_OPCODE_BGNLOOP) { - if (depth++ == 0) - loop_start = i; - } else if (inst->op == TGSI_OPCODE_ENDLOOP) { - if (--depth == 0) - loop_start = -1; - } - assert(depth >= 0); - i++; - } -} - -void -glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads) -{ - int depth = 0; /* loop depth */ - int loop_start = -1; /* index of the first active BGNLOOP (if any) */ - unsigned i = 0, j; - - foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { - for (j = 0; j < num_inst_src_regs(inst); j++) { - if (inst->src[j].file == PROGRAM_TEMPORARY) { - if (first_reads[inst->src[j].index] == -1) - first_reads[inst->src[j].index] = (depth == 0) ? i : loop_start; - } - } - for (j = 0; j < inst->tex_offset_num_offset; j++) { - if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) { - if (first_reads[inst->tex_offsets[j].index] == -1) - first_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : loop_start; - } - } - if (inst->op == TGSI_OPCODE_BGNLOOP) { - if (depth++ == 0) - loop_start = i; - } else if (inst->op == TGSI_OPCODE_ENDLOOP) { - if (--depth == 0) - loop_start = -1; - } - assert(depth >= 0); - i++; - } -} - -void -glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *first_writes) -{ - int depth = 0; /* loop depth */ - int loop_start = -1; /* index of the first active BGNLOOP (if any) */ - unsigned i = 0, j; - int k; - foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { - for (j = 0; j < num_inst_src_regs(inst); j++) { - if (inst->src[j].file == PROGRAM_TEMPORARY) - last_reads[inst->src[j].index] = (depth == 0) ? i : -2; - } - for (j = 0; j < num_inst_dst_regs(inst); j++) { - if (inst->dst[j].file == PROGRAM_TEMPORARY) { - if (first_writes[inst->dst[j].index] == -1) - first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start; - last_reads[inst->dst[j].index] = (depth == 0) ? i : -2; - } - } - for (j = 0; j < inst->tex_offset_num_offset; j++) { - if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) - last_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : -2; - } - if (inst->op == TGSI_OPCODE_BGNLOOP) { - if (depth++ == 0) - loop_start = i; - } else if (inst->op == TGSI_OPCODE_ENDLOOP) { - if (--depth == 0) { - loop_start = -1; - for (k = 0; k < this->next_temp; k++) { - if (last_reads[k] == -2) { - last_reads[k] = i; - } - } - } - } - assert(depth >= 0); - i++; - } -} - -void -glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes) -{ - int depth = 0; /* loop depth */ - int i = 0, k; - unsigned j; - - foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { - for (j = 0; j < num_inst_dst_regs(inst); j++) { - if (inst->dst[j].file == PROGRAM_TEMPORARY) - last_writes[inst->dst[j].index] = (depth == 0) ? i : -2; - } - - if (inst->op == TGSI_OPCODE_BGNLOOP) - depth++; - else if (inst->op == TGSI_OPCODE_ENDLOOP) - if (--depth == 0) { - for (k = 0; k < this->next_temp; k++) { - if (last_writes[k] == -2) { - last_writes[k] = i; - } - } - } - assert(depth >= 0); - i++; - } -} - -/* - * On a basic block basis, tracks available PROGRAM_TEMPORARY register - * channels for copy propagation and updates following instructions to - * use the original versions. - * - * The glsl_to_tgsi_visitor lazily produces code assuming that this pass - * will occur. As an example, a TXP production before this pass: - * - * 0: MOV TEMP[1], INPUT[4].xyyy; - * 1: MOV TEMP[1].w, INPUT[4].wwww; - * 2: TXP TEMP[2], TEMP[1], texture[0], 2D; - * - * and after: - * - * 0: MOV TEMP[1], INPUT[4].xyyy; - * 1: MOV TEMP[1].w, INPUT[4].wwww; - * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; - * - * which allows for dead code elimination on TEMP[1]'s writes. - */ -void -glsl_to_tgsi_visitor::copy_propagate(void) -{ - glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx, - glsl_to_tgsi_instruction *, - this->next_temp * 4); - int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); - int level = 0; - - foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { - assert(inst->dst[0].file != PROGRAM_TEMPORARY - || inst->dst[0].index < this->next_temp); - - /* First, do any copy propagation possible into the src regs. */ - for (int r = 0; r < 3; r++) { - glsl_to_tgsi_instruction *first = NULL; - bool good = true; - int acp_base = inst->src[r].index * 4; - - if (inst->src[r].file != PROGRAM_TEMPORARY || - inst->src[r].reladdr || - inst->src[r].reladdr2) - continue; - - /* See if we can find entries in the ACP consisting of MOVs - * from the same src register for all the swizzled channels - * of this src register reference. - */ - for (int i = 0; i < 4; i++) { - int src_chan = GET_SWZ(inst->src[r].swizzle, i); - glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan]; - - if (!copy_chan) { - good = false; - break; - } - - assert(acp_level[acp_base + src_chan] <= level); - - if (!first) { - first = copy_chan; - } else { - if (first->src[0].file != copy_chan->src[0].file || - first->src[0].index != copy_chan->src[0].index || - first->src[0].double_reg2 != copy_chan->src[0].double_reg2 || - first->src[0].index2D != copy_chan->src[0].index2D) { - good = false; - break; - } - } - } - - if (good) { - /* We've now validated that we can copy-propagate to - * replace this src register reference. Do it. - */ - inst->src[r].file = first->src[0].file; - inst->src[r].index = first->src[0].index; - inst->src[r].index2D = first->src[0].index2D; - inst->src[r].has_index2 = first->src[0].has_index2; - inst->src[r].double_reg2 = first->src[0].double_reg2; - inst->src[r].array_id = first->src[0].array_id; - - int swizzle = 0; - for (int i = 0; i < 4; i++) { - int src_chan = GET_SWZ(inst->src[r].swizzle, i); - glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan]; - swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << (3 * i)); - } - inst->src[r].swizzle = swizzle; - } - } - - switch (inst->op) { - case TGSI_OPCODE_BGNLOOP: - case TGSI_OPCODE_ENDLOOP: - /* End of a basic block, clear the ACP entirely. */ - memset(acp, 0, sizeof(*acp) * this->next_temp * 4); - break; - - case TGSI_OPCODE_IF: - case TGSI_OPCODE_UIF: - ++level; - break; - - case TGSI_OPCODE_ENDIF: - case TGSI_OPCODE_ELSE: - /* Clear all channels written inside the block from the ACP, but - * leaving those that were not touched. - */ - for (int r = 0; r < this->next_temp; r++) { - for (int c = 0; c < 4; c++) { - if (!acp[4 * r + c]) - continue; - - if (acp_level[4 * r + c] >= level) - acp[4 * r + c] = NULL; - } - } - if (inst->op == TGSI_OPCODE_ENDIF) - --level; - break; - - default: - /* Continuing the block, clear any written channels from - * the ACP. - */ - for (int d = 0; d < 2; d++) { - if (inst->dst[d].file == PROGRAM_TEMPORARY && inst->dst[d].reladdr) { - /* Any temporary might be written, so no copy propagation - * across this instruction. - */ - memset(acp, 0, sizeof(*acp) * this->next_temp * 4); - } else if (inst->dst[d].file == PROGRAM_OUTPUT && - inst->dst[d].reladdr) { - /* Any output might be written, so no copy propagation - * from outputs across this instruction. - */ - for (int r = 0; r < this->next_temp; r++) { - for (int c = 0; c < 4; c++) { - if (!acp[4 * r + c]) - continue; - - if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT) - acp[4 * r + c] = NULL; - } - } - } else if (inst->dst[d].file == PROGRAM_TEMPORARY || - inst->dst[d].file == PROGRAM_OUTPUT) { - /* Clear where it's used as dst. */ - if (inst->dst[d].file == PROGRAM_TEMPORARY) { - for (int c = 0; c < 4; c++) { - if (inst->dst[d].writemask & (1 << c)) - acp[4 * inst->dst[d].index + c] = NULL; - } - } - - /* Clear where it's used as src. */ - for (int r = 0; r < this->next_temp; r++) { - for (int c = 0; c < 4; c++) { - if (!acp[4 * r + c]) - continue; - - int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c); - - if (acp[4 * r + c]->src[0].file == inst->dst[d].file && - acp[4 * r + c]->src[0].index == inst->dst[d].index && - inst->dst[d].writemask & (1 << src_chan)) { - acp[4 * r + c] = NULL; - } - } - } - } - } - break; - } - - /* If this is a copy, add it to the ACP. */ - if (inst->op == TGSI_OPCODE_MOV && - inst->dst[0].file == PROGRAM_TEMPORARY && - !(inst->dst[0].file == inst->src[0].file && - inst->dst[0].index == inst->src[0].index) && - !inst->dst[0].reladdr && - !inst->dst[0].reladdr2 && - !inst->saturate && - inst->src[0].file != PROGRAM_ARRAY && - (inst->src[0].file != PROGRAM_OUTPUT || - this->shader->Stage != MESA_SHADER_TESS_CTRL) && - !inst->src[0].reladdr && - !inst->src[0].reladdr2 && - !inst->src[0].negate && - !inst->src[0].abs) { - for (int i = 0; i < 4; i++) { - if (inst->dst[0].writemask & (1 << i)) { - acp[4 * inst->dst[0].index + i] = inst; - acp_level[4 * inst->dst[0].index + i] = level; - } - } - } - } - - ralloc_free(acp_level); - ralloc_free(acp); -} - -static void -dead_code_handle_reladdr(glsl_to_tgsi_instruction **writes, st_src_reg *reladdr) -{ - if (reladdr && reladdr->file == PROGRAM_TEMPORARY) { - /* Clear where it's used as src. */ - int swz = GET_SWZ(reladdr->swizzle, 0); - writes[4 * reladdr->index + swz] = NULL; - } -} - -/* - * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead - * code elimination. - * - * The glsl_to_tgsi_visitor lazily produces code assuming that this pass - * will occur. As an example, a TXP production after copy propagation but - * before this pass: - * - * 0: MOV TEMP[1], INPUT[4].xyyy; - * 1: MOV TEMP[1].w, INPUT[4].wwww; - * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; - * - * and after this pass: - * - * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; - */ -int -glsl_to_tgsi_visitor::eliminate_dead_code(void) -{ - glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx, - glsl_to_tgsi_instruction *, - this->next_temp * 4); - int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); - int level = 0; - int removed = 0; - - foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { - assert(inst->dst[0].file != PROGRAM_TEMPORARY - || inst->dst[0].index < this->next_temp); - - switch (inst->op) { - case TGSI_OPCODE_BGNLOOP: - case TGSI_OPCODE_ENDLOOP: - case TGSI_OPCODE_CONT: - case TGSI_OPCODE_BRK: - /* End of a basic block, clear the write array entirely. - * - * This keeps us from killing dead code when the writes are - * on either side of a loop, even when the register isn't touched - * inside the loop. However, glsl_to_tgsi_visitor doesn't seem to emit - * dead code of this type, so it shouldn't make a difference as long as - * the dead code elimination pass in the GLSL compiler does its job. - */ - memset(writes, 0, sizeof(*writes) * this->next_temp * 4); - break; - - case TGSI_OPCODE_ENDIF: - case TGSI_OPCODE_ELSE: - /* Promote the recorded level of all channels written inside the - * preceding if or else block to the level above the if/else block. - */ - for (int r = 0; r < this->next_temp; r++) { - for (int c = 0; c < 4; c++) { - if (!writes[4 * r + c]) - continue; - - if (write_level[4 * r + c] == level) - write_level[4 * r + c] = level-1; - } - } - if (inst->op == TGSI_OPCODE_ENDIF) - --level; - break; - - case TGSI_OPCODE_IF: - case TGSI_OPCODE_UIF: - ++level; - FALLTHROUGH; /* to mark the condition as read */ - default: - /* Continuing the block, clear any channels from the write array that - * are read by this instruction. - */ - for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++) { - if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){ - /* Any temporary might be read, so no dead code elimination - * across this instruction. - */ - memset(writes, 0, sizeof(*writes) * this->next_temp * 4); - } else if (inst->src[i].file == PROGRAM_TEMPORARY) { - /* Clear where it's used as src. */ - int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0); - src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1); - src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2); - src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3); - - for (int c = 0; c < 4; c++) { - if (src_chans & (1 << c)) - writes[4 * inst->src[i].index + c] = NULL; - } - } - dead_code_handle_reladdr(writes, inst->src[i].reladdr); - dead_code_handle_reladdr(writes, inst->src[i].reladdr2); - } - for (unsigned i = 0; i < inst->tex_offset_num_offset; i++) { - if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY && inst->tex_offsets[i].reladdr){ - /* Any temporary might be read, so no dead code elimination - * across this instruction. - */ - memset(writes, 0, sizeof(*writes) * this->next_temp * 4); - } else if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY) { - /* Clear where it's used as src. */ - int src_chans = 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 0); - src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 1); - src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 2); - src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 3); - - for (int c = 0; c < 4; c++) { - if (src_chans & (1 << c)) - writes[4 * inst->tex_offsets[i].index + c] = NULL; - } - } - dead_code_handle_reladdr(writes, inst->tex_offsets[i].reladdr); - dead_code_handle_reladdr(writes, inst->tex_offsets[i].reladdr2); - } - - if (inst->resource.file == PROGRAM_TEMPORARY) { - int src_chans; - - src_chans = 1 << GET_SWZ(inst->resource.swizzle, 0); - src_chans |= 1 << GET_SWZ(inst->resource.swizzle, 1); - src_chans |= 1 << GET_SWZ(inst->resource.swizzle, 2); - src_chans |= 1 << GET_SWZ(inst->resource.swizzle, 3); - - for (int c = 0; c < 4; c++) { - if (src_chans & (1 << c)) - writes[4 * inst->resource.index + c] = NULL; - } - } - dead_code_handle_reladdr(writes, inst->resource.reladdr); - dead_code_handle_reladdr(writes, inst->resource.reladdr2); - - for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) { - dead_code_handle_reladdr(writes, inst->dst[i].reladdr); - dead_code_handle_reladdr(writes, inst->dst[i].reladdr2); - } - break; - } - - /* If this instruction writes to a temporary, add it to the write array. - * If there is already an instruction in the write array for one or more - * of the channels, flag that channel write as dead. - */ - for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) { - if (inst->dst[i].file == PROGRAM_TEMPORARY && - !inst->dst[i].reladdr) { - for (int c = 0; c < 4; c++) { - if (inst->dst[i].writemask & (1 << c)) { - if (writes[4 * inst->dst[i].index + c]) { - if (write_level[4 * inst->dst[i].index + c] < level) - continue; - else - writes[4 * inst->dst[i].index + c]->dead_mask |= (1 << c); - } - writes[4 * inst->dst[i].index + c] = inst; - write_level[4 * inst->dst[i].index + c] = level; - } - } - } - } - } - - /* Anything still in the write array at this point is dead code. */ - for (int r = 0; r < this->next_temp; r++) { - for (int c = 0; c < 4; c++) { - glsl_to_tgsi_instruction *inst = writes[4 * r + c]; - if (inst) - inst->dead_mask |= (1 << c); - } - } - - /* Now actually remove the instructions that are completely dead and update - * the writemask of other instructions with dead channels. - */ - foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) { - if (!inst->dead_mask || !inst->dst[0].writemask) - continue; - /* No amount of dead masks should remove memory stores */ - if (inst->info->is_store) - continue; - - if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) { - inst->remove(); - delete inst; - removed++; - } else { - if (glsl_base_type_is_64bit(inst->dst[0].type)) { - if (inst->dead_mask == WRITEMASK_XY || - inst->dead_mask == WRITEMASK_ZW) - inst->dst[0].writemask &= ~(inst->dead_mask); - } else - inst->dst[0].writemask &= ~(inst->dead_mask); - } - } - - ralloc_free(write_level); - ralloc_free(writes); - - return removed; -} - -/* merge DFRACEXP instructions into one. */ -void -glsl_to_tgsi_visitor::merge_two_dsts(void) -{ - /* We never delete inst, but we may delete its successor. */ - foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { - glsl_to_tgsi_instruction *inst2; - unsigned defined; - - if (num_inst_dst_regs(inst) != 2) - continue; - - if (inst->dst[0].file != PROGRAM_UNDEFINED && - inst->dst[1].file != PROGRAM_UNDEFINED) - continue; - - assert(inst->dst[0].file != PROGRAM_UNDEFINED || - inst->dst[1].file != PROGRAM_UNDEFINED); - - if (inst->dst[0].file == PROGRAM_UNDEFINED) - defined = 1; - else - defined = 0; - - inst2 = (glsl_to_tgsi_instruction *) inst->next; - while (!inst2->is_tail_sentinel()) { - if (inst->op == inst2->op && - inst2->dst[defined].file == PROGRAM_UNDEFINED && - inst->src[0].file == inst2->src[0].file && - inst->src[0].index == inst2->src[0].index && - inst->src[0].type == inst2->src[0].type && - inst->src[0].swizzle == inst2->src[0].swizzle) - break; - inst2 = (glsl_to_tgsi_instruction *) inst2->next; - } - - if (inst2->is_tail_sentinel()) { - /* Undefined destinations are not allowed, substitute with an unused - * temporary register. - */ - st_src_reg tmp = get_temp(glsl_type::vec4_type); - inst->dst[defined ^ 1] = st_dst_reg(tmp); - inst->dst[defined ^ 1].writemask = 0; - continue; - } - - inst->dst[defined ^ 1] = inst2->dst[defined ^ 1]; - inst2->remove(); - delete inst2; - } -} - -template -void test_indirect_access(const st_reg& reg, bool *has_indirect_access) -{ - if (reg.file == PROGRAM_ARRAY) { - if (reg.reladdr || reg.reladdr2 || reg.has_index2) { - has_indirect_access[reg.array_id] = true; - if (reg.reladdr) - test_indirect_access(*reg.reladdr, has_indirect_access); - if (reg.reladdr2) - test_indirect_access(*reg.reladdr2, has_indirect_access); - } - } -} - -template -void remap_array(st_reg& reg, const int *array_remap_info, - const bool *has_indirect_access) -{ - if (reg.file == PROGRAM_ARRAY) { - if (!has_indirect_access[reg.array_id]) { - reg.file = PROGRAM_TEMPORARY; - reg.index = reg.index + array_remap_info[reg.array_id]; - reg.array_id = 0; - } else { - reg.array_id = array_remap_info[reg.array_id]; - } - - if (reg.reladdr) - remap_array(*reg.reladdr, array_remap_info, has_indirect_access); - - if (reg.reladdr2) - remap_array(*reg.reladdr2, array_remap_info, has_indirect_access); - } -} - -/* One-dimensional arrays whose elements are only accessed directly are - * replaced by an according set of temporary registers that then can become - * subject to further optimization steps like copy propagation and - * register merging. - */ -void -glsl_to_tgsi_visitor::split_arrays(void) -{ - if (!next_array) - return; - - bool *has_indirect_access = rzalloc_array(mem_ctx, bool, next_array + 1); - - foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { - for (unsigned j = 0; j < num_inst_src_regs(inst); j++) - test_indirect_access(inst->src[j], has_indirect_access); - - for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) - test_indirect_access(inst->tex_offsets[j], has_indirect_access); - - for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) - test_indirect_access(inst->dst[j], has_indirect_access); - - test_indirect_access(inst->resource, has_indirect_access); - } - - unsigned array_offset = 0; - unsigned n_remaining_arrays = 0; - - /* Double use: For arrays that get split this value will contain - * the base index of the temporary registers this array is replaced - * with. For arrays that remain it contains the new array ID. - */ - int *array_remap_info = rzalloc_array(has_indirect_access, int, - next_array + 1); - - for (unsigned i = 1; i <= next_array; ++i) { - if (!has_indirect_access[i]) { - array_remap_info[i] = this->next_temp + array_offset; - array_offset += array_sizes[i - 1]; - } else { - array_sizes[n_remaining_arrays] = array_sizes[i-1]; - array_remap_info[i] = ++n_remaining_arrays; - } - } - - if (next_array != n_remaining_arrays) { - foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { - for (unsigned j = 0; j < num_inst_src_regs(inst); j++) - remap_array(inst->src[j], array_remap_info, has_indirect_access); - - for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) - remap_array(inst->tex_offsets[j], array_remap_info, has_indirect_access); - - for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) { - remap_array(inst->dst[j], array_remap_info, has_indirect_access); - } - remap_array(inst->resource, array_remap_info, has_indirect_access); - } - } - - ralloc_free(has_indirect_access); - this->next_temp += array_offset; - next_array = n_remaining_arrays; -} - -/* Merges temporary registers together where possible to reduce the number of - * registers needed to run a program. - * - * Produces optimal code only after copy propagation and dead code elimination - * have been run. */ -void -glsl_to_tgsi_visitor::merge_registers(void) -{ - class array_live_range *arr_live_ranges = NULL; - - struct register_live_range *reg_live_ranges = - rzalloc_array(mem_ctx, struct register_live_range, this->next_temp); - - if (this->next_array > 0) { - arr_live_ranges = new array_live_range[this->next_array]; - for (unsigned i = 0; i < this->next_array; ++i) - arr_live_ranges[i] = array_live_range(i+1, this->array_sizes[i]); - } - - - if (get_temp_registers_required_live_ranges(reg_live_ranges, &this->instructions, - this->next_temp, reg_live_ranges, - this->next_array, arr_live_ranges)) { - struct rename_reg_pair *renames = - rzalloc_array(reg_live_ranges, struct rename_reg_pair, this->next_temp); - get_temp_registers_remapping(reg_live_ranges, this->next_temp, - reg_live_ranges, renames); - rename_temp_registers(renames); - - this->next_array = merge_arrays(this->next_array, this->array_sizes, - &this->instructions, arr_live_ranges); - } - - if (arr_live_ranges) - delete[] arr_live_ranges; - - ralloc_free(reg_live_ranges); -} - -/* Reassign indices to temporary registers by reusing unused indices created - * by optimization passes. */ -void -glsl_to_tgsi_visitor::renumber_registers(void) -{ - int i = 0; - int new_index = 0; - int *first_writes = ralloc_array(mem_ctx, int, this->next_temp); - struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp); - - for (i = 0; i < this->next_temp; i++) { - first_writes[i] = -1; - } - get_first_temp_write(first_writes); - - for (i = 0; i < this->next_temp; i++) { - if (first_writes[i] < 0) continue; - if (i != new_index) { - renames[i].new_reg = new_index; - renames[i].valid = true; - } - new_index++; - } - - rename_temp_registers(renames); - this->next_temp = new_index; - ralloc_free(renames); - ralloc_free(first_writes); -} - -#ifndef NDEBUG -void glsl_to_tgsi_visitor::print_stats() -{ - int narray_registers = 0; - for (unsigned i = 0; i < this->next_array; ++i) - narray_registers += this->array_sizes[i]; - - int ninstructions = 0; - foreach_in_list(glsl_to_tgsi_instruction, inst, &instructions) { - ++ninstructions; - } - - simple_mtx_lock(&print_stats_mutex); - stats_log << next_array << ", " - << next_temp << ", " - << narray_registers << ", " - << next_temp + narray_registers << ", " - << ninstructions << "\n"; - simple_mtx_unlock(&print_stats_mutex); -} -#endif -/* ------------------------- TGSI conversion stuff -------------------------- */ - -/** - * Intermediate state used during shader translation. - */ -struct st_translate { - struct ureg_program *ureg; - - unsigned temps_size; - struct ureg_dst *temps; - - struct ureg_dst *arrays; - unsigned num_temp_arrays; - struct ureg_src *constants; - int num_constants; - struct ureg_src *immediates; - int num_immediates; - struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; - struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; - struct ureg_dst address[3]; - struct ureg_src samplers[PIPE_MAX_SAMPLERS]; - struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS]; - struct ureg_src images[PIPE_MAX_SHADER_IMAGES]; - struct ureg_src systemValues[SYSTEM_VALUE_MAX]; - struct ureg_src hw_atomics[PIPE_MAX_HW_ATOMIC_BUFFERS]; - struct ureg_src shared_memory; - unsigned *array_sizes; - struct inout_decl *input_decls; - unsigned num_input_decls; - struct inout_decl *output_decls; - unsigned num_output_decls; - - const ubyte *inputMapping; - const ubyte *outputMapping; - - enum pipe_shader_type procType; /**< PIPE_SHADER_VERTEX/FRAGMENT */ - bool tg4_component_in_swizzle; -}; - -/** - * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. - */ -static struct ureg_src -emit_immediate(struct st_translate *t, - gl_constant_value values[4], - GLenum type, int size) -{ - struct ureg_program *ureg = t->ureg; - - switch (type) { - case GL_FLOAT: - return ureg_DECL_immediate(ureg, &values[0].f, size); - case GL_DOUBLE: - return ureg_DECL_immediate_f64(ureg, (double *)&values[0].f, size); - case GL_INT64_ARB: - return ureg_DECL_immediate_int64(ureg, (int64_t *)&values[0].f, size); - case GL_UNSIGNED_INT64_ARB: - return ureg_DECL_immediate_uint64(ureg, (uint64_t *)&values[0].f, size); - case GL_INT: - return ureg_DECL_immediate_int(ureg, &values[0].i, size); - case GL_UNSIGNED_INT: - case GL_BOOL: - return ureg_DECL_immediate_uint(ureg, &values[0].u, size); - default: - assert(!"should not get here - type must be float, int, uint, or bool"); - return ureg_src_undef(); - } -} - -/** - * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register. - */ -static struct ureg_dst -dst_register(struct st_translate *t, gl_register_file file, unsigned index, - unsigned array_id) -{ - unsigned array; - - switch (file) { - case PROGRAM_UNDEFINED: - return ureg_dst_undef(); - - case PROGRAM_TEMPORARY: - /* Allocate space for temporaries on demand. */ - if (index >= t->temps_size) { - const int inc = align(index - t->temps_size + 1, 4096); - - t->temps = (struct ureg_dst*) - realloc(t->temps, - (t->temps_size + inc) * sizeof(struct ureg_dst)); - if (!t->temps) - return ureg_dst_undef(); - - memset(t->temps + t->temps_size, 0, inc * sizeof(struct ureg_dst)); - t->temps_size += inc; - } - - if (ureg_dst_is_undef(t->temps[index])) - t->temps[index] = ureg_DECL_local_temporary(t->ureg); - - return t->temps[index]; - - case PROGRAM_ARRAY: - assert(array_id && array_id <= t->num_temp_arrays); - array = array_id - 1; - - if (ureg_dst_is_undef(t->arrays[array])) - t->arrays[array] = ureg_DECL_array_temporary( - t->ureg, t->array_sizes[array], TRUE); - - return ureg_dst_array_offset(t->arrays[array], index); - - case PROGRAM_OUTPUT: - if (!array_id) { - if (t->procType == PIPE_SHADER_FRAGMENT) - assert(index < 2 * FRAG_RESULT_MAX); - else if (t->procType == PIPE_SHADER_TESS_CTRL || - t->procType == PIPE_SHADER_TESS_EVAL) - assert(index < VARYING_SLOT_TESS_MAX); - else - assert(index < VARYING_SLOT_MAX); - - assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs)); - assert(t->outputs[t->outputMapping[index]].File != TGSI_FILE_NULL); - return t->outputs[t->outputMapping[index]]; - } - else { - struct inout_decl *decl = - find_inout_array(t->output_decls, - t->num_output_decls, array_id); - unsigned mesa_index = decl->mesa_index; - ubyte slot = t->outputMapping[mesa_index]; - - assert(slot != 0xff && t->outputs[slot].File == TGSI_FILE_OUTPUT); - - struct ureg_dst dst = t->outputs[slot]; - dst.ArrayID = array_id; - return ureg_dst_array_offset(dst, index - mesa_index); - } - - case PROGRAM_ADDRESS: - return t->address[index]; - - default: - assert(!"unknown dst register file"); - return ureg_dst_undef(); - } -} - -static struct ureg_src -translate_src(struct st_translate *t, const st_src_reg *src_reg); - -static struct ureg_src -translate_addr(struct st_translate *t, const st_src_reg *reladdr, - unsigned addr_index) -{ - return ureg_src(t->address[addr_index]); -} - -/** - * Create a TGSI ureg_dst register from an st_dst_reg. - */ -static struct ureg_dst -translate_dst(struct st_translate *t, - const st_dst_reg *dst_reg, - bool saturate) -{ - struct ureg_dst dst = dst_register(t, dst_reg->file, dst_reg->index, - dst_reg->array_id); - - if (dst.File == TGSI_FILE_NULL) - return dst; - - dst = ureg_writemask(dst, dst_reg->writemask); - - if (saturate) - dst = ureg_saturate(dst); - - if (dst_reg->reladdr != NULL) { - assert(dst_reg->file != PROGRAM_TEMPORARY); - dst = ureg_dst_indirect(dst, translate_addr(t, dst_reg->reladdr, 0)); - } - - if (dst_reg->has_index2) { - if (dst_reg->reladdr2) - dst = ureg_dst_dimension_indirect(dst, - translate_addr(t, dst_reg->reladdr2, 1), - dst_reg->index2D); - else - dst = ureg_dst_dimension(dst, dst_reg->index2D); - } - - return dst; -} - -/** - * Create a TGSI ureg_src register from an st_src_reg. - */ -static struct ureg_src -translate_src(struct st_translate *t, const st_src_reg *src_reg) -{ - struct ureg_src src; - int index = src_reg->index; - int double_reg2 = src_reg->double_reg2 ? 1 : 0; - - switch (src_reg->file) { - case PROGRAM_UNDEFINED: - src = ureg_imm4f(t->ureg, 0, 0, 0, 0); - break; - - case PROGRAM_TEMPORARY: - case PROGRAM_ARRAY: - src = ureg_src(dst_register(t, src_reg->file, src_reg->index, - src_reg->array_id)); - break; - - case PROGRAM_OUTPUT: { - struct ureg_dst dst = dst_register(t, src_reg->file, src_reg->index, - src_reg->array_id); - assert(dst.WriteMask != 0); - unsigned shift = ffs(dst.WriteMask) - 1; - src = ureg_swizzle(ureg_src(dst), - shift, - MIN2(shift + 1, 3), - MIN2(shift + 2, 3), - MIN2(shift + 3, 3)); - break; - } - - case PROGRAM_UNIFORM: - assert(src_reg->index >= 0); - src = src_reg->index < t->num_constants ? - t->constants[src_reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0); - break; - case PROGRAM_STATE_VAR: - case PROGRAM_CONSTANT: /* ie, immediate */ - if (src_reg->has_index2) - src = ureg_src_register(TGSI_FILE_CONSTANT, src_reg->index); - else - src = src_reg->index >= 0 && src_reg->index < t->num_constants ? - t->constants[src_reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0); - break; - - case PROGRAM_IMMEDIATE: - assert(src_reg->index >= 0 && src_reg->index < t->num_immediates); - src = t->immediates[src_reg->index]; - break; - - case PROGRAM_INPUT: - /* GLSL inputs are 64-bit containers, so we have to - * map back to the original index and add the offset after - * mapping. */ - index -= double_reg2; - if (!src_reg->array_id) { - assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs)); - assert(t->inputs[t->inputMapping[index]].File != TGSI_FILE_NULL); - src = t->inputs[t->inputMapping[index] + double_reg2]; - } - else { - struct inout_decl *decl = find_inout_array(t->input_decls, - t->num_input_decls, - src_reg->array_id); - unsigned mesa_index = decl->mesa_index; - ubyte slot = t->inputMapping[mesa_index]; - - assert(slot != 0xff && t->inputs[slot].File == TGSI_FILE_INPUT); - - src = t->inputs[slot]; - src.ArrayID = src_reg->array_id; - src = ureg_src_array_offset(src, index + double_reg2 - mesa_index); - } - break; - - case PROGRAM_ADDRESS: - src = ureg_src(t->address[src_reg->index]); - break; - - case PROGRAM_SYSTEM_VALUE: - assert(src_reg->index < (int) ARRAY_SIZE(t->systemValues)); - src = t->systemValues[src_reg->index]; - break; - - case PROGRAM_HW_ATOMIC: - src = ureg_src_array_register(TGSI_FILE_HW_ATOMIC, src_reg->index, - src_reg->array_id); - break; - - default: - assert(!"unknown src register file"); - return ureg_src_undef(); - } - - if (src_reg->has_index2) { - /* 2D indexes occur with geometry shader inputs (attrib, vertex) - * and UBO constant buffers (buffer, position). - */ - if (src_reg->reladdr2) - src = ureg_src_dimension_indirect(src, - translate_addr(t, src_reg->reladdr2, 1), - src_reg->index2D); - else - src = ureg_src_dimension(src, src_reg->index2D); - } - - src = ureg_swizzle(src, - GET_SWZ(src_reg->swizzle, 0) & 0x3, - GET_SWZ(src_reg->swizzle, 1) & 0x3, - GET_SWZ(src_reg->swizzle, 2) & 0x3, - GET_SWZ(src_reg->swizzle, 3) & 0x3); - - if (src_reg->abs) - src = ureg_abs(src); - - if ((src_reg->negate & 0xf) == NEGATE_XYZW) - src = ureg_negate(src); - - if (src_reg->reladdr != NULL) { - assert(src_reg->file != PROGRAM_TEMPORARY); - src = ureg_src_indirect(src, translate_addr(t, src_reg->reladdr, 0)); - } - - return src; -} - -static struct tgsi_texture_offset -translate_tex_offset(struct st_translate *t, - const st_src_reg *in_offset) -{ - struct tgsi_texture_offset offset; - struct ureg_src src = translate_src(t, in_offset); - - offset.File = src.File; - offset.Index = src.Index; - offset.SwizzleX = src.SwizzleX; - offset.SwizzleY = src.SwizzleY; - offset.SwizzleZ = src.SwizzleZ; - offset.Padding = 0; - - assert(!src.Indirect); - assert(!src.DimIndirect); - assert(!src.Dimension); - assert(!src.Absolute); /* those shouldn't be used with integers anyway */ - assert(!src.Negate); - - return offset; -} - -static void -compile_tgsi_instruction(struct st_translate *t, - const glsl_to_tgsi_instruction *inst) -{ - struct ureg_program *ureg = t->ureg; - int i; - struct ureg_dst dst[2]; - struct ureg_src src[4]; - struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET]; - - int num_dst; - int num_src; - enum tgsi_texture_type tex_target = TGSI_TEXTURE_BUFFER; - - num_dst = num_inst_dst_regs(inst); - num_src = num_inst_src_regs(inst); - - for (i = 0; i < num_dst; i++) - dst[i] = translate_dst(t, - &inst->dst[i], - inst->saturate); - - for (i = 0; i < num_src; i++) - src[i] = translate_src(t, &inst->src[i]); - - switch (inst->op) { - case TGSI_OPCODE_BGNLOOP: - case TGSI_OPCODE_ELSE: - case TGSI_OPCODE_ENDLOOP: - case TGSI_OPCODE_IF: - case TGSI_OPCODE_UIF: - assert(num_dst == 0); - ureg_insn(ureg, inst->op, NULL, 0, src, num_src, inst->precise); - return; - - case TGSI_OPCODE_TEX: - case TGSI_OPCODE_TEX_LZ: - case TGSI_OPCODE_TXB: - case TGSI_OPCODE_TXD: - case TGSI_OPCODE_TXL: - case TGSI_OPCODE_TXP: - case TGSI_OPCODE_TXQ: - case TGSI_OPCODE_TXQS: - case TGSI_OPCODE_TXF: - case TGSI_OPCODE_TXF_LZ: - case TGSI_OPCODE_TEX2: - case TGSI_OPCODE_TXB2: - case TGSI_OPCODE_TXL2: - case TGSI_OPCODE_TG4: - case TGSI_OPCODE_LODQ: - case TGSI_OPCODE_SAMP2HND: - if (inst->resource.file == PROGRAM_SAMPLER) { - src[num_src] = t->samplers[inst->resource.index]; - if (t->tg4_component_in_swizzle && inst->op == TGSI_OPCODE_TG4) - src[num_src].SwizzleX = inst->gather_component; - } else { - /* Bindless samplers. */ - src[num_src] = translate_src(t, &inst->resource); - } - assert(src[num_src].File != TGSI_FILE_NULL); - if (inst->resource.reladdr) - src[num_src] = - ureg_src_indirect(src[num_src], - translate_addr(t, inst->resource.reladdr, 2)); - num_src++; - for (i = 0; i < (int)inst->tex_offset_num_offset; i++) { - texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]); - } - tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); - - ureg_tex_insn(ureg, - inst->op, - dst, num_dst, - tex_target, - st_translate_texture_type(inst->tex_type), - texoffsets, inst->tex_offset_num_offset, - src, num_src); - return; - - case TGSI_OPCODE_RESQ: - case TGSI_OPCODE_LOAD: - case TGSI_OPCODE_ATOMUADD: - case TGSI_OPCODE_ATOMXCHG: - case TGSI_OPCODE_ATOMCAS: - case TGSI_OPCODE_ATOMAND: - case TGSI_OPCODE_ATOMOR: - case TGSI_OPCODE_ATOMXOR: - case TGSI_OPCODE_ATOMUMIN: - case TGSI_OPCODE_ATOMUMAX: - case TGSI_OPCODE_ATOMIMIN: - case TGSI_OPCODE_ATOMIMAX: - case TGSI_OPCODE_ATOMFADD: - case TGSI_OPCODE_IMG2HND: - case TGSI_OPCODE_ATOMINC_WRAP: - case TGSI_OPCODE_ATOMDEC_WRAP: - for (i = num_src - 1; i >= 0; i--) - src[i + 1] = src[i]; - num_src++; - if (inst->resource.file == PROGRAM_MEMORY) { - src[0] = t->shared_memory; - } else if (inst->resource.file == PROGRAM_BUFFER) { - src[0] = t->buffers[inst->resource.index]; - } else if (inst->resource.file == PROGRAM_HW_ATOMIC) { - src[0] = translate_src(t, &inst->resource); - } else if (inst->resource.file == PROGRAM_CONSTANT) { - assert(inst->resource.has_index2); - src[0] = ureg_src_register(TGSI_FILE_CONSTBUF, inst->resource.index); - } else { - assert(inst->resource.file != PROGRAM_UNDEFINED); - if (inst->resource.file == PROGRAM_IMAGE) { - src[0] = t->images[inst->resource.index]; - } else { - /* Bindless images. */ - src[0] = translate_src(t, &inst->resource); - } - tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); - } - if (inst->resource.reladdr) - src[0] = ureg_src_indirect(src[0], - translate_addr(t, inst->resource.reladdr, 2)); - assert(src[0].File != TGSI_FILE_NULL); - ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src, - inst->buffer_access, - tex_target, inst->image_format); - break; - - case TGSI_OPCODE_STORE: - if (inst->resource.file == PROGRAM_MEMORY) { - dst[0] = ureg_dst(t->shared_memory); - } else if (inst->resource.file == PROGRAM_BUFFER) { - dst[0] = ureg_dst(t->buffers[inst->resource.index]); - } else { - if (inst->resource.file == PROGRAM_IMAGE) { - dst[0] = ureg_dst(t->images[inst->resource.index]); - } else { - /* Bindless images. */ - dst[0] = ureg_dst(translate_src(t, &inst->resource)); - } - tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); - } - dst[0] = ureg_writemask(dst[0], inst->dst[0].writemask); - if (inst->resource.reladdr) - dst[0] = ureg_dst_indirect(dst[0], - translate_addr(t, inst->resource.reladdr, 2)); - assert(dst[0].File != TGSI_FILE_NULL); - ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src, - inst->buffer_access, - tex_target, inst->image_format); - break; - - default: - ureg_insn(ureg, - inst->op, - dst, num_dst, - src, num_src, inst->precise); - break; - } -} - -/* Invert SamplePos.y when rendering to the default framebuffer. */ -static void -emit_samplepos_adjustment(struct st_translate *t, int wpos_y_transform) -{ - struct ureg_program *ureg = t->ureg; - - assert(wpos_y_transform >= 0); - struct ureg_src trans_const = ureg_DECL_constant(ureg, wpos_y_transform); - struct ureg_src samplepos_sysval = t->systemValues[SYSTEM_VALUE_SAMPLE_POS]; - struct ureg_dst samplepos_flipped = ureg_DECL_temporary(ureg); - struct ureg_dst is_fbo = ureg_DECL_temporary(ureg); - - ureg_ADD(ureg, ureg_writemask(samplepos_flipped, TGSI_WRITEMASK_Y), - ureg_imm1f(ureg, 1), ureg_negate(samplepos_sysval)); - - /* If trans.x == 1, use samplepos.y, else use 1 - samplepos.y. */ - ureg_FSEQ(ureg, ureg_writemask(is_fbo, TGSI_WRITEMASK_Y), - ureg_scalar(trans_const, TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1)); - ureg_UCMP(ureg, ureg_writemask(samplepos_flipped, TGSI_WRITEMASK_Y), - ureg_src(is_fbo), samplepos_sysval, ureg_src(samplepos_flipped)); - ureg_MOV(ureg, ureg_writemask(samplepos_flipped, TGSI_WRITEMASK_X), - samplepos_sysval); - - /* Use the result in place of the system value. */ - t->systemValues[SYSTEM_VALUE_SAMPLE_POS] = ureg_src(samplepos_flipped); -} - - -/** - * Emit the TGSI instructions for inverting and adjusting WPOS. - * This code is unavoidable because it also depends on whether - * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). - */ -static void -emit_wpos_adjustment(struct gl_context *ctx, - struct st_translate *t, - int wpos_transform_const, - boolean invert, - GLfloat adjX, GLfloat adjY[2]) -{ - struct ureg_program *ureg = t->ureg; - - assert(wpos_transform_const >= 0); - - /* Fragment program uses fragment position input. - * Need to replace instances of INPUT[WPOS] with temp T - * where T = INPUT[WPOS] is inverted by Y. - */ - struct ureg_src wpostrans = ureg_DECL_constant(ureg, wpos_transform_const); - struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); - struct ureg_src *wpos = - ctx->Const.GLSLFragCoordIsSysVal ? - &t->systemValues[SYSTEM_VALUE_FRAG_COORD] : - &t->inputs[t->inputMapping[VARYING_SLOT_POS]]; - struct ureg_src wpos_input = *wpos; - - /* First, apply the coordinate shift: */ - if (adjX || adjY[0] || adjY[1]) { - if (adjY[0] != adjY[1]) { - /* Adjust the y coordinate by adjY[1] or adjY[0] respectively - * depending on whether inversion is actually going to be applied - * or not, which is determined by testing against the inversion - * state variable used below, which will be either +1 or -1. - */ - struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg); - - ureg_CMP(ureg, adj_temp, - ureg_scalar(wpostrans, invert ? 2 : 0), - ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f), - ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f)); - ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp)); - } else { - ureg_ADD(ureg, wpos_temp, wpos_input, - ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f)); - } - wpos_input = ureg_src(wpos_temp); - } else { - /* MOV wpos_temp, input[wpos] - */ - ureg_MOV(ureg, wpos_temp, wpos_input); - } - - /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be - * inversion/identity, or the other way around if we're drawing to an FBO. - */ - if (invert) { - /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy - */ - ureg_MAD(ureg, - ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), - wpos_input, - ureg_scalar(wpostrans, 0), - ureg_scalar(wpostrans, 1)); - } else { - /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww - */ - ureg_MAD(ureg, - ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), - wpos_input, - ureg_scalar(wpostrans, 2), - ureg_scalar(wpostrans, 3)); - } - - /* Use wpos_temp as position input from here on: - */ - *wpos = ureg_src(wpos_temp); -} - - -/** - * Emit fragment position/ooordinate code. - */ -static void -emit_wpos(struct st_context *st, - struct st_translate *t, - const struct gl_program *program, - struct ureg_program *ureg, - int wpos_transform_const) -{ - struct pipe_screen *pscreen = st->screen; - GLfloat adjX = 0.0f; - GLfloat adjY[2] = { 0.0f, 0.0f }; - boolean invert = FALSE; - - /* Query the pixel center conventions supported by the pipe driver and set - * adjX, adjY to help out if it cannot handle the requested one internally. - * - * The bias of the y-coordinate depends on whether y-inversion takes place - * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are - * drawing to an FBO (causes additional inversion), and whether the pipe - * driver origin and the requested origin differ (the latter condition is - * stored in the 'invert' variable). - * - * For height = 100 (i = integer, h = half-integer, l = lower, u = upper): - * - * center shift only: - * i -> h: +0.5 - * h -> i: -0.5 - * - * inversion only: - * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99 - * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5 - * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0 - * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5 - * - * inversion and center shift: - * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5 - * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99 - * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5 - * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0 - */ - if (program->info.fs.origin_upper_left) { - /* Fragment shader wants origin in upper-left */ - if (pscreen->get_param(pscreen, PIPE_CAP_FS_COORD_ORIGIN_UPPER_LEFT)) { - /* the driver supports upper-left origin */ - } - else if (pscreen->get_param(pscreen, PIPE_CAP_FS_COORD_ORIGIN_LOWER_LEFT)) { - /* the driver supports lower-left origin, need to invert Y */ - ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, - TGSI_FS_COORD_ORIGIN_LOWER_LEFT); - invert = TRUE; - } - else - assert(0); - } - else { - /* Fragment shader wants origin in lower-left */ - if (pscreen->get_param(pscreen, PIPE_CAP_FS_COORD_ORIGIN_LOWER_LEFT)) - /* the driver supports lower-left origin */ - ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, - TGSI_FS_COORD_ORIGIN_LOWER_LEFT); - else if (pscreen->get_param(pscreen, PIPE_CAP_FS_COORD_ORIGIN_UPPER_LEFT)) - /* the driver supports upper-left origin, need to invert Y */ - invert = TRUE; - else - assert(0); - } - - if (program->info.fs.pixel_center_integer) { - /* Fragment shader wants pixel center integer */ - if (pscreen->get_param(pscreen, PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER)) { - /* the driver supports pixel center integer */ - adjY[1] = 1.0f; - ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, - TGSI_FS_COORD_PIXEL_CENTER_INTEGER); - } - else if (pscreen->get_param(pscreen, PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { - /* the driver supports pixel center half integer, need to bias X,Y */ - adjX = -0.5f; - adjY[0] = -0.5f; - adjY[1] = 0.5f; - } - else - assert(0); - } - else { - /* Fragment shader wants pixel center half integer */ - if (pscreen->get_param(pscreen, PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { - /* the driver supports pixel center half integer */ - } - else if (pscreen->get_param(pscreen, PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER)) { - /* the driver supports pixel center integer, need to bias X,Y */ - adjX = adjY[0] = adjY[1] = 0.5f; - ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, - TGSI_FS_COORD_PIXEL_CENTER_INTEGER); - } - else - assert(0); - } - - /* we invert after adjustment so that we avoid the MOV to temporary, - * and reuse the adjustment ADD instead */ - emit_wpos_adjustment(st->ctx, t, wpos_transform_const, invert, adjX, adjY); -} - -/** - * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. - * TGSI uses +1 for front, -1 for back. - * This function converts the TGSI value to the GL value. Simply clamping/ - * saturating the value to [0,1] does the job. - */ -static void -emit_face_var(struct gl_context *ctx, struct st_translate *t) -{ - struct ureg_program *ureg = t->ureg; - struct ureg_dst face_temp = ureg_DECL_temporary(ureg); - struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]]; - - if (ctx->Const.NativeIntegers) { - ureg_FSGE(ureg, face_temp, face_input, ureg_imm1f(ureg, 0)); - } - else { - /* MOV_SAT face_temp, input[face] */ - ureg_MOV(ureg, ureg_saturate(face_temp), face_input); - } - - /* Use face_temp as face input from here on: */ - t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp); -} - -struct sort_inout_decls { - bool operator()(const struct inout_decl &a, const struct inout_decl &b) const { - return mapping[a.mesa_index] < mapping[b.mesa_index]; - } - - const ubyte *mapping; -}; - -/* Sort the given array of decls by the corresponding slot (TGSI file index). - * - * This is for the benefit of older drivers which are broken when the - * declarations aren't sorted in this way. - */ -static void -sort_inout_decls_by_slot(struct inout_decl *decls, - unsigned count, - const ubyte mapping[]) -{ - sort_inout_decls sorter; - sorter.mapping = mapping; - std::sort(decls, decls + count, sorter); -} - -/** - * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. - * \param program the program to translate - * \param numInputs number of input registers used - * \param inputMapping maps Mesa fragment program inputs to TGSI generic - * input indexes - * \param inputSemanticName the TGSI_SEMANTIC flag for each input - * \param inputSemanticIndex the semantic index (ex: which texcoord) for - * each input - * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input - * \param numOutputs number of output registers used - * \param outputMapping maps Mesa fragment program outputs to TGSI - * generic outputs - * \param outputSemanticName the TGSI_SEMANTIC flag for each output - * \param outputSemanticIndex the semantic index (ex: which texcoord) for - * each output - * - * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY - */ -extern "C" enum pipe_error -st_translate_program( - struct gl_context *ctx, - enum pipe_shader_type procType, - struct ureg_program *ureg, - glsl_to_tgsi_visitor *program, - const struct gl_program *proginfo, - GLuint numInputs, - const ubyte attrToIndex[], - const ubyte inputSlotToAttr[], - const ubyte inputSemanticName[], - const ubyte inputSemanticIndex[], - const ubyte interpMode[], - GLuint numOutputs, - const ubyte outputMapping[], - const ubyte outputSemanticName[], - const ubyte outputSemanticIndex[]) -{ - struct pipe_screen *screen = st_context(ctx)->screen; - struct st_translate *t; - unsigned i; - struct gl_program_constants *prog_const = - &ctx->Const.Program[program->shader->Stage]; - enum pipe_error ret = PIPE_OK; - uint8_t inputMapping[VARYING_SLOT_TESS_MAX] = {0}; - - assert(numInputs <= ARRAY_SIZE(t->inputs)); - assert(numOutputs <= ARRAY_SIZE(t->outputs)); - - ASSERT_BITFIELD_SIZE(st_src_reg, type, GLSL_TYPE_ERROR); - ASSERT_BITFIELD_SIZE(st_src_reg, file, PROGRAM_FILE_MAX); - ASSERT_BITFIELD_SIZE(st_dst_reg, type, GLSL_TYPE_ERROR); - ASSERT_BITFIELD_SIZE(st_dst_reg, file, PROGRAM_FILE_MAX); - ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, tex_type, GLSL_TYPE_ERROR); - ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, image_format, PIPE_FORMAT_COUNT); - ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, tex_target, - (gl_texture_index) (NUM_TEXTURE_TARGETS - 1)); - ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, image_format, - (enum pipe_format) (PIPE_FORMAT_COUNT - 1)); - ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, op, - (enum tgsi_opcode) (TGSI_OPCODE_LAST - 1)); - - if (proginfo->DualSlotInputs != 0) { - /* adjust attrToIndex to include placeholder for second - * part of a double attribute. - * Following code is basically matching behavior of - * util_lower_uint64_vertex_elements - */ - numInputs = 0; - for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) { - if ((proginfo->info.inputs_read & BITFIELD64_BIT(attr)) != 0) { - inputMapping[attr] = numInputs++; - - if ((proginfo->DualSlotInputs & BITFIELD64_BIT(attr)) != 0) { - /* add placeholder for second part of a double attribute */ - numInputs++; - } - } - } - inputMapping[VERT_ATTRIB_EDGEFLAG] = numInputs; - } - else { - memcpy(inputMapping, attrToIndex, sizeof(inputMapping)); - } - - t = CALLOC_STRUCT(st_translate); - if (!t) { - ret = PIPE_ERROR_OUT_OF_MEMORY; - goto out; - } - - t->procType = procType; - t->tg4_component_in_swizzle = screen->get_param(screen, PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE); - t->inputMapping = inputMapping; - t->outputMapping = outputMapping; - t->ureg = ureg; - t->num_temp_arrays = program->next_array; - if (t->num_temp_arrays) - t->arrays = (struct ureg_dst*) - calloc(t->num_temp_arrays, sizeof(t->arrays[0])); - - /* - * Declare input attributes. - */ - switch (procType) { - case PIPE_SHADER_FRAGMENT: - case PIPE_SHADER_GEOMETRY: - case PIPE_SHADER_TESS_EVAL: - case PIPE_SHADER_TESS_CTRL: - sort_inout_decls_by_slot(program->inputs, program->num_inputs, inputMapping); - - for (i = 0; i < program->num_inputs; ++i) { - struct inout_decl *decl = &program->inputs[i]; - unsigned slot = inputMapping[decl->mesa_index]; - struct ureg_src src; - ubyte tgsi_usage_mask = decl->usage_mask; - - if (glsl_base_type_is_64bit(decl->base_type)) { - if (tgsi_usage_mask == 1) - tgsi_usage_mask = TGSI_WRITEMASK_XY; - else if (tgsi_usage_mask == 2) - tgsi_usage_mask = TGSI_WRITEMASK_ZW; - else - tgsi_usage_mask = TGSI_WRITEMASK_XYZW; - } - - enum tgsi_interpolate_mode interp_mode = TGSI_INTERPOLATE_CONSTANT; - enum tgsi_interpolate_loc interp_location = TGSI_INTERPOLATE_LOC_CENTER; - if (procType == PIPE_SHADER_FRAGMENT) { - assert(interpMode); - interp_mode = interpMode[slot] != TGSI_INTERPOLATE_COUNT ? - (enum tgsi_interpolate_mode) interpMode[slot] : - tgsi_get_interp_mode(decl->interp, - inputSlotToAttr[slot] == VARYING_SLOT_COL0 || - inputSlotToAttr[slot] == VARYING_SLOT_COL1); - - interp_location = (enum tgsi_interpolate_loc) decl->interp_loc; - } - - src = ureg_DECL_fs_input_centroid_layout(ureg, - (enum tgsi_semantic) inputSemanticName[slot], - inputSemanticIndex[slot], - interp_mode, interp_location, slot, tgsi_usage_mask, - decl->array_id, decl->size); - - for (unsigned j = 0; j < decl->size; ++j) { - if (t->inputs[slot + j].File != TGSI_FILE_INPUT) { - /* The ArrayID is set up in dst_register */ - t->inputs[slot + j] = src; - t->inputs[slot + j].ArrayID = 0; - t->inputs[slot + j].Index += j; - } - } - } - break; - case PIPE_SHADER_VERTEX: - for (i = 0; i < numInputs; i++) { - t->inputs[i] = ureg_DECL_vs_input(ureg, i); - } - break; - case PIPE_SHADER_COMPUTE: - break; - default: - assert(0); - } - - /* - * Declare output attributes. - */ - switch (procType) { - case PIPE_SHADER_FRAGMENT: - case PIPE_SHADER_COMPUTE: - break; - case PIPE_SHADER_GEOMETRY: - case PIPE_SHADER_TESS_EVAL: - case PIPE_SHADER_TESS_CTRL: - case PIPE_SHADER_VERTEX: - sort_inout_decls_by_slot(program->outputs, program->num_outputs, outputMapping); - - for (i = 0; i < program->num_outputs; ++i) { - struct inout_decl *decl = &program->outputs[i]; - unsigned slot = outputMapping[decl->mesa_index]; - struct ureg_dst dst; - ubyte tgsi_usage_mask = decl->usage_mask; - - if (glsl_base_type_is_64bit(decl->base_type)) { - if (tgsi_usage_mask == 1) - tgsi_usage_mask = TGSI_WRITEMASK_XY; - else if (tgsi_usage_mask == 2) - tgsi_usage_mask = TGSI_WRITEMASK_ZW; - else - tgsi_usage_mask = TGSI_WRITEMASK_XYZW; - } - - dst = ureg_DECL_output_layout(ureg, - (enum tgsi_semantic) outputSemanticName[slot], - outputSemanticIndex[slot], - decl->gs_out_streams, - slot, tgsi_usage_mask, decl->array_id, decl->size, decl->invariant); - dst.Invariant = decl->invariant; - for (unsigned j = 0; j < decl->size; ++j) { - if (t->outputs[slot + j].File != TGSI_FILE_OUTPUT) { - /* The ArrayID is set up in dst_register */ - t->outputs[slot + j] = dst; - t->outputs[slot + j].ArrayID = 0; - t->outputs[slot + j].Index += j; - t->outputs[slot + j].Invariant = decl->invariant; - } - } - } - break; - default: - assert(0); - } - - if (procType == PIPE_SHADER_FRAGMENT) { - if (proginfo->info.inputs_read & VARYING_BIT_POS) { - /* Must do this after setting up t->inputs. */ - emit_wpos(st_context(ctx), t, proginfo, ureg, - program->wpos_transform_const); - } - - if (proginfo->info.inputs_read & VARYING_BIT_FACE) - emit_face_var(ctx, t); - - for (i = 0; i < numOutputs; i++) { - switch (outputSemanticName[i]) { - case TGSI_SEMANTIC_POSITION: - t->outputs[i] = ureg_DECL_output(ureg, - TGSI_SEMANTIC_POSITION, /* Z/Depth */ - outputSemanticIndex[i]); - t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z); - break; - case TGSI_SEMANTIC_STENCIL: - t->outputs[i] = ureg_DECL_output(ureg, - TGSI_SEMANTIC_STENCIL, /* Stencil */ - outputSemanticIndex[i]); - t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y); - break; - case TGSI_SEMANTIC_COLOR: - t->outputs[i] = ureg_DECL_output(ureg, - TGSI_SEMANTIC_COLOR, - outputSemanticIndex[i]); - break; - case TGSI_SEMANTIC_SAMPLEMASK: - t->outputs[i] = ureg_DECL_output(ureg, - TGSI_SEMANTIC_SAMPLEMASK, - outputSemanticIndex[i]); - /* TODO: If we ever support more than 32 samples, this will have - * to become an array. - */ - t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X); - break; - default: - assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR"); - ret = PIPE_ERROR_BAD_INPUT; - goto out; - } - } - } - else if (procType == PIPE_SHADER_VERTEX) { - for (i = 0; i < numOutputs; i++) { - if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) { - /* force register to contain a fog coordinate in the form (F, 0, 0, 1). */ - ureg_MOV(ureg, - ureg_writemask(t->outputs[i], TGSI_WRITEMASK_YZW), - ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f)); - t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X); - } - } - } - - /* Declare address register. - */ - if (program->num_address_regs > 0) { - assert(program->num_address_regs <= 3); - for (int i = 0; i < program->num_address_regs; i++) - t->address[i] = ureg_DECL_address(ureg); - } - - /* Declare misc input registers - */ - BITSET_FOREACH_SET(i, proginfo->info.system_values_read, SYSTEM_VALUE_MAX) { - enum tgsi_semantic semName = tgsi_get_sysval_semantic(i); - - t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0); - - if (semName == TGSI_SEMANTIC_INSTANCEID || - semName == TGSI_SEMANTIC_VERTEXID) { - /* From Gallium perspective, these system values are always - * integer, and require native integer support. However, if - * native integer is supported on the vertex stage but not the - * pixel stage (e.g, i915g + draw), Mesa will generate IR that - * assumes these system values are floats. To resolve the - * inconsistency, we insert a U2F. - */ - struct st_context *st = st_context(ctx); - struct pipe_screen *pscreen = st->screen; - assert(procType == PIPE_SHADER_VERTEX); - assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS)); - (void) pscreen; - if (!ctx->Const.NativeIntegers) { - struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg); - ureg_U2F(t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), - t->systemValues[i]); - t->systemValues[i] = ureg_scalar(ureg_src(temp), 0); - } - } - - if (procType == PIPE_SHADER_FRAGMENT && - semName == TGSI_SEMANTIC_POSITION) - emit_wpos(st_context(ctx), t, proginfo, ureg, - program->wpos_transform_const); - - if (procType == PIPE_SHADER_FRAGMENT && - semName == TGSI_SEMANTIC_SAMPLEPOS) - emit_samplepos_adjustment(t, program->wpos_transform_const); - } - - t->array_sizes = program->array_sizes; - t->input_decls = program->inputs; - t->num_input_decls = program->num_inputs; - t->output_decls = program->outputs; - t->num_output_decls = program->num_outputs; - - /* Emit constants and uniforms. TGSI uses a single index space for these, - * so we put all the translated regs in t->constants. - */ - if (proginfo->Parameters) { - t->constants = (struct ureg_src *) - calloc(proginfo->Parameters->NumParameters, sizeof(t->constants[0])); - if (t->constants == NULL) { - ret = PIPE_ERROR_OUT_OF_MEMORY; - goto out; - } - t->num_constants = proginfo->Parameters->NumParameters; - - for (i = 0; i < proginfo->Parameters->NumParameters; i++) { - unsigned pvo = proginfo->Parameters->Parameters[i].ValueOffset; - - switch (proginfo->Parameters->Parameters[i].Type) { - case PROGRAM_STATE_VAR: - case PROGRAM_UNIFORM: - t->constants[i] = ureg_DECL_constant(ureg, i); - break; - - /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect - * addressing of the const buffer. - * FIXME: Be smarter and recognize param arrays: - * indirect addressing is only valid within the referenced - * array. - */ - case PROGRAM_CONSTANT: - if (program->indirect_addr_consts) - t->constants[i] = ureg_DECL_constant(ureg, i); - else - t->constants[i] = emit_immediate(t, - proginfo->Parameters->ParameterValues + pvo, - proginfo->Parameters->Parameters[i].DataType, - 4); - break; - default: - break; - } - } - } - - for (i = 0; i < proginfo->info.num_ubos; i++) { - unsigned size = proginfo->sh.UniformBlocks[i]->UniformBufferSize; - unsigned num_const_vecs = (size + 15) / 16; - unsigned first, last; - assert(num_const_vecs > 0); - first = 0; - last = num_const_vecs > 0 ? num_const_vecs - 1 : 0; - ureg_DECL_constant2D(t->ureg, first, last, i + 1); - } - - /* Emit immediate values. - */ - t->immediates = (struct ureg_src *) - calloc(program->num_immediates, sizeof(struct ureg_src)); - if (t->immediates == NULL) { - ret = PIPE_ERROR_OUT_OF_MEMORY; - goto out; - } - t->num_immediates = program->num_immediates; - - i = 0; - foreach_in_list(immediate_storage, imm, &program->immediates) { - assert(i < program->num_immediates); - t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size32); - } - assert(i == program->num_immediates); - - /* texture samplers */ - for (i = 0; i < prog_const->MaxTextureImageUnits; i++) { - if (program->samplers_used & (1u << i)) { - enum tgsi_return_type type = - st_translate_texture_type(program->sampler_types[i]); - - t->samplers[i] = ureg_DECL_sampler(ureg, i); - - ureg_DECL_sampler_view(ureg, i, program->sampler_targets[i], - type, type, type, type); - } - } - - /* Declare atomic and shader storage buffers. */ - { - struct gl_program *prog = program->prog; - - if (!st_context(ctx)->has_hw_atomics) { - for (i = 0; i < prog->info.num_abos; i++) { - unsigned index = (prog->info.num_ssbos + - prog->sh.AtomicBuffers[i]->Binding); - assert(prog->sh.AtomicBuffers[i]->Binding < - prog_const->MaxAtomicBuffers); - t->buffers[index] = ureg_DECL_buffer(ureg, index, true); - } - } else { - for (i = 0; i < program->num_atomics; i++) { - struct hwatomic_decl *ainfo = &program->atomic_info[i]; - gl_uniform_storage *uni_storage = &prog->sh.data->UniformStorage[ainfo->location]; - int base = uni_storage->offset / ATOMIC_COUNTER_SIZE; - ureg_DECL_hw_atomic(ureg, base, base + ainfo->size - 1, ainfo->binding, - ainfo->array_id); - } - } - - assert(prog->info.num_ssbos <= prog_const->MaxShaderStorageBlocks); - for (i = 0; i < prog->info.num_ssbos; i++) { - t->buffers[i] = ureg_DECL_buffer(ureg, i, false); - } - } - - if (program->use_shared_memory) - t->shared_memory = ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED); - - for (i = 0; i < program->shader->Program->info.num_images; i++) { - if (program->images_used & (1 << i)) { - t->images[i] = ureg_DECL_image(ureg, i, - program->image_targets[i], - program->image_formats[i], - program->image_wr[i], - false); - } - } - - /* Emit each instruction in turn: - */ - foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) - compile_tgsi_instruction(t, inst); - -out: - if (t) { - free(t->arrays); - free(t->temps); - free(t->constants); - t->num_constants = 0; - free(t->immediates); - t->num_immediates = 0; - FREE(t); - } - - return ret; -} -/* ----------------------------- End TGSI code ------------------------------ */ - - -/** - * Convert a shader's GLSL IR into a Mesa gl_program, although without - * generating Mesa IR. - */ -static struct gl_program * -get_mesa_program_tgsi(struct gl_context *ctx, - struct gl_shader_program *shader_program, - struct gl_linked_shader *shader) -{ - glsl_to_tgsi_visitor* v; - struct gl_program *prog; - struct gl_shader_compiler_options *options = - &ctx->Const.ShaderCompilerOptions[shader->Stage]; - struct pipe_screen *pscreen = st_context(ctx)->screen; - enum pipe_shader_type ptarget = pipe_shader_type_from_mesa(shader->Stage); - unsigned skip_merge_registers; - - validate_ir_tree(shader->ir); - - prog = shader->Program; - - prog->Parameters = _mesa_new_parameter_list(); - v = new glsl_to_tgsi_visitor(); - v->ctx = ctx; - v->prog = prog; - v->shader_program = shader_program; - v->shader = shader; - v->options = options; - v->native_integers = ctx->Const.NativeIntegers; - - v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget, - PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED); - v->have_fma = pscreen->get_shader_param(pscreen, ptarget, - PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED); - v->has_tex_txf_lz = pscreen->get_param(pscreen, - PIPE_CAP_TGSI_TEX_TXF_LZ); - - v->tg4_component_in_swizzle = pscreen->get_param(pscreen, PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE); - v->variables = _mesa_hash_table_create(v->mem_ctx, _mesa_hash_pointer, - _mesa_key_pointer_equal); - skip_merge_registers = - pscreen->get_shader_param(pscreen, ptarget, - PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS); - - generate_parameters_list_for_uniforms(ctx, shader_program, shader, - prog->Parameters); - - /* Remove reads from output registers. */ - if (!pscreen->get_param(pscreen, PIPE_CAP_SHADER_CAN_READ_OUTPUTS)) - lower_output_reads(shader->Stage, shader->ir); - - /* Emit intermediate IR for main(). */ - visit_exec_list(shader->ir, v); - -#if 0 - /* Print out some information (for debugging purposes) used by the - * optimization passes. */ - { - int i; - int *first_writes = ralloc_array(v->mem_ctx, int, v->next_temp); - int *first_reads = ralloc_array(v->mem_ctx, int, v->next_temp); - int *last_writes = ralloc_array(v->mem_ctx, int, v->next_temp); - int *last_reads = ralloc_array(v->mem_ctx, int, v->next_temp); - - for (i = 0; i < v->next_temp; i++) { - first_writes[i] = -1; - first_reads[i] = -1; - last_writes[i] = -1; - last_reads[i] = -1; - } - v->get_first_temp_read(first_reads); - v->get_last_temp_read_first_temp_write(last_reads, first_writes); - v->get_last_temp_write(last_writes); - for (i = 0; i < v->next_temp; i++) - printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, first_reads[i], - first_writes[i], - last_reads[i], - last_writes[i]); - ralloc_free(first_writes); - ralloc_free(first_reads); - ralloc_free(last_writes); - ralloc_free(last_reads); - } -#endif - - /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ - v->simplify_cmp(); - v->copy_propagate(); - - while (v->eliminate_dead_code()); - - v->merge_two_dsts(); - - if (!skip_merge_registers) { - v->split_arrays(); - v->copy_propagate(); - while (v->eliminate_dead_code()); - - v->merge_registers(); - v->copy_propagate(); - while (v->eliminate_dead_code()); - } - - v->renumber_registers(); - - /* Write the END instruction. */ - v->emit_asm(NULL, TGSI_OPCODE_END); - - if (ctx->_Shader->Flags & GLSL_DUMP) { - _mesa_log("\n"); - _mesa_log("GLSL IR for linked %s program %d:\n", - _mesa_shader_stage_to_string(shader->Stage), - shader_program->Name); - _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL); - _mesa_log("\n\n"); - } - - do_set_program_inouts(shader->ir, prog, shader->Stage); - - _mesa_copy_linked_program_data(shader_program, shader); - - if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_SKIP_SHRINK_IO_ARRAYS)) { - mark_array_io(v->inputs, v->num_inputs, - &prog->info.inputs_read, - prog->DualSlotInputs, - &prog->info.patch_inputs_read); - - mark_array_io(v->outputs, v->num_outputs, - &prog->info.outputs_written, 0ULL, - &prog->info.patch_outputs_written); - } else { - shrink_array_declarations(v->inputs, v->num_inputs, - &prog->info.inputs_read, - prog->DualSlotInputs, - &prog->info.patch_inputs_read); - shrink_array_declarations(v->outputs, v->num_outputs, - &prog->info.outputs_written, 0ULL, - &prog->info.patch_outputs_written); - } - - count_resources(v, prog); - - /* The GLSL IR won't be needed anymore. */ - ralloc_free(shader->ir); - shader->ir = NULL; - - /* This must be done before the uniform storage is associated. */ - if (shader->Stage == MESA_SHADER_FRAGMENT && - (prog->info.inputs_read & VARYING_BIT_POS || - BITSET_TEST(prog->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) || - BITSET_TEST(prog->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS))) { - static const gl_state_index16 wposTransformState[STATE_LENGTH] = { - STATE_FB_WPOS_Y_TRANSFORM - }; - - v->wpos_transform_const = _mesa_add_state_reference(prog->Parameters, - wposTransformState); - } - - /* Avoid reallocation of the program parameter list, because the uniform - * storage is only associated with the original parameter list. - * This should be enough for Bitmap and DrawPixels constants. - */ - _mesa_ensure_and_associate_uniform_storage(ctx, shader_program, prog, 8); - if (!shader_program->data->LinkStatus) { - free_glsl_to_tgsi_visitor(v); - _mesa_reference_program(ctx, &shader->Program, NULL); - return NULL; - } - - - prog->glsl_to_tgsi = v; - - PRINT_STATS(v->print_stats()); - - return prog; -} - -/* See if there are unsupported control flow statements. */ -class ir_control_flow_info_visitor : public ir_hierarchical_visitor { -private: - const struct gl_shader_compiler_options *options; -public: - ir_control_flow_info_visitor(const struct gl_shader_compiler_options *options) - : options(options), - unsupported(false) - { - } - - virtual ir_visitor_status visit_enter(ir_function *ir) - { - /* Other functions are skipped (same as glsl_to_tgsi). */ - if (strcmp(ir->name, "main") == 0) - return visit_continue; - - return visit_continue_with_parent; - } - - virtual ir_visitor_status visit_enter(ir_call *ir) - { - if (!ir->callee->is_intrinsic()) { - unsupported = true; /* it's a function call */ - return visit_stop; - } - return visit_continue; - } - - virtual ir_visitor_status visit_enter(ir_return *ir) - { - if (options->EmitNoMainReturn) { - unsupported = true; - return visit_stop; - } - return visit_continue; - } - - bool unsupported; -}; - -static bool -has_unsupported_control_flow(exec_list *ir, - const struct gl_shader_compiler_options *options) -{ - ir_control_flow_info_visitor visitor(options); - visit_list_elements(&visitor, ir); - return visitor.unsupported; -} - -/** - * Link a shader. - * This actually involves converting GLSL IR into an intermediate TGSI-like IR - * with code lowering and other optimizations. - */ -GLboolean -st_link_tgsi(struct gl_context *ctx, struct gl_shader_program *prog) -{ - struct pipe_screen *pscreen = st_context(ctx)->screen; - - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - struct gl_linked_shader *shader = prog->_LinkedShaders[i]; - if (shader == NULL) - continue; - - exec_list *ir = shader->ir; - gl_shader_stage stage = shader->Stage; - enum pipe_shader_type ptarget = pipe_shader_type_from_mesa(stage); - const struct gl_shader_compiler_options *options = - &ctx->Const.ShaderCompilerOptions[stage]; - - unsigned if_threshold = pscreen->get_shader_param(pscreen, ptarget, - PIPE_SHADER_CAP_LOWER_IF_THRESHOLD); - if (ctx->Const.GLSLOptimizeConservatively) { - /* Do it once and repeat only if there's unsupported control flow. */ - do { - do_common_optimization(ir, true, true, options, - ctx->Const.NativeIntegers); - lower_if_to_cond_assign((gl_shader_stage)i, ir, - options->MaxIfDepth, if_threshold); - } while (has_unsupported_control_flow(ir, options)); - } else { - /* Repeat it until it stops making changes. */ - bool progress; - do { - progress = do_common_optimization(ir, true, true, options, - ctx->Const.NativeIntegers); - progress |= lower_if_to_cond_assign((gl_shader_stage)i, ir, - options->MaxIfDepth, if_threshold); - } while (progress); - } - - /* Do this again to lower ir_binop_vector_extract introduced - * by optimization passes. - */ - do_vec_index_to_cond_assign(ir); - - validate_ir_tree(ir); - - struct gl_program *linked_prog = - get_mesa_program_tgsi(ctx, prog, shader); - st_set_prog_affected_state_flags(linked_prog); - - if (linked_prog) { - /* This is really conservative: */ - linked_prog->info.writes_memory = - linked_prog->info.num_ssbos || - linked_prog->info.num_images || - ctx->Extensions.ARB_bindless_texture || - (linked_prog->sh.LinkedTransformFeedback && - linked_prog->sh.LinkedTransformFeedback->NumVarying); - - if (!st_program_string_notify(ctx, - _mesa_shader_stage_to_program(i), - linked_prog)) { - _mesa_reference_program(ctx, &shader->Program, NULL); - return GL_FALSE; - } - } - } - - return GL_TRUE; -} diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h deleted file mode 100644 index d87571eb028..00000000000 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * Copyright © 2011 Bryan Cain - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#ifndef ST_GLSL_TO_TGSI_H -#define ST_GLSL_TO_TGSI_H - -#include "main/glheader.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct gl_context; -struct gl_shader; -struct gl_shader_program; -struct glsl_to_tgsi_visitor; -struct ureg_program; - -enum pipe_error st_translate_program( - struct gl_context *ctx, - enum pipe_shader_type procType, - struct ureg_program *ureg, - struct glsl_to_tgsi_visitor *program, - const struct gl_program *proginfo, - GLuint numInputs, - const ubyte inputMapping[], - const ubyte inputSlotToAttr[], - const ubyte inputSemanticName[], - const ubyte inputSemanticIndex[], - const ubyte interpMode[], - GLuint numOutputs, - const ubyte outputMapping[], - const ubyte outputSemanticName[], - const ubyte outputSemanticIndex[]); - -void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v); - -GLboolean -st_link_tgsi(struct gl_context *ctx, struct gl_shader_program *prog); - -enum tgsi_semantic -_mesa_sysval_to_semantic(unsigned sysval); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi_array_merge.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi_array_merge.cpp deleted file mode 100644 index e54bb7b9f4d..00000000000 --- a/src/mesa/state_tracker/st_glsl_to_tgsi_array_merge.cpp +++ /dev/null @@ -1,698 +0,0 @@ -/* - * Copyright © 2017 Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/* A short overview on how the array merging works: - * - * Inputs: - * - per array information: live range, access mask, size - * - the program - * - * Output: - * - the program with updated array addressing - * - * Pseudo algorithm: - * - * repeat - * for all pairs of arrays: - * if they have non-overlapping live ranges and equal access masks: - * - pick shorter array - * - merge its live range into the longer array - * - set its merge target array to the longer array - * - mark the shorter array as processed - * - * for all pairs of arrays: - * if they have overlapping live ranges use in sum at most four components: - * - pick shorter array - * - evaluate reswizzle map to move its components into the components - * that are not used by the longer array - * - set its merge target array to the longer array - * - mark the shorter array as processed - * - bail out loop - * until no more successfull merges were found - * - * for all pairs of arrays: - * if they have non-overlapping live ranges: - * - pick shorter array - * - merge its live range into the longer array - * - set its merge target array to the longer array - * - mark the shorter array as processed - * - * Finalize remapping map so that target arrays are always final, i.e. have - * themselfes no merge target set. - * - * Example: - * ID | Length | Live range | access mask | target id | reswizzle - * ================================================================ - * 1 3 3-10 x___ 0 ____ - * 2 4 13-20 x___ 0 ____ - * 3 8 3-20 x___ 0 ____ - * 4 6 21-40 xy__ 0 ____ - * 5 7 12-30 xy__ 0 ____ - * - * 1. merge live ranges 1 and 2 - * - * ID | Length | Live range | access mask | target id | reswizzle - * ================================================================ - * 1 - - x___ 2 ____ - * 2 4 3-20 x___ 0 ____ - * 3 8 3-20 x___ 0 ____ - * 4 6 21-40 xy__ 0 ____ - * 5 7 12-30 xy__ 0 ____ - * - * - * 3. interleave 2 and 3 - * - * ID | Length | Live range | access mask | target id | reswizzle - * ================================================================ - * 1 - - x___ 2 ____ - * 2 - - x___ 3 _x__ - * 3 8 3-20 xy__ 0 ____ - * 4 6 21-40 xy__ 0 ____ - * 5 7 12-30 xy__ 0 ____ - * - * 3. merge live ranges 3 and 4 - * - * ID | Length | Live range | access mask | target id | reswizzle - * ================================================================ - * 1 - - x___ 2 ____ - * 2 - - x___ 3 _x__ - * 3 8 3-40 xy__ 0 ____ - * 4 - - xy__ 3 ____ - * 5 7 3-21 xy__ 0 ____ - * - * 4. interleave 3 and 5 - * - * ID | Length | Live range | access mask | target id | reswizzle - * ================================================================ - * 1 - - x___ 2 ____ - * 2 - - x___ 3 _x__ - * 3 8 3-40 xy__ 0 ____ - * 4 - - xy__ 3 ____ - * 5 - - xy__ 3 __xy - * - * 5. finalize remapping - * (Array 1 has been merged with 2 that was later interleaved, so - * the reswizzeling must be propagated. - * - * ID | Length | Live range | new access mask | target id | reswizzle - * ================================================================ - * 1 - - _y__ 3 _x__ - * 2 - - _y__ 3 _x__ - * 3 8 3-40 xy__ 0 ____ - * 4 - - xy__ 3 ____ - * 5 - - __zw 3 __xy - * -*/ - -#include "program/prog_instruction.h" -#include "util/u_math.h" -#include -#include -#include - -#include - -#include "st_glsl_to_tgsi_array_merge.h" - -#if __cplusplus >= 201402L -#include -using std::unique_ptr; -using std::make_unique; -#endif - -#define ARRAY_MERGE_DEBUG 0 - -#if ARRAY_MERGE_DEBUG > 0 -#define ARRAY_MERGE_DUMP(x) do std::cerr << x; while (0) -#define ARRAY_MERGE_DUMP_BLOCK(x) do { x } while (0) -#else -#define ARRAY_MERGE_DUMP(x) -#define ARRAY_MERGE_DUMP_BLOCK(x) -#endif - -static const char xyzw[] = "xyzw"; - -array_live_range::array_live_range(): - id(0), - length(0), - first_access(0), - last_access(0), - component_access_mask(0), - used_component_count(0), - target_array(nullptr) -{ - init_swizzles(); -} - -array_live_range::array_live_range(unsigned aid, unsigned alength): - id(aid), - length(alength), - first_access(0), - last_access(0), - component_access_mask(0), - used_component_count(0), - target_array(nullptr) -{ - init_swizzles(); -} - -array_live_range::array_live_range(unsigned aid, unsigned alength, int begin, - int end, int sw): - id(aid), - length(alength), - first_access(begin), - last_access(end), - component_access_mask(sw), - used_component_count(util_bitcount(sw)), - target_array(nullptr) -{ - init_swizzles(); -} - -void array_live_range::init_swizzles() -{ - for (int i = 0; i < 4; ++i) - swizzle_map[i] = i; -} - -void array_live_range::set_live_range(int _begin, int _end) -{ - set_begin(_begin); - set_end(_end); -} - -void array_live_range::set_access_mask(int mask) -{ - component_access_mask = mask; - used_component_count = util_bitcount(mask); -} - -void array_live_range::merge(array_live_range *a, array_live_range *b) -{ - if (a->array_length() < b->array_length()) - b->merge_live_range_from(a); - else - a->merge_live_range_from(b); -} - -void array_live_range::interleave(array_live_range *a, array_live_range *b) -{ - if (a->array_length() < b->array_length()) - a->interleave_into(b); - else - b->interleave_into(a); -} - -void array_live_range::interleave_into(array_live_range *other) -{ - for (int i = 0; i < 4; ++i) { - swizzle_map[i] = -1; - } - - int trgt_access_mask = other->access_mask(); - int summary_access_mask = trgt_access_mask; - int src_swizzle_bit = 1; - int next_free_swizzle_bit = 1; - int k = 0; - unsigned i; - unsigned last_src_bit = util_last_bit(component_access_mask); - - for (i = 0; i <= last_src_bit ; ++i, src_swizzle_bit <<= 1) { - - /* Jump over empty src component slots (e.g. x__w). This is just a - * safety measure and it is tested for, but it is very likely that the - * emitted code always uses slots staring from x without leaving holes - * (i.e. always xy__ not x_z_ or _yz_ etc). - */ - if (!(src_swizzle_bit & component_access_mask)) - continue; - - /* Find the next free access slot in the target. */ - while ((trgt_access_mask & next_free_swizzle_bit) && - k < 4) { - next_free_swizzle_bit <<= 1; - ++k; - } - assert(k < 4 && - "Interleaved array would have more then four components"); - - /* Set the mapping for this component. */ - swizzle_map[i] = k; - trgt_access_mask |= next_free_swizzle_bit; - - /* Update the joined access mask if we didn't just fill the mapping.*/ - if (src_swizzle_bit & component_access_mask) - summary_access_mask |= next_free_swizzle_bit; - } - - other->set_access_mask(summary_access_mask); - other->merge_live_range_from(this); - - ARRAY_MERGE_DUMP_BLOCK( - std::cerr << "Interleave " << id << " into " << other->id << ", swz:"; - for (unsigned i = 0; i < 4; ++i) { - std::cerr << ((swizzle_map[i] >= 0) ? xyzw[swizzle_map[i]] : '_'); - } - std::cerr << '\n'; - ); -} - -void array_live_range::merge_live_range_from(array_live_range *other) -{ - other->set_target(this); - if (other->begin() < first_access) - first_access = other->begin(); - if (other->end() > last_access) - last_access = other->end(); -} - -int8_t array_live_range::remap_one_swizzle(int8_t idx) const -{ - // needs testing - if (target_array) { - idx = swizzle_map[idx]; - if (idx >= 0) - idx = target_array->remap_one_swizzle(idx); - } - return idx; -} - -void array_live_range::set_target(array_live_range *target) -{ - target_array = target; -} - -void array_live_range::print(std::ostream& os) const -{ - os << "[id:" << id - << ", length:" << length - << ", (b:" << first_access - << ", e:" << last_access - << "), sw:" << (int)component_access_mask - << ", nc:" << (int)used_component_count - << "]"; -} - -bool array_live_range::time_doesnt_overlap(const array_live_range& other) const -{ - return (other.last_access < first_access || - last_access < other.first_access); -} - -namespace tgsi_array_merge { - -array_remapping::array_remapping(): - target_id(0) -{ - for (int i = 0; i < 4; ++i) { - read_swizzle_map[i] = i; - } -} - -array_remapping::array_remapping(int trgt_array_id, const int8_t swizzle[]): - target_id(trgt_array_id) -{ - for (int i = 0; i < 4; ++i) { - read_swizzle_map[i] = swizzle[i]; - } -} - -void array_remapping::init_from(const array_live_range& range) -{ - target_id = range.is_mapped() ? range.final_target()->array_id(): 0; - for (int i = 0; i < 4; ++i) - read_swizzle_map[i] = range.remap_one_swizzle(i); -} - - -int array_remapping::map_writemask(int write_mask) const -{ - assert(is_valid()); - int result_write_mask = 0; - for (int i = 0; i < 4; ++i) { - if (1 << i & write_mask) { - assert(read_swizzle_map[i] >= 0); - result_write_mask |= 1 << read_swizzle_map[i]; - } - } - return result_write_mask; -} - -uint16_t array_remapping::move_read_swizzles(uint16_t original_swizzle) const -{ - assert(is_valid()); - /* Since - * - * dst.zw = src.xy in glsl actually is MOV dst.__zw src.__xy - * - * when interleaving the arrays the source swizzles must be moved - * according to the changed dst write mask. - */ - uint16_t out_swizzle = 0; - for (int idx = 0; idx < 4; ++idx) { - uint16_t orig_swz = GET_SWZ(original_swizzle, idx); - int new_idx = read_swizzle_map[idx]; - if (new_idx >= 0) - out_swizzle |= orig_swz << 3 * new_idx; - } - return out_swizzle; -} - -uint16_t array_remapping::map_swizzles(uint16_t old_swizzle) const -{ - uint16_t out_swizzle = 0; - for (int idx = 0; idx < 4; ++idx) { - uint16_t swz = read_swizzle_map[GET_SWZ(old_swizzle, idx)]; - out_swizzle |= swz << 3 * idx; - } - return out_swizzle; -} - -void array_remapping::print(std::ostream& os) const -{ - if (is_valid()) { - os << "[aid: " << target_id << " swz: "; - for (int i = 0; i < 4; ++i) - os << (read_swizzle_map[i] >= 0 ? xyzw[read_swizzle_map[i]] : '_'); - os << "]"; - } else { - os << "[unused]"; - } -} - -/* Required by the unit tests */ -bool operator == (const array_remapping& lhs, const array_remapping& rhs) -{ - if (lhs.target_id != rhs.target_id) - return false; - - if (lhs.target_id == 0) - return true; - - for (int i = 0; i < 4; ++i) { - if (lhs.read_swizzle_map[i] != rhs.read_swizzle_map[i]) - return false; - } - return true; -} - -static -bool sort_by_begin(const array_live_range& lhs, const array_live_range& rhs) { - return lhs.begin() < rhs.begin(); -} - -/* Helper class to evaluate merging and interleaving of arrays */ -class array_merge_evaluator { -public: - typedef int (*array_merger)(array_live_range& range_1, - array_live_range& range_2); - - array_merge_evaluator(int _narrays, array_live_range *_ranges, - bool _restart); - - /** Run the merge strategy on all arrays - * @returns number of successfull merges - */ - int run(); - -private: - virtual int do_run(array_live_range& range_1, array_live_range& range_2) = 0; - - int narrays; - array_live_range *ranges; - bool restart; -}; - -array_merge_evaluator::array_merge_evaluator(int _narrays, - array_live_range *_ranges, - bool _restart): - narrays(_narrays), - ranges(_ranges), - restart(_restart) -{ -} - -int array_merge_evaluator::run() -{ - int remaps = 0; - - for (int i = 0; i < narrays; ++i) { - if (ranges[i].is_mapped()) - continue; - - for (int j = i + 1; j < narrays; ++j) { - if (!ranges[j].is_mapped()) { - ARRAY_MERGE_DUMP("try merge " << i << " id:" << ranges[i].array_id() - << " and " << j << " id: "<< ranges[j].array_id() - << "\n"); - int n = do_run(ranges[i], ranges[j]); - if (restart && n) - return n; - remaps += n; - } - } - } - return remaps; -} - -/* Merge live ranges if possible at all */ -class merge_live_range_always: public array_merge_evaluator { -public: - merge_live_range_always(int _narrays, array_live_range *_ranges): - array_merge_evaluator(_narrays, _ranges, false) { - } -protected: - int do_run(array_live_range& range_1, array_live_range& range_2){ - if (range_2.time_doesnt_overlap(range_1)) { - ARRAY_MERGE_DUMP("merge " << range_2 << " into " << range_1 << "\n"); - array_live_range::merge(&range_1,&range_2); - return 1; - } - return 0; - } -}; - -/* Merge live ranges only if they use the same swizzle */ -class merge_live_range_equal_swizzle: public merge_live_range_always { -public: - merge_live_range_equal_swizzle(int _narrays, array_live_range *_ranges): - merge_live_range_always(_narrays, _ranges) { - } -private: - int do_run(array_live_range& range_1, array_live_range& range_2){ - if (range_1.access_mask() == range_2.access_mask()) { - return merge_live_range_always::do_run(range_1, range_2); - } - return 0; - } -}; - -/* Interleave arrays if possible */ -class interleave_live_range: public array_merge_evaluator { -public: - interleave_live_range(int _narrays, array_live_range *_ranges): - array_merge_evaluator(_narrays, _ranges, true) { - } -private: - int do_run(array_live_range& range_1, array_live_range& range_2){ - if ((range_2.used_components() + range_1.used_components() <= 4) && - !range_1.time_doesnt_overlap(range_2)) { - ARRAY_MERGE_DUMP("Interleave " << range_2 << " into " << range_1 << "\n"); - array_live_range::interleave(&range_1, &range_2); - return 1; - } - return 0; - } -}; - -/* Estimate the array merging: First in a loop, arrays with equal access mask - * are merged, then interleave arrays that together use at most four components, - * and have overlapping live ranges. Finally arrays are merged regardless of - * access mask. - * @param[in] narrays number of arrays - * @param[in,out] alt array life times, the merge target life time will be - * updated with the new life time. - * @param[in,out] remapping track the arraay index remapping and reswizzeling. - * @returns number of merged arrays - */ -bool get_array_remapping(int narrays, array_live_range *ranges, - array_remapping *remapping) -{ - int total_remapped = 0; - int n_remapped; - - /* Sort by "begin of live range" so that we don't have to restart searching - * after every merge. - */ - std::sort(ranges, ranges + narrays, sort_by_begin); - merge_live_range_equal_swizzle merge_evaluator_es(narrays, ranges); - interleave_live_range interleave_lr(narrays, ranges); - do { - - n_remapped = merge_evaluator_es.run(); - - /* try only one array interleave, if successfull, another - * live_range merge is tried. The test MergeAndInterleave5 - * (mesa/st/tests/test_glsl_to_tgsi_array_merge.cpp) - * shows that this can result in more arrays being merged/interleaved. - */ - n_remapped += interleave_lr.run(); - total_remapped += n_remapped; - - ARRAY_MERGE_DUMP("Remapped " << n_remapped << " arrays\n"); - } while (n_remapped > 0); - - total_remapped += merge_live_range_always(narrays, ranges).run(); - ARRAY_MERGE_DUMP("Remapped a total of " << total_remapped << " arrays\n"); - - /* Resolve the remapping chain */ - for (int i = 1; i <= narrays; ++i) { - ARRAY_MERGE_DUMP("Map " << i << ":"); - remapping[ranges[i-1].array_id()].init_from(ranges[i-1]); - } - return total_remapped > 0; -} - -/* Remap the arrays in a TGSI program according to the given mapping. - * @param narrays number of arrays - * @param array_sizes array of arrays sizes - * @param map the array remapping information - * @param instructions TGSI program - * @returns number of arrays after remapping - */ -int remap_arrays(int narrays, unsigned *array_sizes, - exec_list *instructions, - array_remapping *map) -{ - /* re-calculate arrays */ -#if __cplusplus < 201402L - int *idx_map = new int[narrays + 1]; - unsigned *old_sizes = new unsigned[narrays]; -#else - unique_ptr idx_map = make_unique(narrays + 1); - unique_ptr old_sizes = make_unique(narrays); -#endif - - memcpy(&old_sizes[0], &array_sizes[0], sizeof(unsigned) * narrays); - - /* Evaluate mapping for the array indices and update array sizes */ - int new_narrays = 0; - for (int i = 1; i <= narrays; ++i) { - if (!map[i].is_valid()) { - ++new_narrays; - array_sizes[new_narrays-1] = old_sizes[i-1]; - idx_map[i] = new_narrays; - } - } - - /* Map the array ids of merged arrays. */ - for (int i = 1; i <= narrays; ++i) { - if (map[i].is_valid()) { - map[i].set_target_id(idx_map[map[i].target_array_id()]); - } - } - - /* Map the array ids of merge targets that got only renumbered. */ - for (int i = 1; i <= narrays; ++i) { - if (!map[i].is_valid()) { - map[i].set_target_id(idx_map[i]); - } - } - - /* Update the array ids and swizzles in the registers */ - foreach_in_list(glsl_to_tgsi_instruction, inst, instructions) { - for (unsigned j = 0; j < num_inst_src_regs(inst); j++) { - st_src_reg& src = inst->src[j]; - if (src.file == PROGRAM_ARRAY && src.array_id > 0) { - array_remapping& m = map[src.array_id]; - if (m.is_valid()) { - src.array_id = m.target_array_id(); - src.swizzle = m.map_swizzles(src.swizzle); - } - } - } - for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) { - st_src_reg& src = inst->tex_offsets[j]; - if (src.file == PROGRAM_ARRAY && src.array_id > 0) { - array_remapping& m = map[src.array_id]; - if (m.is_valid()) { - src.array_id = m.target_array_id(); - src.swizzle = m.map_swizzles(src.swizzle); - } - } - } - for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) { - st_dst_reg& dst = inst->dst[j]; - if (dst.file == PROGRAM_ARRAY && dst.array_id > 0) { - array_remapping& m = map[dst.array_id]; - if (m.is_valid()) { - assert(j == 0 && - "remapping can only be done for single dest ops"); - dst.array_id = m.target_array_id(); - dst.writemask = m.map_writemask(dst.writemask); - - /* If the target component is moved, then the source swizzles - * must be moved accordingly. - */ - for (unsigned j = 0; j < num_inst_src_regs(inst); j++) { - st_src_reg& src = inst->src[j]; - src.swizzle = m.move_read_swizzles(src.swizzle); - } - } - } - } - st_src_reg& res = inst->resource; - if (res.file == PROGRAM_ARRAY && res.array_id > 0) { - array_remapping& m = map[res.array_id]; - if (m.is_valid()) { - res.array_id = m.target_array_id(); - res.swizzle = m.map_swizzles(res.swizzle); - } - } - } - -#if __cplusplus < 201402L - delete[] old_sizes; - delete[] idx_map; -#endif - - return new_narrays; -} - -} - -using namespace tgsi_array_merge; - -int merge_arrays(int narrays, - unsigned *array_sizes, - exec_list *instructions, - class array_live_range *arr_live_ranges) -{ - array_remapping *map= new array_remapping[narrays + 1]; - - if (get_array_remapping(narrays, arr_live_ranges, map)) - narrays = remap_arrays(narrays, array_sizes, instructions, map); - - delete[] map; - return narrays; -} \ No newline at end of file diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi_array_merge.h b/src/mesa/state_tracker/st_glsl_to_tgsi_array_merge.h deleted file mode 100644 index 15738a817d3..00000000000 --- a/src/mesa/state_tracker/st_glsl_to_tgsi_array_merge.h +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright © 2017 Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#ifndef MESA_GLSL_TO_TGSI_ARRAY_MERGE_H -#define MESA_GLSL_TO_TGSI_ARRAY_MERGE_H - - -#include "st_glsl_to_tgsi_private.h" -#include - -/* Until mesa/st officialy requires c++11 */ -#if __cplusplus < 201103L -#define nullptr 0 -#endif - -/* Helper class to merge the live ranges of an arrays. - * - * For arrays the array length, live range, and component access needs to - * be kept, because when live ranges are merged or arrays are interleaved - * one can only merge or interleave an array into another with equal or more - * elements. For interleaving it is also required that the sum of used swizzles - * is at most four. - */ -class array_live_range { -public: - array_live_range(); - array_live_range(unsigned aid, unsigned alength); - array_live_range(unsigned aid, unsigned alength, int first_access, - int last_access, int mask); - - void set_live_range(int first_access, int last_access); - void set_begin(int _begin){first_access = _begin;} - void set_end(int _end){last_access = _end;} - void set_access_mask(int s); - - static void merge(array_live_range *a, array_live_range *b); - static void interleave(array_live_range *a, array_live_range *b); - - int array_id() const {return id;} - int target_array_id() const {return target_array ? target_array->id : 0;} - const array_live_range *final_target() const {return target_array ? - target_array->final_target() : this;} - unsigned array_length() const { return length;} - int begin() const { return first_access;} - int end() const { return last_access;} - int access_mask() const { return component_access_mask;} - int used_components() const {return used_component_count;} - - bool time_doesnt_overlap(const array_live_range& other) const; - - void print(std::ostream& os) const; - - bool is_mapped() const { return target_array != nullptr;} - - int8_t remap_one_swizzle(int8_t idx) const; - -private: - void init_swizzles(); - void set_target(array_live_range *target); - void merge_live_range_from(array_live_range *other); - void interleave_into(array_live_range *other); - - unsigned id; - unsigned length; - int first_access; - int last_access; - uint8_t component_access_mask; - uint8_t used_component_count; - array_live_range *target_array; - int8_t swizzle_map[4]; -}; - -inline -std::ostream& operator << (std::ostream& os, const array_live_range& lt) { - lt.print(os); - return os; -} - -namespace tgsi_array_merge { - -/* Helper class to apply array merge and interleav to the shader. - * The interface is exposed here to make unit tests possible. - */ -class array_remapping { -public: - - /** Create an invalid mapping that is used as place-holder for - * arrays that are not mapped at all. - */ - array_remapping(); - - /* Predefined remapping, needed for testing */ - array_remapping(int trgt_array_id, const int8_t swizzle[]); - - /* Initialiaze the mapping from an array_live_range that has been - * processed by the array merge and interleave algorithm. - */ - void init_from(const array_live_range& range); - - /* (Re)-set target id, needed when the mapping is resolved */ - void set_target_id(int tid) {target_id = tid;} - - /* Defines a valid remapping */ - bool is_valid() const {return target_id > 0;} - - /* Translates the write mask to the new, interleaved component - * position - */ - int map_writemask(int original_write_mask) const; - - /* Translates all read swizzles to the new, interleaved component - * swizzles - */ - uint16_t map_swizzles(uint16_t original_swizzle) const; - - /* Move the read swizzles to the positiones that correspond to - * a changed write mask. - */ - uint16_t move_read_swizzles(uint16_t original_swizzle) const; - - unsigned target_array_id() const {return target_id;} - - void print(std::ostream& os) const; - - friend bool operator == (const array_remapping& lhs, - const array_remapping& rhs); - -private: - - void interleave(int trgt_access_mask, int src_access_mask); - - unsigned target_id; - int8_t read_swizzle_map[4]; -}; - -inline -std::ostream& operator << (std::ostream& os, const array_remapping& am) -{ - am.print(os); - return os; -} - -/* Apply the array remapping (internal use, exposed here for testing) */ - bool get_array_remapping(int narrays, array_live_range *array_live_ranges, - array_remapping *remapping); - -/* Apply the array remapping (internal use, exposed here for testing) */ -int remap_arrays(int narrays, unsigned *array_sizes, - exec_list *instructions, - array_remapping *map); - -} - -/** Remap the array access to finalize the array merging and interleaving. - * @param[in] narrays number of input arrays, - * @param[in,out] array_sizes length array of input arrays, on output the - * array sizes will be updated according to the remapping, - * @param[in,out] instructions TGSI program, on output the arrays access is - * remapped to the new array layout, - * @param[in] array_live_ranges live ranges and access information of the - * arrays. - * @returns number of remaining arrays - */ -int merge_arrays(int narrays, - unsigned *array_sizes, - exec_list *instructions, - class array_live_range *arr_live_ranges); -#endif diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi_private.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi_private.cpp deleted file mode 100644 index f259442cc37..00000000000 --- a/src/mesa/state_tracker/st_glsl_to_tgsi_private.cpp +++ /dev/null @@ -1,423 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * Copyright © 2011 Bryan Cain - * Copyright © 2017 Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "st_glsl_to_tgsi_private.h" -#include "tgsi/tgsi_info.h" -#include "mesa/program/prog_instruction.h" -#include "mesa/program/prog_print.h" - -static int swizzle_for_type(const glsl_type *type, int component = 0) -{ - unsigned num_elements = 4; - - if (type) { - type = type->without_array(); - if (type->is_scalar() || type->is_vector() || type->is_matrix()) - num_elements = type->vector_elements; - } - - int swizzle = swizzle_for_size(num_elements); - assert(num_elements + component <= 4); - - swizzle += component * MAKE_SWIZZLE4(1, 1, 1, 1); - return swizzle; -} - -static st_src_reg * -dup_reladdr(const st_src_reg *input) -{ - if (!input) - return NULL; - - st_src_reg *reg = ralloc(input, st_src_reg); - if (!reg) { - assert(!"can't create reladdr, expect shader breakage"); - return NULL; - } - - *reg = *input; - return reg; -} - -st_src_reg::st_src_reg(gl_register_file file, int index, const glsl_type *type, - int component, unsigned array_id) -{ - assert(file != PROGRAM_ARRAY || array_id != 0); - this->file = file; - this->index = index; - this->swizzle = swizzle_for_type(type, component); - this->negate = 0; - this->abs = 0; - this->index2D = 0; - this->type = type ? type->base_type : GLSL_TYPE_ERROR; - this->reladdr = NULL; - this->reladdr2 = NULL; - this->has_index2 = false; - this->double_reg2 = false; - this->array_id = array_id; - this->is_double_vertex_input = false; -} - -st_src_reg::st_src_reg(gl_register_file file, int index, enum glsl_base_type type) -{ - assert(file != PROGRAM_ARRAY); /* need array_id > 0 */ - this->type = type; - this->file = file; - this->index = index; - this->index2D = 0; - this->swizzle = SWIZZLE_XYZW; - this->negate = 0; - this->abs = 0; - this->reladdr = NULL; - this->reladdr2 = NULL; - this->has_index2 = false; - this->double_reg2 = false; - this->array_id = 0; - this->is_double_vertex_input = false; -} - -st_src_reg::st_src_reg(gl_register_file file, int index, enum glsl_base_type type, int index2D) -{ - assert(file != PROGRAM_ARRAY); /* need array_id > 0 */ - this->type = type; - this->file = file; - this->index = index; - this->index2D = index2D; - this->swizzle = SWIZZLE_XYZW; - this->negate = 0; - this->abs = 0; - this->reladdr = NULL; - this->reladdr2 = NULL; - this->has_index2 = false; - this->double_reg2 = false; - this->array_id = 0; - this->is_double_vertex_input = false; -} - -void st_src_reg::reset() -{ - this->type = GLSL_TYPE_ERROR; - this->file = PROGRAM_UNDEFINED; - this->index = 0; - this->index2D = 0; - this->swizzle = 0; - this->negate = 0; - this->abs = 0; - this->reladdr = NULL; - this->reladdr2 = NULL; - this->has_index2 = false; - this->double_reg2 = false; - this->array_id = 0; - this->is_double_vertex_input = false; -} - -st_src_reg::st_src_reg() -{ - reset(); -} - -st_src_reg::st_src_reg(const st_src_reg ®) -{ - *this = reg; -} - -void st_src_reg::operator=(const st_src_reg ®) -{ - this->type = reg.type; - this->file = reg.file; - this->index = reg.index; - this->index2D = reg.index2D; - this->swizzle = reg.swizzle; - this->negate = reg.negate; - this->abs = reg.abs; - this->reladdr = dup_reladdr(reg.reladdr); - this->reladdr2 = dup_reladdr(reg.reladdr2); - this->has_index2 = reg.has_index2; - this->double_reg2 = reg.double_reg2; - this->array_id = reg.array_id; - this->is_double_vertex_input = reg.is_double_vertex_input; -} - -st_src_reg::st_src_reg(st_dst_reg reg) -{ - this->type = reg.type; - this->file = reg.file; - this->index = reg.index; - this->swizzle = SWIZZLE_XYZW; - this->negate = 0; - this->abs = 0; - this->reladdr = dup_reladdr(reg.reladdr); - this->index2D = reg.index2D; - this->reladdr2 = dup_reladdr(reg.reladdr2); - this->has_index2 = reg.has_index2; - this->double_reg2 = false; - this->array_id = reg.array_id; - this->is_double_vertex_input = false; -} - -st_src_reg st_src_reg::get_abs() -{ - st_src_reg reg = *this; - reg.negate = 0; - reg.abs = 1; - return reg; -} - -bool operator == (const st_src_reg& lhs, const st_src_reg& rhs) -{ - bool result; - - if (lhs.type != rhs.type || - lhs.file != rhs.file || - lhs.index != rhs.index || - lhs.swizzle != rhs.swizzle || - lhs.index2D != rhs.index2D || - lhs.has_index2 != rhs.has_index2 || - lhs.array_id != rhs.array_id || - lhs.negate != rhs.negate || - lhs.abs != rhs.abs || - lhs.double_reg2 != rhs.double_reg2 || - lhs.is_double_vertex_input != rhs.is_double_vertex_input) - return false; - - if (lhs.reladdr) { - if (!rhs.reladdr) - return false; - result = (*lhs.reladdr == *rhs.reladdr); - } else { - result = !rhs.reladdr; - } - - if (lhs.reladdr2) { - if (!rhs.reladdr2) - return false; - result &= (*lhs.reladdr2 == *rhs.reladdr2); - } else { - result &= !rhs.reladdr2; - } - - return result; -} - -static const char swz_txt[] = "xyzw"; - -std::ostream& operator << (std::ostream& os, const st_src_reg& reg) -{ - if (reg.negate) - os << "-"; - if (reg.abs) - os << "|"; - - os << _mesa_register_file_name(reg.file); - - if (reg.file == PROGRAM_ARRAY) { - os << "(" << reg.array_id << ")"; - } - if (reg.has_index2) { - os << "["; - if (reg.reladdr2) { - os << *reg.reladdr2; - } - os << "+" << reg.index2D << "]"; - } - os << "["; - if (reg.reladdr) { - os << *reg.reladdr; - } - os << reg.index << "]."; - for (int i = 0; i < 4; ++i) { - int swz = GET_SWZ(reg.swizzle, i); - if (swz < 4) - os << swz_txt[swz]; - else - os << "_"; - } - if (reg.abs) - os << "|"; - return os; -} - -st_dst_reg::st_dst_reg(st_src_reg reg) -{ - this->type = reg.type; - this->file = reg.file; - this->index = reg.index; - this->writemask = WRITEMASK_XYZW; - this->reladdr = dup_reladdr(reg.reladdr); - this->index2D = reg.index2D; - this->reladdr2 = dup_reladdr(reg.reladdr2); - this->has_index2 = reg.has_index2; - this->array_id = reg.array_id; -} - -st_dst_reg::st_dst_reg(gl_register_file file, int writemask, enum glsl_base_type type, int index) -{ - assert(file != PROGRAM_ARRAY); /* need array_id > 0 */ - this->file = file; - this->index = index; - this->index2D = 0; - this->writemask = writemask; - this->reladdr = NULL; - this->reladdr2 = NULL; - this->has_index2 = false; - this->type = type; - this->array_id = 0; -} - -st_dst_reg::st_dst_reg(gl_register_file file, int writemask, enum glsl_base_type type) -{ - assert(file != PROGRAM_ARRAY); /* need array_id > 0 */ - this->file = file; - this->index = 0; - this->index2D = 0; - this->writemask = writemask; - this->reladdr = NULL; - this->reladdr2 = NULL; - this->has_index2 = false; - this->type = type; - this->array_id = 0; -} - -st_dst_reg::st_dst_reg() -{ - this->type = GLSL_TYPE_ERROR; - this->file = PROGRAM_UNDEFINED; - this->index = 0; - this->index2D = 0; - this->writemask = 0; - this->reladdr = NULL; - this->reladdr2 = NULL; - this->has_index2 = false; - this->array_id = 0; -} - -st_dst_reg::st_dst_reg(const st_dst_reg ®) -{ - *this = reg; -} - -void st_dst_reg::operator=(const st_dst_reg ®) -{ - this->type = reg.type; - this->file = reg.file; - this->index = reg.index; - this->writemask = reg.writemask; - this->reladdr = dup_reladdr(reg.reladdr); - this->index2D = reg.index2D; - this->reladdr2 = dup_reladdr(reg.reladdr2); - this->has_index2 = reg.has_index2; - this->array_id = reg.array_id; -} - -bool operator == (const st_dst_reg& lhs, const st_dst_reg& rhs) -{ - bool result; - - if (lhs.type != rhs.type || - lhs.file != rhs.file || - lhs.index != rhs.index || - lhs.writemask != rhs.writemask || - lhs.index2D != rhs.index2D || - lhs.has_index2 != rhs.has_index2 || - lhs.array_id != rhs.array_id) - return false; - - if (lhs.reladdr) { - if (!rhs.reladdr) - return false; - result = (*lhs.reladdr == *rhs.reladdr); - } else { - result = !rhs.reladdr; - } - - if (lhs.reladdr2) { - if (!rhs.reladdr2) - return false; - result &= (*lhs.reladdr2 == *rhs.reladdr2); - } else { - result &= !rhs.reladdr2; - } - - return result; -} - -std::ostream& operator << (std::ostream& os, const st_dst_reg& reg) -{ - os << _mesa_register_file_name(reg.file); - if (reg.file == PROGRAM_ARRAY) { - os << "(" << reg.array_id << ")"; - } - if (reg.has_index2) { - os << "["; - if (reg.reladdr2) { - os << *reg.reladdr2; - } - os << "+" << reg.index2D << "]"; - } - os << "["; - if (reg.reladdr) { - os << *reg.reladdr; - } - os << reg.index << "]."; - for (int i = 0; i < 4; ++i) { - if (1 << i & reg.writemask) - os << swz_txt[i]; - else - os << "_"; - } - - return os; -} - -void glsl_to_tgsi_instruction::print(std::ostream& os) const -{ - os << tgsi_get_opcode_name(info->opcode) << " "; - - bool has_operators = false; - for (unsigned j = 0; j < num_inst_dst_regs(this); j++) { - has_operators = true; - if (j > 0) - os << ", "; - os << dst[j]; - } - - if (has_operators) - os << " := "; - - for (unsigned j = 0; j < num_inst_src_regs(this); j++) { - if (j > 0) - os << ", "; - os << src[j]; - } - - if (tex_offset_num_offset > 0) { - os << ", TEXOFS: "; - for (unsigned j = 0; j < tex_offset_num_offset; j++) { - if (j > 0) - os << ", "; - os << tex_offsets[j]; - } - } -} diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi_private.h b/src/mesa/state_tracker/st_glsl_to_tgsi_private.h deleted file mode 100644 index 19dfa952e10..00000000000 --- a/src/mesa/state_tracker/st_glsl_to_tgsi_private.h +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * Copyright © 2011 Bryan Cain - * Copyright © 2017 Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#ifndef ST_GLSL_TO_TGSI_PRIVATE_H -#define ST_GLSL_TO_TGSI_PRIVATE_H - -#include "mesa/main/mtypes.h" -#include "program/prog_parameter.h" -#include "compiler/glsl_types.h" -#include "compiler/glsl/ir.h" -#include "tgsi/tgsi_info.h" -#include - -int swizzle_for_size(int size); - -class st_dst_reg; -/** - * This struct is a corresponding struct to TGSI ureg_src. - */ -class st_src_reg { -public: - st_src_reg(gl_register_file file, int index, const glsl_type *type, - int component = 0, unsigned array_id = 0); - - st_src_reg(gl_register_file file, int index, enum glsl_base_type type); - - st_src_reg(gl_register_file file, int index, enum glsl_base_type type, int index2D); - - st_src_reg(); - st_src_reg(const st_src_reg ®); - void operator=(const st_src_reg ®); - void reset(); - - explicit st_src_reg(st_dst_reg reg); - - st_src_reg get_abs(); - - int32_t index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ - int16_t index2D; - - uint16_t swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ - int negate:4; /**< NEGATE_XYZW mask from mesa */ - unsigned abs:1; - enum glsl_base_type type:6; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ - unsigned has_index2:1; - gl_register_file file:6; /**< PROGRAM_* from Mesa */ - /* - * Is this the second half of a double register pair? - * currently used for input mapping only. - */ - unsigned double_reg2:1; - unsigned is_double_vertex_input:1; - unsigned array_id:10; - /** Register index should be offset by the integer in this reg. */ - st_src_reg *reladdr; - st_src_reg *reladdr2; - - bool is_legal_tgsi_address_operand() const - { - /* 2D registers can't be used as an address operand, or if the address - * operand itself is a result of indirect addressing. - */ - return (type == GLSL_TYPE_INT || type == GLSL_TYPE_UINT) && - !has_index2 && !reladdr && !reladdr2; - } -}; - -bool operator == (const st_src_reg& lhs, const st_src_reg& rhs); - -std::ostream& operator << (std::ostream& os, const st_src_reg& reg); - -class st_dst_reg { -public: - st_dst_reg(gl_register_file file, int writemask, enum glsl_base_type type, int index); - - st_dst_reg(gl_register_file file, int writemask, enum glsl_base_type type); - - st_dst_reg(); - st_dst_reg(const st_dst_reg ®); - void operator=(const st_dst_reg ®); - - explicit st_dst_reg(st_src_reg reg); - - int32_t index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ - int16_t index2D; - gl_register_file file:6; /**< PROGRAM_* from Mesa */ - unsigned writemask:4; /**< Bitfield of WRITEMASK_[XYZW] */ - enum glsl_base_type type:6; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ - unsigned has_index2:1; - unsigned array_id:10; - - /** Register index should be offset by the integer in this reg. */ - st_src_reg *reladdr; - st_src_reg *reladdr2; -}; - -bool operator == (const st_dst_reg& lhs, const st_dst_reg& rhs); - -std::ostream& operator << (std::ostream& os, const st_dst_reg& reg); - - -class glsl_to_tgsi_instruction : public exec_node { -public: - DECLARE_RALLOC_CXX_OPERATORS(glsl_to_tgsi_instruction) - - st_dst_reg dst[2]; - st_src_reg src[4]; - st_src_reg resource; /**< sampler or buffer register */ - st_src_reg *tex_offsets; - - /** Pointer to the ir source this tree came fe02549fdrom for debugging */ - ir_instruction *ir; - - enum tgsi_opcode op:10; /**< TGSI opcode */ - unsigned precise:1; - unsigned saturate:1; - unsigned is_64bit_expanded:1; - unsigned sampler_base:5; - unsigned sampler_array_size:6; /**< 1-based size of sampler array, 1 if not array */ - gl_texture_index tex_target:5; - glsl_base_type tex_type:6; - unsigned tex_shadow:1; - enum pipe_format image_format:10; - unsigned tex_offset_num_offset:3; - unsigned dead_mask:4; /**< Used in dead code elimination */ - unsigned buffer_access:3; /**< bitmask of TGSI_MEMORY_x bits */ - unsigned read_only:1; - unsigned gather_component:2; /* 0, 1, 2, 3 */ - - const struct tgsi_opcode_info *info; - - void print(std::ostream& os) const; -}; - -inline std::ostream& -operator << (std::ostream& os, const glsl_to_tgsi_instruction& instr) -{ - instr.print(os); - return os; -} - -struct rename_reg_pair { - bool valid; - int new_reg; -}; - -inline static bool -is_resource_instruction(unsigned opcode) -{ - switch (opcode) { - case TGSI_OPCODE_RESQ: - case TGSI_OPCODE_LOAD: - case TGSI_OPCODE_ATOMUADD: - case TGSI_OPCODE_ATOMXCHG: - case TGSI_OPCODE_ATOMCAS: - case TGSI_OPCODE_ATOMAND: - case TGSI_OPCODE_ATOMOR: - case TGSI_OPCODE_ATOMXOR: - case TGSI_OPCODE_ATOMUMIN: - case TGSI_OPCODE_ATOMUMAX: - case TGSI_OPCODE_ATOMIMIN: - case TGSI_OPCODE_ATOMIMAX: - case TGSI_OPCODE_ATOMFADD: - case TGSI_OPCODE_ATOMINC_WRAP: - case TGSI_OPCODE_ATOMDEC_WRAP: - case TGSI_OPCODE_IMG2HND: - return true; - default: - return false; - } -} - -inline static unsigned -num_inst_dst_regs(const glsl_to_tgsi_instruction *op) -{ - return op->info->num_dst; -} - -inline static unsigned -num_inst_src_regs(const glsl_to_tgsi_instruction *op) -{ - return op->info->is_tex || is_resource_instruction(op->op) ? - op->info->num_src - 1 : op->info->num_src; -} -#endif diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp deleted file mode 100644 index 32cde9fc329..00000000000 --- a/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp +++ /dev/null @@ -1,1427 +0,0 @@ -/* - * Copyright © 2017 Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "st_glsl_to_tgsi_temprename.h" -#include "st_glsl_to_tgsi_array_merge.h" -#include "tgsi/tgsi_info.h" -#include "tgsi/tgsi_strings.h" -#include "program/prog_instruction.h" -#include "util/bitscan.h" -#include -#include - -/* std::sort is significantly faster than qsort */ -#define USE_STL_SORT -#ifdef USE_STL_SORT -#include -#endif - -#ifndef NDEBUG -#include -#include -#include "program/prog_print.h" -#include "util/debug.h" -using std::cerr; -using std::setw; -using std::ostream; -#endif - -/* If is included this is defined and clashes with - * std::numeric_limits<>::max() - */ -#ifdef max -#undef max -#endif - -using std::numeric_limits; - -/* Without c++11 define the nullptr for forward-compatibility - * and better readibility */ -#if __cplusplus < 201103L -#define nullptr 0 -#endif - -#ifndef NDEBUG -/* Prepare to make it possible to specify log file */ -static std::ostream& debug_log = cerr; - -/* Helper function to check whether we want to seen debugging output */ -static inline bool is_debug_enabled () -{ - static int debug_enabled = -1; - if (debug_enabled < 0) - debug_enabled = env_var_as_boolean("GLSL_TO_TGSI_RENAME_DEBUG", false); - return debug_enabled > 0; -} -#define RENAME_DEBUG(X) if (is_debug_enabled()) do { X; } while (false); -#else -#define RENAME_DEBUG(X) -#endif - -namespace { - -enum prog_scope_type { - outer_scope, /* Outer program scope */ - loop_body, /* Inside a loop */ - if_branch, /* Inside if branch */ - else_branch, /* Inside else branch */ - switch_body, /* Inside switch statmenet */ - switch_case_branch, /* Inside switch case statmenet */ - switch_default_branch, /* Inside switch default statmenet */ - undefined_scope -}; - -class prog_scope { -public: - prog_scope(prog_scope *parent, prog_scope_type type, int id, - int depth, int begin); - - prog_scope_type type() const; - prog_scope *parent() const; - int nesting_depth() const; - int id() const; - int end() const; - int begin() const; - int loop_break_line() const; - - const prog_scope *in_else_scope() const; - const prog_scope *in_ifelse_scope() const; - const prog_scope *in_parent_ifelse_scope() const; - const prog_scope *innermost_loop() const; - const prog_scope *outermost_loop() const; - const prog_scope *enclosing_conditional() const; - - bool is_loop() const; - bool is_in_loop() const; - bool is_switchcase_scope_in_loop() const; - bool is_conditional() const; - bool is_child_of(const prog_scope *scope) const; - bool is_child_of_ifelse_id_sibling(const prog_scope *scope) const; - - bool break_is_for_switchcase() const; - bool contains_range_of(const prog_scope& other) const; - - void set_end(int end); - void set_loop_break_line(int line); - -private: - prog_scope_type scope_type; - int scope_id; - int scope_nesting_depth; - int scope_begin; - int scope_end; - int break_loop_line; - prog_scope *parent_scope; -}; - -/* Some storage class to encapsulate the prog_scope (de-)allocations */ -class prog_scope_storage { -public: - prog_scope_storage(void *mem_ctx, int n); - ~prog_scope_storage(); - prog_scope * create(prog_scope *p, prog_scope_type type, int id, - int lvl, int s_begin); -private: - void *mem_ctx; - int current_slot; - prog_scope *storage; -}; - -/* Class to track the access to a component of a temporary register. */ - -class temp_comp_access { -public: - temp_comp_access(); - - void record_read(int line, prog_scope *scope); - void record_write(int line, prog_scope *scope); - register_live_range get_required_live_range(); -private: - void propagate_live_range_to_dominant_write_scope(); - bool conditional_ifelse_write_in_loop() const; - - void record_ifelse_write(const prog_scope& scope); - void record_if_write(const prog_scope& scope); - void record_else_write(const prog_scope& scope); - - prog_scope *last_read_scope; - prog_scope *first_read_scope; - prog_scope *first_write_scope; - - int first_write; - int last_read; - int last_write; - int first_read; - - /* This member variable tracks the current resolution of conditional writing - * to this temporary in IF/ELSE clauses. - * - * The initial value "conditionality_untouched" indicates that this - * temporary has not yet been written to within an if clause. - * - * A positive (other than "conditionality_untouched") number refers to the - * last loop id for which the write was resolved as unconditional. With each - * new loop this value will be overwitten by "conditionality_unresolved" - * on entering the first IF clause writing this temporary. - * - * The value "conditionality_unresolved" indicates that no resolution has - * been achieved so far. If the variable is set to this value at the end of - * the processing of the whole shader it also indicates a conditional write. - * - * The value "write_is_conditional" marks that the variable is written - * conditionally (i.e. not in all relevant IF/ELSE code path pairs) in at - * least one loop. - */ - int conditionality_in_loop_id; - - /* Helper constants to make the tracking code more readable. */ - static const int write_is_conditional = -1; - static const int conditionality_unresolved = 0; - static const int conditionality_untouched; - static const int write_is_unconditional; - - /* A bit field tracking the nexting levels of if-else clauses where the - * temporary has (so far) been written to in the if branch, but not in the - * else branch. - */ - unsigned int if_scope_write_flags; - - int next_ifelse_nesting_depth; - static const int supported_ifelse_nesting_depth = 32; - - /* Tracks the last if scope in which the temporary was written to - * without a write in the correspondig else branch. Is also used - * to track read-before-write in the according scope. - */ - const prog_scope *current_unpaired_if_write_scope; - - /* Flag to resolve read-before-write in the else scope. */ - bool was_written_in_current_else_scope; -}; - -const int -temp_comp_access::conditionality_untouched = numeric_limits::max(); - -const int -temp_comp_access::write_is_unconditional = numeric_limits::max() - 1; - -/* Class to track the access to all components of a temporary register. */ -class temp_access { -public: - temp_access(); - void record_read(int line, prog_scope *scope, int swizzle); - void record_write(int line, prog_scope *scope, int writemask); - register_live_range get_required_live_range(); -private: - void update_access_mask(int mask); - - temp_comp_access comp[4]; - int access_mask; - bool needs_component_tracking; -}; - -/* Class to track array access. - * Compared to the temporary tracking this is very simplified, mainly because - * with the likely indirect access one can not really establish access - * patterns for individual elements. Instead the life range evaluation is - * always for the whole array, handles only loops and the fact whether a - * value was accessed conditionally in a loop. - */ -class array_access { -public: - array_access(); - void record_access(int line, prog_scope *scope, int swizzle); - void get_required_live_range(array_live_range &lr); -private: - int first_access; - int last_access; - prog_scope *first_access_scope; - prog_scope *last_access_scope; - unsigned accumulated_swizzle:4; - int conditional_access_in_loop:1; -}; - -prog_scope_storage::prog_scope_storage(void *mc, int n): - mem_ctx(mc), - current_slot(0) -{ - storage = ralloc_array(mem_ctx, prog_scope, n); -} - -prog_scope_storage::~prog_scope_storage() -{ - ralloc_free(storage); -} - -prog_scope* -prog_scope_storage::create(prog_scope *p, prog_scope_type type, int id, - int lvl, int s_begin) -{ - storage[current_slot] = prog_scope(p, type, id, lvl, s_begin); - return &storage[current_slot++]; -} - -prog_scope::prog_scope(prog_scope *parent, prog_scope_type type, int id, - int depth, int scope_begin): - scope_type(type), - scope_id(id), - scope_nesting_depth(depth), - scope_begin(scope_begin), - scope_end(-1), - break_loop_line(numeric_limits::max()), - parent_scope(parent) -{ -} - -prog_scope_type prog_scope::type() const -{ - return scope_type; -} - -prog_scope *prog_scope::parent() const -{ - return parent_scope; -} - -int prog_scope::nesting_depth() const -{ - return scope_nesting_depth; -} - -bool prog_scope::is_loop() const -{ - return (scope_type == loop_body); -} - -bool prog_scope::is_in_loop() const -{ - if (scope_type == loop_body) - return true; - - if (parent_scope) - return parent_scope->is_in_loop(); - - return false; -} - -const prog_scope *prog_scope::innermost_loop() const -{ - if (scope_type == loop_body) - return this; - - if (parent_scope) - return parent_scope->innermost_loop(); - - return nullptr; -} - -const prog_scope *prog_scope::outermost_loop() const -{ - const prog_scope *loop = nullptr; - const prog_scope *p = this; - - do { - if (p->type() == loop_body) - loop = p; - p = p->parent(); - } while (p); - - return loop; -} - -bool prog_scope::is_child_of_ifelse_id_sibling(const prog_scope *scope) const -{ - const prog_scope *my_parent = in_parent_ifelse_scope(); - while (my_parent) { - /* is a direct child? */ - if (my_parent == scope) - return false; - /* is a child of the conditions sibling? */ - if (my_parent->id() == scope->id()) - return true; - my_parent = my_parent->in_parent_ifelse_scope(); - } - return false; -} - -bool prog_scope::is_child_of(const prog_scope *scope) const -{ - const prog_scope *my_parent = parent(); - while (my_parent) { - if (my_parent == scope) - return true; - my_parent = my_parent->parent(); - } - return false; -} - -const prog_scope *prog_scope::enclosing_conditional() const -{ - if (is_conditional()) - return this; - - if (parent_scope) - return parent_scope->enclosing_conditional(); - - return nullptr; -} - -bool prog_scope::contains_range_of(const prog_scope& other) const -{ - return (begin() <= other.begin()) && (end() >= other.end()); -} - -bool prog_scope::is_conditional() const -{ - return scope_type == if_branch || - scope_type == else_branch || - scope_type == switch_case_branch || - scope_type == switch_default_branch; -} - -const prog_scope *prog_scope::in_else_scope() const -{ - if (scope_type == else_branch) - return this; - - if (parent_scope) - return parent_scope->in_else_scope(); - - return nullptr; -} - -const prog_scope *prog_scope::in_parent_ifelse_scope() const -{ - if (parent_scope) - return parent_scope->in_ifelse_scope(); - else - return nullptr; -} - -const prog_scope *prog_scope::in_ifelse_scope() const -{ - if (scope_type == if_branch || - scope_type == else_branch) - return this; - - if (parent_scope) - return parent_scope->in_ifelse_scope(); - - return nullptr; -} - -bool prog_scope::is_switchcase_scope_in_loop() const -{ - return (scope_type == switch_case_branch || - scope_type == switch_default_branch) && - is_in_loop(); -} - -bool prog_scope::break_is_for_switchcase() const -{ - if (scope_type == loop_body) - return false; - - if (scope_type == switch_case_branch || - scope_type == switch_default_branch || - scope_type == switch_body) - return true; - - if (parent_scope) - return parent_scope->break_is_for_switchcase(); - - return false; -} - -int prog_scope::id() const -{ - return scope_id; -} - -int prog_scope::begin() const -{ - return scope_begin; -} - -int prog_scope::end() const -{ - return scope_end; -} - -void prog_scope::set_end(int end) -{ - if (scope_end == -1) - scope_end = end; -} - -void prog_scope::set_loop_break_line(int line) -{ - if (scope_type == loop_body) { - break_loop_line = MIN2(break_loop_line, line); - } else { - if (parent_scope) - parent()->set_loop_break_line(line); - } -} - -int prog_scope::loop_break_line() const -{ - return break_loop_line; -} - -temp_access::temp_access(): - access_mask(0), - needs_component_tracking(false) -{ -} - -void temp_access::update_access_mask(int mask) -{ - if (access_mask && access_mask != mask) - needs_component_tracking = true; - access_mask |= mask; -} - -void temp_access::record_write(int line, prog_scope *scope, int writemask) -{ - update_access_mask(writemask); - - if (writemask & WRITEMASK_X) - comp[0].record_write(line, scope); - if (writemask & WRITEMASK_Y) - comp[1].record_write(line, scope); - if (writemask & WRITEMASK_Z) - comp[2].record_write(line, scope); - if (writemask & WRITEMASK_W) - comp[3].record_write(line, scope); -} - -void temp_access::record_read(int line, prog_scope *scope, int readmask) -{ - update_access_mask(readmask); - - if (readmask & WRITEMASK_X) - comp[0].record_read(line, scope); - if (readmask & WRITEMASK_Y) - comp[1].record_read(line, scope); - if (readmask & WRITEMASK_Z) - comp[2].record_read(line, scope); - if (readmask & WRITEMASK_W) - comp[3].record_read(line, scope); -} - -array_access::array_access(): - first_access(-1), - last_access(-1), - first_access_scope(nullptr), - last_access_scope(nullptr), - accumulated_swizzle(0), - conditional_access_in_loop(false) -{ -} - -void array_access::record_access(int line, prog_scope *scope, int swizzle) -{ - if (!first_access_scope) { - first_access = line; - first_access_scope = scope; - } - last_access_scope = scope; - last_access = line; - accumulated_swizzle |= swizzle; - if (scope->in_ifelse_scope() && scope->innermost_loop()) - conditional_access_in_loop = true; -} - -void array_access::get_required_live_range(array_live_range& lr) -{ - RENAME_DEBUG(debug_log << "first_access_scope=" << first_access_scope << "\n"); - RENAME_DEBUG(debug_log << "last_access_scope=" << last_access_scope << "\n"); - - if (first_access_scope == last_access_scope) { - lr.set_live_range(first_access, last_access); - lr.set_access_mask(accumulated_swizzle); - return; - } - - const prog_scope *shared_scope = first_access_scope; - const prog_scope *other_scope = last_access_scope; - - assert(shared_scope); - RENAME_DEBUG(debug_log << "shared_scope=" << shared_scope << "\n"); - - if (conditional_access_in_loop) { - const prog_scope *help = shared_scope->outermost_loop(); - if (help) { - shared_scope = help; - } else { - help = other_scope->outermost_loop(); - if (help) - other_scope = help; - } - if (first_access > shared_scope->begin()) - first_access = shared_scope->begin(); - if (last_access < shared_scope->end()) - last_access = shared_scope->end(); - } - - /* See if any of the two is the parent of the other. */ - if (other_scope->contains_range_of(*shared_scope)) { - shared_scope = other_scope; - } else while (!shared_scope->contains_range_of(*other_scope)) { - assert(shared_scope->parent()); - if (shared_scope->type() == loop_body) { - if (last_access < shared_scope->end()) - last_access = shared_scope->end(); - } - shared_scope = shared_scope->parent(); - } - - while (shared_scope != other_scope) { - if (other_scope->type() == loop_body) { - if (last_access < other_scope->end()) - last_access = other_scope->end(); - } - other_scope = other_scope->parent(); - } - - lr.set_live_range(first_access, last_access); - lr.set_access_mask(accumulated_swizzle); -} - - -inline static register_live_range make_live_range(int b, int e) -{ - register_live_range lt; - lt.begin = b; - lt.end = e; - return lt; -} - -register_live_range temp_access::get_required_live_range() -{ - register_live_range result = make_live_range(-1, -1); - - unsigned mask = access_mask; - while (mask) { - unsigned chan = u_bit_scan(&mask); - register_live_range lt = comp[chan].get_required_live_range(); - - if (lt.begin >= 0) { - if ((result.begin < 0) || (result.begin > lt.begin)) - result.begin = lt.begin; - } - - if (lt.end > result.end) - result.end = lt.end; - - if (!needs_component_tracking) - break; - } - return result; -} - -temp_comp_access::temp_comp_access(): - last_read_scope(nullptr), - first_read_scope(nullptr), - first_write_scope(nullptr), - first_write(-1), - last_read(-1), - last_write(-1), - first_read(numeric_limits::max()), - conditionality_in_loop_id(conditionality_untouched), - if_scope_write_flags(0), - next_ifelse_nesting_depth(0), - current_unpaired_if_write_scope(nullptr), - was_written_in_current_else_scope(false) -{ -} - -void temp_comp_access::record_read(int line, prog_scope *scope) -{ - last_read_scope = scope; - last_read = line; - - if (first_read > line) { - first_read = line; - first_read_scope = scope; - } - - /* If the conditionality of the first write is already resolved then - * no further checks are required. - */ - if (conditionality_in_loop_id == write_is_unconditional || - conditionality_in_loop_id == write_is_conditional) - return; - - /* Check whether we are in a condition within a loop */ - const prog_scope *ifelse_scope = scope->in_ifelse_scope(); - const prog_scope *enclosing_loop; - if (ifelse_scope && (enclosing_loop = ifelse_scope->innermost_loop())) { - - /* If we have either not yet written to this register nor writes are - * resolved as unconditional in the enclosing loop then check whether - * we read before write in an IF/ELSE branch. - */ - if ((conditionality_in_loop_id != write_is_conditional) && - (conditionality_in_loop_id != enclosing_loop->id())) { - - if (current_unpaired_if_write_scope) { - - /* Has been written in this or a parent scope? - this makes the temporary - * unconditionally set at this point. - */ - if (scope->is_child_of(current_unpaired_if_write_scope)) - return; - - /* Has been written in the same scope before it was read? */ - if (ifelse_scope->type() == if_branch) { - if (current_unpaired_if_write_scope->id() == scope->id()) - return; - } else { - if (was_written_in_current_else_scope) - return; - } - } - - /* The temporary was read (conditionally) before it is written, hence - * it should survive a loop. This can be signaled like if it were - * conditionally written. - */ - conditionality_in_loop_id = write_is_conditional; - } - } -} - -void temp_comp_access::record_write(int line, prog_scope *scope) -{ - last_write = line; - - if (first_write < 0) { - first_write = line; - first_write_scope = scope; - - /* If the first write we encounter is not in a conditional branch, or - * the conditional write is not within a loop, then this is to be - * considered an unconditional dominant write. - */ - const prog_scope *conditional = scope->enclosing_conditional(); - if (!conditional || !conditional->innermost_loop()) { - conditionality_in_loop_id = write_is_unconditional; - } - } - - /* The conditionality of the first write is already resolved. */ - if (conditionality_in_loop_id == write_is_unconditional || - conditionality_in_loop_id == write_is_conditional) - return; - - /* If the nesting depth is larger than the supported level, - * then we assume conditional writes. - */ - if (next_ifelse_nesting_depth >= supported_ifelse_nesting_depth) { - conditionality_in_loop_id = write_is_conditional; - return; - } - - /* If we are in an IF/ELSE scope within a loop and the loop has not - * been resolved already, then record this write. - */ - const prog_scope *ifelse_scope = scope->in_ifelse_scope(); - if (ifelse_scope && ifelse_scope->innermost_loop() && - ifelse_scope->innermost_loop()->id() != conditionality_in_loop_id) - record_ifelse_write(*ifelse_scope); -} - -void temp_comp_access::record_ifelse_write(const prog_scope& scope) -{ - if (scope.type() == if_branch) { - /* The first write in an IF branch within a loop implies unresolved - * conditionality (if it was untouched or unconditional before). - */ - conditionality_in_loop_id = conditionality_unresolved; - was_written_in_current_else_scope = false; - record_if_write(scope); - } else { - was_written_in_current_else_scope = true; - record_else_write(scope); - } -} - -void temp_comp_access::record_if_write(const prog_scope& scope) -{ - /* Don't record write if this IF scope if it ... - * - is not the first write in this IF scope, - * - has already been written in a parent IF scope. - * In both cases this write is a secondary write that doesn't contribute - * to resolve conditionality. - * - * Record the write if it - * - is the first one (obviously), - * - happens in an IF branch that is a child of the ELSE branch of the - * last active IF/ELSE pair. In this case recording this write is used to - * established whether the write is (un-)conditional in the scope enclosing - * this outer IF/ELSE pair. - */ - if (!current_unpaired_if_write_scope || - (current_unpaired_if_write_scope->id() != scope.id() && - scope.is_child_of_ifelse_id_sibling(current_unpaired_if_write_scope))) { - if_scope_write_flags |= 1 << next_ifelse_nesting_depth; - current_unpaired_if_write_scope = &scope; - next_ifelse_nesting_depth++; - } -} - -void temp_comp_access::record_else_write(const prog_scope& scope) -{ - int mask = 1 << (next_ifelse_nesting_depth - 1); - - /* If the temporary was written in an IF branch on the same scope level - * and this branch is the sibling of this ELSE branch, then we have a - * pair of writes that makes write access to this temporary unconditional - * in the enclosing scope. - */ - - if ((if_scope_write_flags & mask) && - (scope.id() == current_unpaired_if_write_scope->id())) { - --next_ifelse_nesting_depth; - if_scope_write_flags &= ~mask; - - /* The following code deals with propagating unconditionality from - * inner levels of nested IF/ELSE to the outer levels like in - * - * 1: var t; - * 2: if (a) { <- start scope A - * 3: if (b) - * 4: t = ... - * 5: else - * 6: t = ... - * 7: } else { <- start scope B - * 8: if (c) - * 9: t = ... - * A: else <- start scope C - * B: t = ... - * C: } - * - */ - - const prog_scope *parent_ifelse = scope.parent()->in_ifelse_scope(); - - if (1 << (next_ifelse_nesting_depth - 1) & if_scope_write_flags) { - /* We are at the end of scope C and already recorded a write - * within an IF scope (A), the sibling of the parent ELSE scope B, - * and it is not yet resolved. Mark that as the last relevant - * IF scope. Below the write will be resolved for the A/B - * scope pair. - */ - current_unpaired_if_write_scope = parent_ifelse; - } else { - current_unpaired_if_write_scope = nullptr; - } - /* Promote the first write scope to the enclosing scope because - * the current IF/ELSE pair is now irrelevant for the analysis. - * This is also required to evaluate the minimum life time for t in - * { - * var t; - * if (a) - * t = ... - * else - * t = ... - * x = t; - * ... - * } - */ - first_write_scope = scope.parent(); - - /* If some parent is IF/ELSE and in a loop then propagate the - * write to that scope. Otherwise the write is unconditional - * because it happens in both corresponding IF/ELSE branches - * in this loop, and hence, record the loop id to signal the - * resolution. - */ - if (parent_ifelse && parent_ifelse->is_in_loop()) { - record_ifelse_write(*parent_ifelse); - } else { - conditionality_in_loop_id = scope.innermost_loop()->id(); - } - } else { - /* The temporary was not written in the IF branch corresponding - * to this ELSE branch, hence the write is conditional. - */ - conditionality_in_loop_id = write_is_conditional; - } -} - -bool temp_comp_access::conditional_ifelse_write_in_loop() const -{ - return conditionality_in_loop_id <= conditionality_unresolved; -} - -void temp_comp_access::propagate_live_range_to_dominant_write_scope() -{ - first_write = first_write_scope->begin(); - int lr = first_write_scope->end(); - - if (last_read < lr) - last_read = lr; -} - -register_live_range temp_comp_access::get_required_live_range() -{ - bool keep_for_full_loop = false; - - /* This register component is not used at all, or only read, - * mark it as unused and ignore it when renaming. - * glsl_to_tgsi_visitor::renumber_registers will take care of - * eliminating registers that are not written to. - */ - if (last_write < 0) - return make_live_range(-1, -1); - - assert(first_write_scope); - - /* Only written to, just make sure the register component is not - * reused in the range it is used to write to - */ - if (!last_read_scope) - return make_live_range(first_write, last_write + 1); - - const prog_scope *enclosing_scope_first_read = first_read_scope; - const prog_scope *enclosing_scope_first_write = first_write_scope; - - /* We read before writing in a loop - * hence the value must survive the loops - */ - if ((first_read <= first_write) && - first_read_scope->is_in_loop()) { - keep_for_full_loop = true; - enclosing_scope_first_read = first_read_scope->outermost_loop(); - } - - /* A conditional write within a (nested) loop must survive the outermost - * loop if the last read was not within the same scope. - */ - const prog_scope *conditional = enclosing_scope_first_write->enclosing_conditional(); - if (conditional && !conditional->contains_range_of(*last_read_scope) && - (conditional->is_switchcase_scope_in_loop() || - conditional_ifelse_write_in_loop())) { - keep_for_full_loop = true; - enclosing_scope_first_write = conditional->outermost_loop(); - } - - /* Evaluate the scope that is shared by all: required first write scope, - * required first read before write scope, and last read scope. - */ - const prog_scope *enclosing_scope = enclosing_scope_first_read; - if (enclosing_scope_first_write->contains_range_of(*enclosing_scope)) - enclosing_scope = enclosing_scope_first_write; - - if (last_read_scope->contains_range_of(*enclosing_scope)) - enclosing_scope = last_read_scope; - - while (!enclosing_scope->contains_range_of(*enclosing_scope_first_write) || - !enclosing_scope->contains_range_of(*last_read_scope)) { - enclosing_scope = enclosing_scope->parent(); - assert(enclosing_scope); - } - - /* Propagate the last read scope to the target scope */ - while (enclosing_scope->nesting_depth() < last_read_scope->nesting_depth()) { - /* If the read is in a loop and we have to move up the scope we need to - * extend the live range to the end of this current loop because at this - * point we don't know whether the component was written before - * un-conditionally in the same loop. - */ - if (last_read_scope->is_loop()) - last_read = last_read_scope->end(); - - last_read_scope = last_read_scope->parent(); - } - - /* If the variable has to be kept for the whole loop, and we - * are currently in a loop, then propagate the live range. - */ - if (keep_for_full_loop && first_write_scope->is_loop()) - propagate_live_range_to_dominant_write_scope(); - - /* Propagate the first_dominant_write scope to the target scope */ - while (enclosing_scope->nesting_depth() < first_write_scope->nesting_depth()) { - /* Propagate live_range if there was a break in a loop and the write was - * after the break inside that loop. Note, that this is only needed if - * we move up in the scopes. - */ - if (first_write_scope->loop_break_line() < first_write) { - keep_for_full_loop = true; - propagate_live_range_to_dominant_write_scope(); - } - - first_write_scope = first_write_scope->parent(); - - /* Propagte live_range if we are now in a loop */ - if (keep_for_full_loop && first_write_scope->is_loop()) - propagate_live_range_to_dominant_write_scope(); - } - - /* The last write past the last read is dead code, but we have to - * ensure that the component is not reused too early, hence extend the - * live_range past the last write. - */ - if (last_write >= last_read) - last_read = last_write + 1; - - /* Here we are at the same scope, all is resolved */ - return make_live_range(first_write, last_read); -} - -/* Helper class for sorting and searching the registers based - * on live ranges. */ -class register_merge_record { -public: - int begin; - int end; - int reg; - bool erase; - - bool operator < (const register_merge_record& rhs) const { - return begin < rhs.begin; - } -}; - -class access_recorder { -public: - access_recorder(int _ntemps, int _narrays); - ~access_recorder(); - - void record_read(const st_src_reg& src, int line, prog_scope *scope); - void record_write(const st_dst_reg& src, int line, prog_scope *scope, - bool no_reswizzle); - - void get_required_live_ranges(register_live_range *register_live_ranges, - array_live_range *array_live_ranges); -private: - - int ntemps; - int narrays; - temp_access *temp_acc; - array_access *array_acc; -}; - -access_recorder::access_recorder(int _ntemps, int _narrays): - ntemps(_ntemps), - narrays(_narrays) -{ - temp_acc = new temp_access[ntemps]; - array_acc = new array_access[narrays]; -} - -access_recorder::~access_recorder() -{ - delete[] array_acc; - delete[] temp_acc; -} - -void access_recorder::record_read(const st_src_reg& src, int line, - prog_scope *scope) -{ - int readmask = 0; - for (int idx = 0; idx < 4; ++idx) { - int swz = GET_SWZ(src.swizzle, idx); - readmask |= (1 << swz) & 0xF; - } - - if (src.file == PROGRAM_TEMPORARY) - temp_acc[src.index].record_read(line, scope, readmask); - - if (src.file == PROGRAM_ARRAY) { - assert(src.array_id <= narrays); - array_acc[src.array_id - 1].record_access(line, scope, readmask); - } - - if (src.reladdr) - record_read(*src.reladdr, line, scope); - if (src.reladdr2) - record_read(*src.reladdr2, line, scope); -} - -void access_recorder::record_write(const st_dst_reg& dst, int line, - prog_scope *scope, bool can_reswizzle) -{ - if (dst.file == PROGRAM_TEMPORARY) - temp_acc[dst.index].record_write(line, scope, dst.writemask); - - if (dst.file == PROGRAM_ARRAY) { - assert(dst.array_id <= narrays); - - /* If the array is written as dst of a multi-dst operation, we must not - * reswizzle the access, because we would have to reswizzle also the - * other dst. For now just fill the mask to make interleaving impossible. - */ - array_acc[dst.array_id - 1].record_access(line, scope, - can_reswizzle ? dst.writemask: 0xF); - } - - if (dst.reladdr) - record_read(*dst.reladdr, line, scope); - if (dst.reladdr2) - record_read(*dst.reladdr2, line, scope); -} - -void access_recorder::get_required_live_ranges(struct register_live_range *register_live_ranges, - class array_live_range *array_live_ranges) -{ - RENAME_DEBUG(debug_log << "== register live ranges ==========\n"); - for(int i = 0; i < ntemps; ++i) { - RENAME_DEBUG(debug_log << setw(4) << i); - register_live_ranges[i] = temp_acc[i].get_required_live_range(); - RENAME_DEBUG(debug_log << ": [" << register_live_ranges[i].begin << ", " - << register_live_ranges[i].end << "]\n"); - } - RENAME_DEBUG(debug_log << "==================================\n\n"); - - RENAME_DEBUG(debug_log << "== array live ranges ==========\n"); - for(int i = 0; i < narrays; ++i) { - RENAME_DEBUG(debug_log<< setw(4) << i); - array_acc[i].get_required_live_range(array_live_ranges[i]); - RENAME_DEBUG(debug_log << ": [" <op == TGSI_OPCODE_BGNLOOP || - inst->op == TGSI_OPCODE_SWITCH || - inst->op == TGSI_OPCODE_CASE || - inst->op == TGSI_OPCODE_IF || - inst->op == TGSI_OPCODE_UIF || - inst->op == TGSI_OPCODE_ELSE || - inst->op == TGSI_OPCODE_DEFAULT) - ++n_scopes; - } - - prog_scope_storage scopes(mem_ctx, n_scopes); - - access_recorder access(ntemps, narrays); - - prog_scope *cur_scope = scopes.create(nullptr, outer_scope, 0, 0, line); - - RENAME_DEBUG(debug_log << "========= Begin shader ============\n"); - - foreach_in_list(glsl_to_tgsi_instruction, inst, instructions) { - if (is_at_end) { - assert(!"GLSL_TO_TGSI: shader has instructions past end marker"); - break; - } - - RENAME_DEBUG(dump_instruction(debug_log, line, cur_scope, *inst)); - - switch (inst->op) { - case TGSI_OPCODE_BGNLOOP: { - cur_scope = scopes.create(cur_scope, loop_body, loop_id++, - cur_scope->nesting_depth() + 1, line); - break; - } - case TGSI_OPCODE_ENDLOOP: { - cur_scope->set_end(line); - cur_scope = cur_scope->parent(); - assert(cur_scope); - break; - } - case TGSI_OPCODE_IF: - case TGSI_OPCODE_UIF: { - assert(num_inst_src_regs(inst) == 1); - access.record_read(inst->src[0], line, cur_scope); - cur_scope = scopes.create(cur_scope, if_branch, if_id++, - cur_scope->nesting_depth() + 1, line + 1); - break; - } - case TGSI_OPCODE_ELSE: { - assert(cur_scope->type() == if_branch); - cur_scope->set_end(line - 1); - cur_scope = scopes.create(cur_scope->parent(), else_branch, - cur_scope->id(), cur_scope->nesting_depth(), - line + 1); - break; - } - case TGSI_OPCODE_END: { - cur_scope->set_end(line); - is_at_end = true; - break; - } - case TGSI_OPCODE_ENDIF: { - cur_scope->set_end(line - 1); - cur_scope = cur_scope->parent(); - assert(cur_scope); - break; - } - case TGSI_OPCODE_SWITCH: { - assert(num_inst_src_regs(inst) == 1); - prog_scope *scope = scopes.create(cur_scope, switch_body, switch_id++, - cur_scope->nesting_depth() + 1, line); - /* We record the read only for the SWITCH statement itself, like it - * is used by the only consumer of TGSI_OPCODE_SWITCH in tgsi_exec.c. - */ - access.record_read(inst->src[0], line, cur_scope); - cur_scope = scope; - break; - } - case TGSI_OPCODE_ENDSWITCH: { - cur_scope->set_end(line - 1); - /* Remove the case level, it might not have been - * closed with a break. - */ - if (cur_scope->type() != switch_body) - cur_scope = cur_scope->parent(); - - cur_scope = cur_scope->parent(); - assert(cur_scope); - break; - } - case TGSI_OPCODE_CASE: { - /* Take care of tracking the registers. */ - prog_scope *switch_scope = cur_scope->type() == switch_body ? - cur_scope : cur_scope->parent(); - - assert(num_inst_src_regs(inst) == 1); - access.record_read(inst->src[0], line, switch_scope); - - FALLTHROUGH; /* To allocate the scope. */ - } - case TGSI_OPCODE_DEFAULT: { - prog_scope_type t = inst->op == TGSI_OPCODE_CASE ? switch_case_branch - : switch_default_branch; - prog_scope *switch_scope = (cur_scope->type() == switch_body) ? - cur_scope : cur_scope->parent(); - assert(switch_scope->type() == switch_body); - prog_scope *scope = scopes.create(switch_scope, t, - switch_scope->id(), - switch_scope->nesting_depth() + 1, - line); - /* Previous case falls through, so scope was not yet closed. */ - if ((cur_scope != switch_scope) && (cur_scope->end() == -1)) - cur_scope->set_end(line - 1); - cur_scope = scope; - break; - } - case TGSI_OPCODE_BRK: { - if (cur_scope->break_is_for_switchcase()) { - cur_scope->set_end(line - 1); - } else { - cur_scope->set_loop_break_line(line); - } - break; - } - case TGSI_OPCODE_CAL: - case TGSI_OPCODE_RET: - /* These opcodes are not supported and if a subroutine would - * be called in a shader, then the live_range tracking would have - * to follow that call to see which registers are used there. - * Since this is not done, we have to bail out here and signal - * that no register merge will take place. - */ - return false; - default: { - for (unsigned j = 0; j < num_inst_src_regs(inst); j++) { - access.record_read(inst->src[j], line, cur_scope); - } - for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) { - access.record_read(inst->tex_offsets[j], line, cur_scope); - } - unsigned ndst = num_inst_dst_regs(inst); - for (unsigned j = 0; j < ndst; j++) { - access.record_write(inst->dst[j], line, cur_scope, ndst == 1); - } - access.record_read(inst->resource, line, cur_scope); - } - } - ++line; - } - - RENAME_DEBUG(debug_log << "==================================\n\n"); - - /* Make sure last scope is closed, even though no - * TGSI_OPCODE_END was given. - */ - if (cur_scope->end() < 0) - cur_scope->set_end(line - 1); - - access.get_required_live_ranges(register_live_ranges, array_live_ranges); - return true; -} - -/* Find the next register between [start, end) that has a live range starting - * at or after bound by using a binary search. - * start points at the beginning of the search range, - * end points at the element past the end of the search range, and - * the array comprising [start, end) must be sorted in ascending order. - */ -static register_merge_record* -find_next_rename(register_merge_record* start, register_merge_record* end, int bound) -{ - int delta = (end - start); - - while (delta > 0) { - int half = delta >> 1; - register_merge_record* middle = start + half; - - if (bound <= middle->begin) { - delta = half; - } else { - start = middle; - ++start; - delta -= half + 1; - } - } - - return start; -} - -#ifndef USE_STL_SORT -static int register_merge_record_compare (const void *a, const void *b) { - const register_merge_record *aa = static_cast(a); - const register_merge_record *bb = static_cast(b); - return aa->begin < bb->begin ? -1 : (aa->begin > bb->begin ? 1 : 0); -} -#endif - -/* This functions evaluates the register merges by using a binary - * search to find suitable merge candidates. */ -void get_temp_registers_remapping(void *mem_ctx, int ntemps, - const struct register_live_range *live_ranges, - struct rename_reg_pair *result) -{ - register_merge_record *reg_access = ralloc_array(mem_ctx, register_merge_record, ntemps); - - int used_temps = 0; - for (int i = 0; i < ntemps; ++i) { - if (live_ranges[i].begin >= 0) { - reg_access[used_temps].begin =live_ranges[i].begin; - reg_access[used_temps].end =live_ranges[i].end; - reg_access[used_temps].reg = i; - reg_access[used_temps].erase = false; - ++used_temps; - } - } - -#ifdef USE_STL_SORT - std::sort(reg_access, reg_access + used_temps); -#else - std::qsort(reg_access, used_temps, sizeof(register_merge_record), - register_merge_record_compare); -#endif - - register_merge_record *trgt = reg_access; - register_merge_record *reg_access_end = reg_access + used_temps; - register_merge_record *first_erase = reg_access_end; - register_merge_record *search_start = trgt + 1; - - while (trgt != reg_access_end) { - register_merge_record *src = find_next_rename(search_start, reg_access_end, - trgt->end); - if (src != reg_access_end) { - result[src->reg].new_reg = trgt->reg; - result[src->reg].valid = true; - trgt->end = src->end; - - /* Since we only search forward, don't remove the renamed - * register just now, only mark it. */ - src->erase = true; - - if (first_erase == reg_access_end) - first_erase = src; - - search_start = src + 1; - } else { - /* Moving to the next target register it is time to remove - * the already merged registers from the search range */ - if (first_erase != reg_access_end) { - register_merge_record *outp = first_erase; - register_merge_record *inp = first_erase + 1; - - while (inp != reg_access_end) { - if (!inp->erase) - *outp++ = *inp; - ++inp; - } - - reg_access_end = outp; - first_erase = reg_access_end; - } - ++trgt; - search_start = trgt + 1; - } - } - ralloc_free(reg_access); -} - -/* Code below used for debugging */ -#ifndef NDEBUG -static -void dump_instruction(ostream& os, int line, prog_scope *scope, - const glsl_to_tgsi_instruction& inst) -{ - const struct tgsi_opcode_info *info = inst.info; - int indent = scope->nesting_depth(); - if ((scope->type() == switch_case_branch || - scope->type() == switch_default_branch) && - (info->opcode == TGSI_OPCODE_CASE || - info->opcode == TGSI_OPCODE_DEFAULT)) - --indent; - - if (info->opcode == TGSI_OPCODE_ENDIF || - info->opcode == TGSI_OPCODE_ELSE || - info->opcode == TGSI_OPCODE_ENDLOOP || - info->opcode == TGSI_OPCODE_ENDSWITCH) - --indent; - - os << setw(4) << line << ": "; - os << setw(indent * 4) << " "; - os << inst << "\n"; -} -#endif diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.h b/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.h deleted file mode 100644 index c70cd888191..00000000000 --- a/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright © 2017 Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#ifndef MESA_GLSL_TO_TGSI_TEMPRENAME_H -#define MESA_GLSL_TO_TGSI_TEMPRENAME_H - -#include "st_glsl_to_tgsi_array_merge.h" - -/** Storage to record the required live range of a temporary register - * begin == end == -1 indicates that the register can be reused without - * limitations. Otherwise, "begin" indicates the first instruction in which - * a write operation may target this temporary, and end indicates the - * last instruction in which a value can be read from this temporary. - * Hence, a register R2 can be merged with a register R1 if R1.end <= R2.begin. - */ -struct register_live_range { - int begin; - int end; -}; - -/** Evaluates the required live ranges of temporary registers in a shader. - * The live range estimation can only be run sucessfully if the shader doesn't - * call a subroutine. - * @param[in] mem_ctx a memory context that can be used with the ralloc_* - * functions - * @param[in] instructions the shader to be anlzyed - * @param[in] ntemps number of temporaries reserved for this shader - * @param[in,out] reg_live_ranges memory location to store the estimated - * required live ranges for each temporary register. The parameter must - * point to allocated memory that can hold ntemps register_live_range - * structures. On output the live ranges contains the live ranges for - * the registers with the exception of TEMP[0] - * @param[in] narrays number of array sreserved for this shader - * @param[in,out] arr_live_ranges memory location to store the estimated required - * live ranges for each array. The parameter must point to allocated memory - * that can hold narrays array_live_range structures. On output the live - * ranges contains the live ranges for the registers with the exception of - * ARRAY[0]. - * @returns: true if the lifetimes were estimated, false if not (i.e. if a - * subroutine was called). - */ -bool -get_temp_registers_required_live_ranges(void *mem_ctx, exec_list *instructions, - int ntemps, struct register_live_range *register_live_ranges, - int narrays, array_live_range *array_live_ranges); - -/** Estimate the merge remapping of the registers. - * @param[in] mem_ctx a memory context that can be used with the ralloc_* - * functions - * @param[in] ntemps number of temporaries reserved for this shader - * @param[in] reg_live_ranges required live range for each temporary register. - * @param[in,out] result memory location to store the register remapping table. - * On input the parameter must point to allocated memory that can hold the - * renaming information for ntemps registers, on output the mapping is stored. - * Note that TEMP[0] is not considered for register renaming. - */ -void get_temp_registers_remapping(void *mem_ctx, int ntemps, - const struct register_live_range* reg_live_ranges, - struct rename_reg_pair *result); -#endif \ No newline at end of file diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 46b44550a86..edfacfe22e5 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -588,115 +588,37 @@ static bool st_translate_vertex_program(struct st_context *st, struct gl_program *prog) { - struct ureg_program *ureg; - enum pipe_error error; - unsigned num_outputs = 0; - unsigned attr; - ubyte output_semantic_name[VARYING_SLOT_MAX] = {0}; - ubyte output_semantic_index[VARYING_SLOT_MAX] = {0}; - /* ARB_vp: */ - if (!prog->glsl_to_tgsi) { - if (prog->arb.IsPositionInvariant) - _mesa_insert_mvp_code(st->ctx, prog); + if (prog->arb.IsPositionInvariant) + _mesa_insert_mvp_code(st->ctx, prog); - _mesa_remove_output_reads(prog, PROGRAM_OUTPUT); + _mesa_remove_output_reads(prog, PROGRAM_OUTPUT); - /* This determines which states will be updated when the assembly - * shader is bound. - */ - prog->affected_states = ST_NEW_VS_STATE | - ST_NEW_RASTERIZER | - ST_NEW_VERTEX_ARRAYS; + /* This determines which states will be updated when the assembly + * shader is bound. + */ + prog->affected_states = ST_NEW_VS_STATE | + ST_NEW_RASTERIZER | + ST_NEW_VERTEX_ARRAYS; - if (prog->Parameters->NumParameters) - prog->affected_states |= ST_NEW_VS_CONSTANTS; + if (prog->Parameters->NumParameters) + prog->affected_states |= ST_NEW_VS_CONSTANTS; - if (prog->nir) - ralloc_free(prog->nir); + if (prog->nir) + ralloc_free(prog->nir); - if (prog->serialized_nir) { - free(prog->serialized_nir); - prog->serialized_nir = NULL; - } - - prog->state.type = PIPE_SHADER_IR_NIR; - prog->nir = st_translate_prog_to_nir(st, prog, - MESA_SHADER_VERTEX); - prog->info = prog->nir->info; - - st_prepare_vertex_program(prog, NULL); - return true; + if (prog->serialized_nir) { + free(prog->serialized_nir); + prog->serialized_nir = NULL; } - uint8_t input_to_index[VERT_ATTRIB_MAX]; - st_prepare_vertex_program(prog, input_to_index); + prog->state.type = PIPE_SHADER_IR_NIR; + prog->nir = st_translate_prog_to_nir(st, prog, + MESA_SHADER_VERTEX); + prog->info = prog->nir->info; - /* Get semantic names and indices. */ - for (attr = 0; attr < VARYING_SLOT_MAX; attr++) { - if (prog->info.outputs_written & BITFIELD64_BIT(attr)) { - unsigned slot = num_outputs++; - unsigned semantic_name, semantic_index; - tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic, - &semantic_name, &semantic_index); - output_semantic_name[slot] = semantic_name; - output_semantic_index[slot] = semantic_index; - } - } - /* pre-setup potentially unused edgeflag output */ - output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG; - output_semantic_index[num_outputs] = 0; - - ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->screen); - if (ureg == NULL) - return false; - - ureg_setup_shader_info(ureg, &prog->info); - - if (ST_DEBUG & DEBUG_MESA) { - _mesa_print_program(prog); - _mesa_print_program_parameters(st->ctx, prog); - debug_printf("\n"); - } - - struct gl_vertex_program *vp = (struct gl_vertex_program *)prog; - - error = st_translate_program(st->ctx, - PIPE_SHADER_VERTEX, - ureg, - prog->glsl_to_tgsi, - prog, - /* inputs */ - vp->num_inputs, - input_to_index, - NULL, /* inputSlotToAttr */ - NULL, /* input semantic name */ - NULL, /* input semantic index */ - NULL, /* interp mode */ - /* outputs */ - num_outputs, - vp->result_to_output, - output_semantic_name, - output_semantic_index); - - st_translate_stream_output_info(prog); - - free_glsl_to_tgsi_visitor(prog->glsl_to_tgsi); - - if (error) { - debug_printf("%s: failed to translate GLSL IR program:\n", __func__); - _mesa_print_program(prog); - debug_assert(0); - return false; - } - - prog->state.tokens = ureg_get_tokens(ureg, NULL); - ureg_destroy(ureg); - - prog->glsl_to_tgsi = NULL; - st_store_ir_in_disk_cache(st, prog, false); - - return prog->state.tokens != NULL; + st_prepare_vertex_program(prog, NULL); + return true; } static struct nir_shader * @@ -981,330 +903,53 @@ st_get_common_variant(struct st_context *st, /** - * Translate a Mesa fragment shader into a TGSI shader. + * Translate a non-GLSL Mesa fragment shader into a NIR shader. */ static bool st_translate_fragment_program(struct st_context *st, struct gl_program *fp) { /* Non-GLSL programs: */ - if (!fp->glsl_to_tgsi) { - _mesa_remove_output_reads(fp, PROGRAM_OUTPUT); - if (st->ctx->Const.GLSLFragCoordIsSysVal) - _mesa_program_fragment_position_to_sysval(fp); + _mesa_remove_output_reads(fp, PROGRAM_OUTPUT); + if (st->ctx->Const.GLSLFragCoordIsSysVal) + _mesa_program_fragment_position_to_sysval(fp); - /* This determines which states will be updated when the assembly - * shader is bound. - * - * fragment.position and glDrawPixels always use constants. - */ - fp->affected_states = ST_NEW_FS_STATE | - ST_NEW_SAMPLE_SHADING | - ST_NEW_FS_CONSTANTS; + /* This determines which states will be updated when the assembly + * shader is bound. + * + * fragment.position and glDrawPixels always use constants. + */ + fp->affected_states = ST_NEW_FS_STATE | + ST_NEW_SAMPLE_SHADING | + ST_NEW_FS_CONSTANTS; - if (fp->ati_fs) { - /* Just set them for ATI_fs unconditionally. */ + if (fp->ati_fs) { + /* Just set them for ATI_fs unconditionally. */ + fp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS | + ST_NEW_FS_SAMPLERS; + } else { + /* ARB_fp */ + if (fp->SamplersUsed) fp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS | - ST_NEW_FS_SAMPLERS; - } else { - /* ARB_fp */ - if (fp->SamplersUsed) - fp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS | - ST_NEW_FS_SAMPLERS; + ST_NEW_FS_SAMPLERS; + } + + /* Translate to NIR. ATI_fs translates at variant time. */ + if (!fp->ati_fs) { + nir_shader *nir = + st_translate_prog_to_nir(st, fp, MESA_SHADER_FRAGMENT); + + if (fp->nir) + ralloc_free(fp->nir); + if (fp->serialized_nir) { + free(fp->serialized_nir); + fp->serialized_nir = NULL; } - - /* Translate to NIR. ATI_fs translates at variant time. */ - if (!fp->ati_fs) { - nir_shader *nir = - st_translate_prog_to_nir(st, fp, MESA_SHADER_FRAGMENT); - - if (fp->nir) - ralloc_free(fp->nir); - if (fp->serialized_nir) { - free(fp->serialized_nir); - fp->serialized_nir = NULL; - } - fp->state.type = PIPE_SHADER_IR_NIR; - fp->nir = nir; - } - - return true; + fp->state.type = PIPE_SHADER_IR_NIR; + fp->nir = nir; } - ubyte outputMapping[2 * FRAG_RESULT_MAX]; - ubyte inputMapping[VARYING_SLOT_MAX]; - ubyte inputSlotToAttr[VARYING_SLOT_MAX]; - ubyte interpMode[PIPE_MAX_SHADER_INPUTS]; /* XXX size? */ - GLuint attr; - GLbitfield64 inputsRead; - struct ureg_program *ureg; - - GLboolean write_all = GL_FALSE; - - ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; - ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; - uint fs_num_inputs = 0; - - ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; - ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; - uint fs_num_outputs = 0; - - memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr)); - - /* - * Convert Mesa program inputs to TGSI input register semantics. - */ - inputsRead = fp->info.inputs_read; - for (attr = 0; attr < VARYING_SLOT_MAX; attr++) { - if ((inputsRead & BITFIELD64_BIT(attr)) != 0) { - const GLuint slot = fs_num_inputs++; - - inputMapping[attr] = slot; - inputSlotToAttr[slot] = attr; - - switch (attr) { - case VARYING_SLOT_POS: - input_semantic_name[slot] = TGSI_SEMANTIC_POSITION; - input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_LINEAR; - break; - case VARYING_SLOT_COL0: - input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - input_semantic_index[slot] = 0; - interpMode[slot] = fp->glsl_to_tgsi ? - TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR; - break; - case VARYING_SLOT_COL1: - input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - input_semantic_index[slot] = 1; - interpMode[slot] = fp->glsl_to_tgsi ? - TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR; - break; - case VARYING_SLOT_FOGC: - input_semantic_name[slot] = TGSI_SEMANTIC_FOG; - input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; - break; - case VARYING_SLOT_FACE: - input_semantic_name[slot] = TGSI_SEMANTIC_FACE; - input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_CONSTANT; - break; - case VARYING_SLOT_PRIMITIVE_ID: - input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID; - input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_CONSTANT; - break; - case VARYING_SLOT_LAYER: - input_semantic_name[slot] = TGSI_SEMANTIC_LAYER; - input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_CONSTANT; - break; - case VARYING_SLOT_VIEWPORT: - input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX; - input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_CONSTANT; - break; - case VARYING_SLOT_CLIP_DIST0: - input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST; - input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; - break; - case VARYING_SLOT_CLIP_DIST1: - input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST; - input_semantic_index[slot] = 1; - interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; - break; - case VARYING_SLOT_CULL_DIST0: - case VARYING_SLOT_CULL_DIST1: - /* these should have been lowered by GLSL */ - assert(0); - break; - /* In most cases, there is nothing special about these - * inputs, so adopt a convention to use the generic - * semantic name and the mesa VARYING_SLOT_ number as the - * index. - * - * All that is required is that the vertex shader labels - * its own outputs similarly, and that the vertex shader - * generates at least every output required by the - * fragment shader plus fixed-function hardware (such as - * BFC). - * - * However, some drivers may need us to identify the PNTC and TEXi - * varyings if, for example, their capability to replace them with - * sprite coordinates is limited. - */ - case VARYING_SLOT_PNTC: - if (st->needs_texcoord_semantic) { - input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD; - input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_LINEAR; - break; - } - FALLTHROUGH; - case VARYING_SLOT_TEX0: - case VARYING_SLOT_TEX1: - case VARYING_SLOT_TEX2: - case VARYING_SLOT_TEX3: - case VARYING_SLOT_TEX4: - case VARYING_SLOT_TEX5: - case VARYING_SLOT_TEX6: - case VARYING_SLOT_TEX7: - if (st->needs_texcoord_semantic) { - input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD; - input_semantic_index[slot] = attr - VARYING_SLOT_TEX0; - interpMode[slot] = fp->glsl_to_tgsi ? - TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE; - break; - } - FALLTHROUGH; - case VARYING_SLOT_VAR0: - default: - /* Semantic indices should be zero-based because drivers may choose - * to assign a fixed slot determined by that index. - * This is useful because ARB_separate_shader_objects uses location - * qualifiers for linkage, and if the semantic index corresponds to - * these locations, linkage passes in the driver become unecessary. - * - * If needs_texcoord_semantic is true, no semantic indices will be - * consumed for the TEXi varyings, and we can base the locations of - * the user varyings on VAR0. Otherwise, we use TEX0 as base index. - */ - assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC || - (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7)); - input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - input_semantic_index[slot] = st_get_generic_varying_index(st, attr); - if (attr == VARYING_SLOT_PNTC) - interpMode[slot] = TGSI_INTERPOLATE_LINEAR; - else { - interpMode[slot] = fp->glsl_to_tgsi ? - TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE; - } - break; - } - } - else { - inputMapping[attr] = -1; - } - } - - /* - * Semantics and mapping for outputs - */ - GLbitfield64 outputsWritten = fp->info.outputs_written; - - /* if z is written, emit that first */ - if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { - fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION; - fs_output_semantic_index[fs_num_outputs] = 0; - outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs; - fs_num_outputs++; - outputsWritten &= ~(1 << FRAG_RESULT_DEPTH); - } - - if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) { - fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL; - fs_output_semantic_index[fs_num_outputs] = 0; - outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs; - fs_num_outputs++; - outputsWritten &= ~(1 << FRAG_RESULT_STENCIL); - } - - if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) { - fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK; - fs_output_semantic_index[fs_num_outputs] = 0; - outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs; - fs_num_outputs++; - outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK); - } - - /* handle remaining outputs (color) */ - for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) { - const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten : - fp->SecondaryOutputsWritten; - const unsigned loc = attr % FRAG_RESULT_MAX; - - if (written & BITFIELD64_BIT(loc)) { - switch (loc) { - case FRAG_RESULT_DEPTH: - case FRAG_RESULT_STENCIL: - case FRAG_RESULT_SAMPLE_MASK: - /* handled above */ - assert(0); - break; - case FRAG_RESULT_COLOR: - write_all = GL_TRUE; - FALLTHROUGH; - default: { - int index; - assert(loc == FRAG_RESULT_COLOR || - (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX)); - - index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0); - - if (attr >= FRAG_RESULT_MAX) { - /* Secondary color for dual source blending. */ - assert(index == 0); - index++; - } - - fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR; - fs_output_semantic_index[fs_num_outputs] = index; - outputMapping[attr] = fs_num_outputs; - break; - } - } - - fs_num_outputs++; - } - } - - ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->screen); - if (ureg == NULL) - return false; - - ureg_setup_shader_info(ureg, &fp->info); - - if (ST_DEBUG & DEBUG_MESA) { - _mesa_print_program(fp); - _mesa_print_program_parameters(st->ctx, fp); - debug_printf("\n"); - } - if (write_all == GL_TRUE) - ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1); - - if (fp->glsl_to_tgsi) { - st_translate_program(st->ctx, - PIPE_SHADER_FRAGMENT, - ureg, - fp->glsl_to_tgsi, - fp, - /* inputs */ - fs_num_inputs, - inputMapping, - inputSlotToAttr, - input_semantic_name, - input_semantic_index, - interpMode, - /* outputs */ - fs_num_outputs, - outputMapping, - fs_output_semantic_name, - fs_output_semantic_index); - - free_glsl_to_tgsi_visitor(fp->glsl_to_tgsi); - } - - fp->state.tokens = ureg_get_tokens(ureg, NULL); - ureg_destroy(ureg); - - if (fp->glsl_to_tgsi) { - fp->glsl_to_tgsi = NULL; - st_store_ir_in_disk_cache(st, fp, false); - } - - return fp->state.tokens != NULL; + return true; } static struct st_fp_variant * @@ -1670,146 +1315,6 @@ st_get_fp_variant(struct st_context *st, return fpv; } -/** - * Translate a program. This is common code for geometry and tessellation - * shaders. - */ -static bool -st_translate_common_program(struct st_context *st, - struct gl_program *prog) -{ - enum pipe_shader_type stage = - pipe_shader_type_from_mesa(prog->info.stage); - struct ureg_program *ureg = ureg_create_with_screen(stage, st->screen); - - if (ureg == NULL) - return false; - - ureg_setup_shader_info(ureg, &prog->info); - - ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX]; - ubyte inputMapping[VARYING_SLOT_TESS_MAX]; - ubyte outputMapping[VARYING_SLOT_TESS_MAX]; - GLuint attr; - - ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; - ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; - uint num_inputs = 0; - - ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; - ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; - uint num_outputs = 0; - - GLint i; - - memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr)); - memset(inputMapping, 0, sizeof(inputMapping)); - memset(outputMapping, 0, sizeof(outputMapping)); - memset(&prog->state, 0, sizeof(prog->state)); - - /* - * Convert Mesa program inputs to TGSI input register semantics. - */ - for (attr = 0; attr < VARYING_SLOT_MAX; attr++) { - if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0) - continue; - - unsigned slot = num_inputs++; - - inputMapping[attr] = slot; - inputSlotToAttr[slot] = attr; - - unsigned semantic_name, semantic_index; - tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic, - &semantic_name, &semantic_index); - input_semantic_name[slot] = semantic_name; - input_semantic_index[slot] = semantic_index; - } - - /* Also add patch inputs. */ - for (attr = 0; attr < 32; attr++) { - if (prog->info.patch_inputs_read & (1u << attr)) { - GLuint slot = num_inputs++; - GLuint patch_attr = VARYING_SLOT_PATCH0 + attr; - - inputMapping[patch_attr] = slot; - inputSlotToAttr[slot] = patch_attr; - input_semantic_name[slot] = TGSI_SEMANTIC_PATCH; - input_semantic_index[slot] = attr; - } - } - - /* initialize output semantics to defaults */ - for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { - output_semantic_name[i] = TGSI_SEMANTIC_GENERIC; - output_semantic_index[i] = 0; - } - - /* - * Determine number of outputs, the (default) output register - * mapping and the semantic information for each output. - */ - for (attr = 0; attr < VARYING_SLOT_MAX; attr++) { - if (prog->info.outputs_written & BITFIELD64_BIT(attr)) { - GLuint slot = num_outputs++; - - outputMapping[attr] = slot; - - unsigned semantic_name, semantic_index; - tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic, - &semantic_name, &semantic_index); - output_semantic_name[slot] = semantic_name; - output_semantic_index[slot] = semantic_index; - } - } - - /* Also add patch outputs. */ - for (attr = 0; attr < 32; attr++) { - if (prog->info.patch_outputs_written & (1u << attr)) { - GLuint slot = num_outputs++; - GLuint patch_attr = VARYING_SLOT_PATCH0 + attr; - - outputMapping[patch_attr] = slot; - output_semantic_name[slot] = TGSI_SEMANTIC_PATCH; - output_semantic_index[slot] = attr; - } - } - - st_translate_program(st->ctx, - stage, - ureg, - prog->glsl_to_tgsi, - prog, - /* inputs */ - num_inputs, - inputMapping, - inputSlotToAttr, - input_semantic_name, - input_semantic_index, - NULL, - /* outputs */ - num_outputs, - outputMapping, - output_semantic_name, - output_semantic_index); - - prog->state.tokens = ureg_get_tokens(ureg, NULL); - - ureg_destroy(ureg); - - st_translate_stream_output_info(prog); - - st_store_ir_in_disk_cache(st, prog, false); - - if (ST_DEBUG & DEBUG_PRINT_IR && ST_DEBUG & DEBUG_MESA) - _mesa_print_program(prog); - - free_glsl_to_tgsi_visitor(prog->glsl_to_tgsi); - prog->glsl_to_tgsi = NULL; - return true; -} - - /** * Vert/Geom/Frag programs have per-context variants. Free all the * variants attached to the given program which match the given context. @@ -2067,9 +1572,6 @@ st_program_string_notify( struct gl_context *ctx, prog->skip_pointsize_xfb = true; NIR_PASS_V(prog->nir, st_nir_add_point_size); } - } else { - if (!st_translate_common_program(st, prog)) - return false; } st_finalize_program(st, prog); diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index 3570d3d02be..980cee8880f 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -40,7 +40,6 @@ #include "tgsi/tgsi_from_mesa.h" #include "st_context.h" #include "st_texture.h" -#include "st_glsl_to_tgsi.h" #ifdef __cplusplus extern "C" { diff --git a/src/mesa/state_tracker/tests/meson.build b/src/mesa/state_tracker/tests/meson.build index 5f53f1d186e..bd81d5ded04 100644 --- a/src/mesa/state_tracker/tests/meson.build +++ b/src/mesa/state_tracker/tests/meson.build @@ -20,7 +20,6 @@ libmesa_st_test_common = static_library( 'mesa_st_test_common', - ['st_tests_common.cpp', ir_expression_operation_h], include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux], dependencies : [idep_gtest, idep_mesautil], ) @@ -38,33 +37,3 @@ test( ), suite : ['st_mesa'], ) - -test( - 'st_renumerate_test', - executable( - 'st_renumerate_test', - ['test_glsl_to_tgsi_lifetime.cpp', ir_expression_operation_h], - include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux], - link_with : [ - libmesa_st_test_common, libmesa, libglapi, libgallium, - ], - dependencies : [idep_mesautil, idep_gtest], - ), - suite : ['st_mesa'], - protocol : gtest_test_protocol, -) - -test( - 'st-array-merge-test', - executable( - 'st_array_merge_test', - ['test_glsl_to_tgsi_array_merge.cpp', ir_expression_operation_h], - include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux], - link_with : [ - libmesa_st_test_common, libmesa, libglapi, libgallium, - ], - dependencies : [idep_mesautil, idep_gtest], - ), - suite : ['st_mesa'], - protocol : gtest_test_protocol, -) diff --git a/src/mesa/state_tracker/tests/st_tests_common.cpp b/src/mesa/state_tracker/tests/st_tests_common.cpp deleted file mode 100644 index 4da688cfa0e..00000000000 --- a/src/mesa/state_tracker/tests/st_tests_common.cpp +++ /dev/null @@ -1,619 +0,0 @@ -/* - * Copyright © 2017 Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "st_tests_common.h" - -#include "mesa/program/prog_instruction.h" -#include "tgsi/tgsi_info.h" -#include "tgsi/tgsi_ureg.h" -#include "compiler/glsl/list.h" -#include "gtest/gtest.h" - -#include -#include - -using std::vector; -using std::pair; -using std::make_pair; -using std::transform; -using std::copy; -using std::tuple; - - -/* Implementation of helper and test classes */ -void *FakeCodeline::mem_ctx = nullptr; - -FakeCodeline::FakeCodeline(tgsi_opcode _op, const vector& _dst, - const vector& _src, const vector&_to): - op(_op), - max_temp_id(0), - max_array_id(0) -{ - transform(_dst.begin(), _dst.end(), std::back_inserter(dst), - [this](int i) { return create_dst_register(i);}); - - transform(_src.begin(), _src.end(), std::back_inserter(src), - [this](int i) { return create_src_register(i);}); - - transform(_to.begin(), _to.end(), std::back_inserter(tex_offsets), - [this](int i) { return create_src_register(i);}); - -} - -FakeCodeline::FakeCodeline(tgsi_opcode _op, const vector>& _dst, - const vector>& _src, - const vector>&_to, - SWZ with_swizzle): - op(_op), - max_temp_id(0), - max_array_id(0) -{ - (void)with_swizzle; - - transform(_dst.begin(), _dst.end(), std::back_inserter(dst), - [this](pair r) { - return create_dst_register(r.first, r.second); - }); - - transform(_src.begin(), _src.end(), std::back_inserter(src), - [this](const pair& r) { - return create_src_register(r.first, r.second); - }); - - transform(_to.begin(), _to.end(), std::back_inserter(tex_offsets), - [this](const pair& r) { - return create_src_register(r.first, r.second); - }); -} - -FakeCodeline::FakeCodeline(tgsi_opcode _op, const vector>& _dst, - const vector>& _src, - const vector>&_to, RA with_reladdr): - op(_op), - max_temp_id(0), - max_array_id(0) -{ - (void)with_reladdr; - - transform(_dst.begin(), _dst.end(), std::back_inserter(dst), - [this](const tuple& r) { - return create_dst_register(r); - }); - - transform(_src.begin(), _src.end(), std::back_inserter(src), - [this](const tuple& r) { - return create_src_register(r); - }); - - transform(_to.begin(), _to.end(), std::back_inserter(tex_offsets), - [this](const tuple& r) { - return create_src_register(r); - }); -} - -FakeCodeline::FakeCodeline(tgsi_opcode _op, const vector>& _dst, - const vector>& _src, - const vector>&_to, - ARR with_array): - FakeCodeline(_op) -{ - (void)with_array; - - transform(_dst.begin(), _dst.end(), std::back_inserter(dst), - [this](const tuple& r) { - return create_array_dst_register(r); - }); - - transform(_src.begin(), _src.end(), std::back_inserter(src), - [this](const tuple& r) { - return create_array_src_register(r); - }); - - transform(_to.begin(), _to.end(), std::back_inserter(tex_offsets), - [this](const tuple& r) { - return create_array_src_register(r); - }); - -} - -FakeCodeline::FakeCodeline(const glsl_to_tgsi_instruction& instr): - op(instr.op), - max_temp_id(0), - max_array_id(0) -{ - int nsrc = num_inst_src_regs(&instr); - int ndst = num_inst_dst_regs(&instr); - - copy(instr.src, instr.src + nsrc, std::back_inserter(src)); - copy(instr.dst, instr.dst + ndst, std::back_inserter(dst)); - - for (auto& s: src) - read_reg(s); - - for (auto& d: dst) - read_reg(d); - -} - -template -void FakeCodeline::read_reg(const st_reg& s) -{ - if (s.file == PROGRAM_ARRAY) { - if (s.array_id > max_array_id) - max_array_id = s.array_id; - if (s.reladdr) - read_reg(*s.reladdr); - if (s.reladdr2) - read_reg(*s.reladdr2); - } else if (s.file == PROGRAM_TEMPORARY) { - if (s.index > max_temp_id) - max_temp_id = s.index; - } -} - -void FakeCodeline::print(std::ostream& os) const -{ - const struct tgsi_opcode_info *info = tgsi_get_opcode_info(op); - os << tgsi_get_opcode_name(info->opcode) << " "; - - for (auto d: dst) { - os << d << " "; - } - os << " <- "; - for (auto s: src) { - os << s << " "; - } - os << "\n"; -} - -bool operator == (const FakeCodeline& lhs, const FakeCodeline& rhs) -{ - if ((lhs.op != rhs.op) || - (lhs.src.size() != rhs.src.size()) || - (lhs.dst.size() != rhs.dst.size())) - return false; - - return std::equal(lhs.src.begin(), lhs.src.end(), rhs.src.begin()) && - std::equal(lhs.dst.begin(), lhs.dst.end(), rhs.dst.begin()); -} - -st_src_reg FakeCodeline::create_src_register(int src_idx) -{ - return create_src_register(src_idx, - src_idx < 0 ? PROGRAM_INPUT : PROGRAM_TEMPORARY); -} - -static int swizzle_from_char(const char *sw) -{ - int swizzle = 0; - if (!sw || sw[0] == 0) - return SWIZZLE_XYZW; - - const char *isw = sw; - for (int i = 0; i < 4; ++i) { - switch (*isw) { - case 'x': break; /* is zero */ - case 'y': swizzle |= SWIZZLE_Y << 3 * i; break; - case 'z': swizzle |= SWIZZLE_Z << 3 * i; break; - case 'w': swizzle |= SWIZZLE_W << 3 * i; break; - default: - assert(!"This test uses an unknown swizzle character"); - } - if (isw[1] != 0) - ++isw; - } - return swizzle; -} - -st_src_reg FakeCodeline::create_src_register(int src_idx, const char *sw) -{ - st_src_reg result = create_src_register(src_idx); - result.swizzle = swizzle_from_char(sw); - return result; -} - -st_src_reg FakeCodeline::create_src_register(int src_idx, gl_register_file file) -{ - st_src_reg retval; - retval.file = file; - retval.index = src_idx >= 0 ? src_idx : 1 - src_idx; - - if (file == PROGRAM_TEMPORARY) { - if (max_temp_id < src_idx) - max_temp_id = src_idx; - } else if (file == PROGRAM_ARRAY) { - retval.array_id = 1; - if (max_array_id < 1) - max_array_id = 1; - } - retval.swizzle = SWIZZLE_XYZW; - retval.type = GLSL_TYPE_INT; - - return retval; -} - -st_src_reg *FakeCodeline::create_rel_src_register(int idx) -{ - st_src_reg *retval = ralloc(mem_ctx, st_src_reg); - *retval = st_src_reg(PROGRAM_TEMPORARY, idx, GLSL_TYPE_INT); - if (max_temp_id < idx) - max_temp_id = idx; - return retval; -} - -st_src_reg FakeCodeline::create_array_src_register(const tuple& r) -{ - - int array_id = std::get<0>(r); - int idx = std::get<1>(r); - - st_src_reg retval = create_src_register(idx, std::get<2>(r)); - - if (array_id > 0) { - retval.file = PROGRAM_ARRAY; - - retval.array_id = array_id; - if (max_array_id < array_id) - max_array_id = array_id; - } else { - if (max_temp_id < idx) - max_temp_id = idx; - } - - return retval; -} - -st_dst_reg FakeCodeline::create_array_dst_register(const tuple& r) -{ - - int array_id = std::get<0>(r); - int idx = std::get<1>(r); - - st_dst_reg retval = create_dst_register(idx, std::get<2>(r)); - - if (array_id > 0) { - retval.file = PROGRAM_ARRAY; - retval.array_id = array_id; - if (max_array_id < array_id) - max_array_id = array_id; - } else { - if (max_temp_id < idx) - max_temp_id = idx; - } - return retval; -} - -st_src_reg FakeCodeline::create_src_register(const tuple& src) -{ - int src_idx = std::get<0>(src); - int relidx1 = std::get<1>(src); - int relidx2 = std::get<2>(src); - - gl_register_file file = PROGRAM_TEMPORARY; - if (src_idx < 0) - file = PROGRAM_OUTPUT; - else if (relidx1 || relidx2) { - file = PROGRAM_ARRAY; - } - - st_src_reg retval = create_src_register(src_idx, file); - if (src_idx >= 0) { - if (relidx1 || relidx2) { - retval.array_id = 1; - - if (relidx1) - retval.reladdr = create_rel_src_register(relidx1); - if (relidx2) { - retval.reladdr2 = create_rel_src_register(relidx2); - retval.has_index2 = true; - retval.index2D = 10; - } - } - } - return retval; -} - -st_dst_reg FakeCodeline::create_dst_register(int dst_idx,int writemask) -{ - gl_register_file file; - int idx = 0; - if (dst_idx >= 0) { - file = PROGRAM_TEMPORARY; - idx = dst_idx; - if (max_temp_id < idx) - max_temp_id = idx; - } else { - file = PROGRAM_OUTPUT; - idx = 1 - dst_idx; - } - return st_dst_reg(file, writemask, GLSL_TYPE_INT, idx); -} - -st_dst_reg FakeCodeline::create_dst_register(int dst_idx) -{ - return create_dst_register(dst_idx, dst_idx < 0 ? - PROGRAM_OUTPUT : PROGRAM_TEMPORARY); -} - -st_dst_reg FakeCodeline::create_dst_register(int dst_idx, gl_register_file file) -{ - st_dst_reg retval; - retval.file = file; - retval.index = dst_idx >= 0 ? dst_idx : 1 - dst_idx; - - if (file == PROGRAM_TEMPORARY) { - if (max_temp_id < dst_idx) - max_temp_id = dst_idx; - } else if (file == PROGRAM_ARRAY) { - retval.array_id = 1; - if (max_array_id < 1) - max_array_id = 1; - } - retval.writemask = 0xF; - retval.type = GLSL_TYPE_INT; - - return retval; -} - -st_dst_reg FakeCodeline::create_dst_register(const tuple& dst) -{ - int dst_idx = std::get<0>(dst); - int relidx1 = std::get<1>(dst); - int relidx2 = std::get<2>(dst); - - gl_register_file file = PROGRAM_TEMPORARY; - if (dst_idx < 0) - file = PROGRAM_OUTPUT; - else if (relidx1 || relidx2) { - file = PROGRAM_ARRAY; - } - st_dst_reg retval = create_dst_register(dst_idx, file); - - if (relidx1 || relidx2) { - if (relidx1) - retval.reladdr = create_rel_src_register(relidx1); - if (relidx2) { - retval.reladdr2 = create_rel_src_register(relidx2); - retval.has_index2 = true; - retval.index2D = 10; - } - } - return retval; -} - -glsl_to_tgsi_instruction *FakeCodeline::get_codeline() const -{ - glsl_to_tgsi_instruction *next_instr = new(mem_ctx) glsl_to_tgsi_instruction(); - next_instr->op = op; - next_instr->info = tgsi_get_opcode_info(op); - - assert(src.size() == num_inst_src_regs(next_instr)); - assert(dst.size() == num_inst_dst_regs(next_instr)); - assert(tex_offsets.size() < 3); - - copy(src.begin(), src.end(), next_instr->src); - copy(dst.begin(), dst.end(), next_instr->dst); - - next_instr->tex_offset_num_offset = tex_offsets.size(); - - if (next_instr->tex_offset_num_offset > 0) { - next_instr->tex_offsets = ralloc_array(mem_ctx, st_src_reg, tex_offsets.size()); - copy(tex_offsets.begin(), tex_offsets.end(), next_instr->tex_offsets); - } else { - next_instr->tex_offsets = nullptr; - } - return next_instr; -} - -void FakeCodeline::set_mem_ctx(void *ctx) -{ - mem_ctx = ctx; -} - -FakeShader::FakeShader(const vector& source): - program(source), - num_temps(0), - num_arrays(0) -{ - for (const FakeCodeline& i: source) { - int t = i.get_max_reg_id(); - if (t > num_temps) - num_temps = t; - - int a = i.get_max_array_id(); - if (a > num_arrays) - num_arrays = a; - } - ++num_temps; -} - -FakeShader::FakeShader(exec_list *tgsi_prog): - num_temps(0), - num_arrays(0) -{ - FakeCodeline nop(TGSI_OPCODE_NOP); - FakeCodeline& last = nop; - - foreach_in_list(glsl_to_tgsi_instruction, inst, tgsi_prog) { - program.push_back(last = FakeCodeline(*inst)); - if (last.get_max_array_id() > num_arrays) - num_arrays = last.get_max_array_id(); - if (num_temps < last.get_max_reg_id()) - num_temps = last.get_max_reg_id(); - } - ++num_temps; -} - -int FakeShader::get_num_arrays() const -{ - return num_arrays; -} - -int FakeShader::get_num_temps() const -{ - return num_temps; -} - -exec_list* FakeShader::get_program(void *ctx) const -{ - exec_list *prog = new(ctx) exec_list(); - - for (const FakeCodeline& i: program) { - prog->push_tail(i.get_codeline()); - } - - return prog; -} - -size_t FakeShader::length() const -{ - return program.size(); -} - -const FakeCodeline& FakeShader::line(unsigned i) const -{ - return program[i]; -} - -void MesaTestWithMemCtx::SetUp() -{ - mem_ctx = ralloc_context(nullptr); - FakeCodeline::set_mem_ctx(mem_ctx); -} - -void MesaTestWithMemCtx::TearDown() -{ - ralloc_free(mem_ctx); - FakeCodeline::set_mem_ctx(nullptr); - mem_ctx = nullptr; -} - - -LifetimeEvaluatorTest::life_range_result -LifetimeEvaluatorTest::run(const vector& code, bool& success) -{ - FakeShader shader(code); - life_range_result result = make_pair(life_range_result::first_type(shader.get_num_temps()), - life_range_result::second_type(shader.get_num_arrays())); - - success = - get_temp_registers_required_live_ranges(mem_ctx, shader.get_program(mem_ctx), - shader.get_num_temps(),&result.first[0], - shader.get_num_arrays(), &result.second[0]); - return result; -} - -void LifetimeEvaluatorTest::run(const vector& code, const temp_lt_expect& e) -{ - bool success = false; - auto result = run(code, success); - ASSERT_TRUE(success); - ASSERT_EQ(result.first.size(), e.size()); - check(result.first, e); -} - -void LifetimeEvaluatorTest::run(const vector& code, const array_lt_expect& e) -{ - bool success = false; - auto result = run(code, success); - ASSERT_TRUE(success); - ASSERT_EQ(result.second.size(), e.size()); - check(result.second, e); -} - -void LifetimeEvaluatorExactTest::check( const vector& lifetimes, - const temp_lt_expect& e) -{ - for (unsigned i = 1; i < lifetimes.size(); ++i) { - EXPECT_EQ(lifetimes[i].begin, e[i][0]); - EXPECT_EQ(lifetimes[i].end, e[i][1]); - } -} - -void LifetimeEvaluatorExactTest::check(const vector& lifetimes, - const array_lt_expect& e) -{ - for (unsigned i = 0; i < lifetimes.size(); ++i) { - EXPECT_EQ(lifetimes[i].begin(), e[i].begin()); - EXPECT_EQ(lifetimes[i].end(), e[i].end()); - EXPECT_EQ(lifetimes[i].access_mask(), e[i].access_mask()); - } -} - -void LifetimeEvaluatorAtLeastTest::check( const vector& lifetimes, - const temp_lt_expect& e) -{ - for (unsigned i = 1; i < lifetimes.size(); ++i) { - EXPECT_LE(lifetimes[i].begin, e[i][0]); - EXPECT_GE(lifetimes[i].end, e[i][1]); - } -} - -void LifetimeEvaluatorAtLeastTest::check(const vector& lifetimes, - const array_lt_expect& e) -{ - for (unsigned i = 0; i < lifetimes.size(); ++i) { - EXPECT_LE(lifetimes[i].begin(), e[i].begin()); - EXPECT_GE(lifetimes[i].end(), e[i].end()); - - /* Tests that lifetimes doesn't add unexpected swizzles */ - EXPECT_EQ(lifetimes[i].access_mask()| e[i].access_mask(), - e[i].access_mask()); - } -} - - -void RegisterRemappingTest::run(const vector& lt, - const vector& expect) -{ - rename_reg_pair proto{false,0}; - vector result(lt.size(), proto); - - get_temp_registers_remapping(mem_ctx, lt.size(), <[0], &result[0]); - - vector remap(lt.size()); - for (unsigned i = 0; i < lt.size(); ++i) { - remap[i] = result[i].valid ? result[i].new_reg : i; - } - - std::transform(remap.begin(), remap.end(), result.begin(), remap.begin(), - [](int x, const rename_reg_pair& rn) { - return rn.valid ? rn.new_reg : x; - }); - - for(unsigned i = 1; i < remap.size(); ++i) { - EXPECT_EQ(remap[i], expect[i]); - } -} - -void RegisterLifetimeAndRemappingTest::run(const vector& code, - const vector& expect) -{ - FakeShader shader(code); - std::vector lt(shader.get_num_temps()); - std::vector alt(shader.get_num_arrays()); - get_temp_registers_required_live_ranges(mem_ctx, shader.get_program(mem_ctx), - shader.get_num_temps(), <[0], - shader.get_num_arrays(), &alt[0]); - this->run(lt, expect); -} diff --git a/src/mesa/state_tracker/tests/st_tests_common.h b/src/mesa/state_tracker/tests/st_tests_common.h deleted file mode 100644 index 9b74a37c1c7..00000000000 --- a/src/mesa/state_tracker/tests/st_tests_common.h +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright © 2017 Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#ifndef mesa_st_tests_h -#define mesa_st_tests_h - -#include "state_tracker/st_glsl_to_tgsi_temprename.h" -#include "state_tracker/st_glsl_to_tgsi_array_merge.h" -#include "gtest/gtest.h" - -#include - -#define MP(X, W) std::make_pair(X, W) -#define MT(X,Y,Z) std::make_tuple(X,Y,Z) - -/* Use this to make the compiler pick the swizzle constructor below */ -struct SWZ {}; - -/* Use this to make the compiler pick the constructor with reladdr below */ -struct RA {}; - -/* Use this to make the compiler pick the constructor with array below */ -struct ARR {}; - -/* A line to describe a TGSI instruction for building mock shaders. */ -struct FakeCodeline { - FakeCodeline(tgsi_opcode _op): op(_op), max_temp_id(0), max_array_id(0) {} - FakeCodeline(tgsi_opcode _op, const std::vector& _dst, const std::vector& _src, - const std::vector&_to); - - FakeCodeline(tgsi_opcode _op, const std::vector>& _dst, - const std::vector>& _src, - const std::vector>&_to, SWZ with_swizzle); - - FakeCodeline(tgsi_opcode _op, const std::vector>& _dst, - const std::vector>& _src, - const std::vector>&_to, RA with_reladdr); - - FakeCodeline(tgsi_opcode _op, const std::vector > &_dst, - const std::vector>& _src, - const std::vector>&_to, ARR with_array); - - FakeCodeline(const glsl_to_tgsi_instruction& inst); - - int get_max_reg_id() const { return max_temp_id;} - int get_max_array_id() const { return max_array_id;} - - glsl_to_tgsi_instruction *get_codeline() const; - - static void set_mem_ctx(void *ctx); - - friend bool operator == (const FakeCodeline& lsh, const FakeCodeline& rhs); - - void print(std::ostream& os) const; -private: - st_src_reg create_src_register(int src_idx); - st_src_reg create_src_register(int src_idx, const char *swizzle); - st_src_reg create_src_register(int src_idx, gl_register_file file); - st_src_reg create_src_register(const std::tuple& src); - st_src_reg *create_rel_src_register(int idx); - st_src_reg create_array_src_register(const std::tuple& r); - st_dst_reg create_array_dst_register(const std::tuple& r); - - st_dst_reg create_dst_register(int dst_idx); - st_dst_reg create_dst_register(int dst_idx, int writemask); - st_dst_reg create_dst_register(int dst_idx, gl_register_file file); - st_dst_reg create_dst_register(const std::tuple& dest); - - template - void read_reg(const st_reg& s); - - tgsi_opcode op; - std::vector dst; - std::vector src; - std::vector tex_offsets; - - int max_temp_id; - int max_array_id; - static void *mem_ctx; -}; - -inline std::ostream& operator << (std::ostream& os, const FakeCodeline& line) -{ - line.print(os); - return os; -} - -/* A few constants that will not be tracked as temporary registers - by the fake shader. - */ -const int in0 = -1; -const int in1 = -2; -const int in2 = -3; - -const int out0 = -1; -const int out1 = -2; -const int out2 = -3; - -class FakeShader { -public: - FakeShader(const std::vector& source); - FakeShader(exec_list *tgsi_prog); - - exec_list* get_program(void *ctx) const; - int get_num_temps() const; - int get_num_arrays() const; - - size_t length() const; - - const FakeCodeline& line(unsigned i) const; - -private: - - std::vector program; - int num_temps; - int num_arrays; -}; - -using temp_lt_expect = std::vector>; -using array_lt_expect = std::vector; - -class MesaTestWithMemCtx : public testing::Test { - void SetUp(); - void TearDown(); -protected: - void *mem_ctx; -}; - -class LifetimeEvaluatorTest : public MesaTestWithMemCtx { -protected: - void run(const std::vector& code, const temp_lt_expect& e); - void run(const std::vector& code, const array_lt_expect& e); -private: - using life_range_result=std::pair, - std::vector>; - life_range_result run(const std::vector& code, bool& success); - - virtual void check(const std::vector& result, - const temp_lt_expect& e) = 0; - virtual void check(const std::vector& lifetimes, - const array_lt_expect& e) = 0; -}; - -/* This is a test class to check the exact life times of - * registers. */ -class LifetimeEvaluatorExactTest : public LifetimeEvaluatorTest { -protected: - void check(const std::vector& result, - const temp_lt_expect& e); - - void check(const std::vector& result, - const array_lt_expect& e); -}; - -/* This test class checks that the life time covers at least - * in the expected range. It is used for cases where we know that - * a the implementation could be improved on estimating the minimal - * life time. - */ -class LifetimeEvaluatorAtLeastTest : public LifetimeEvaluatorTest { -protected: - void check(const std::vector& result, const temp_lt_expect& e); - void check(const std::vector& result, - const array_lt_expect& e); -}; - -/* With this test class the renaming mapping estimation is tested */ -class RegisterRemappingTest : public MesaTestWithMemCtx { -protected: - void run(const std::vector& lt, - const std::vector &expect); -}; - -/* With this test class the combined lifetime estimation and renaming - * mepping estimation is tested - */ -class RegisterLifetimeAndRemappingTest : public RegisterRemappingTest { -protected: - using RegisterRemappingTest::run; - void run(const std::vector& code, const std::vector &expect); -}; - -#endif diff --git a/src/mesa/state_tracker/tests/test_glsl_to_tgsi_array_merge.cpp b/src/mesa/state_tracker/tests/test_glsl_to_tgsi_array_merge.cpp deleted file mode 100644 index a701716a09c..00000000000 --- a/src/mesa/state_tracker/tests/test_glsl_to_tgsi_array_merge.cpp +++ /dev/null @@ -1,962 +0,0 @@ -/* - * Copyright © 2017 Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - - -#include "st_tests_common.h" - -#include "tgsi/tgsi_ureg.h" -#include "tgsi/tgsi_info.h" -#include "mesa/program/prog_instruction.h" -#include "gtest/gtest.h" - -#include -#include -#include - -using std::vector; - -using namespace tgsi_array_merge; -using ArrayLiveRangeMerge=testing::Test; - -TEST_F(ArrayLiveRangeMerge, SimpleLiveRange) -{ - array_live_range a1(1, 10, 1, 5, WRITEMASK_X); - array_live_range a2(2, 5, 6, 10, WRITEMASK_X); - - array_live_range::merge(&a1, &a2); - - EXPECT_EQ(a1.array_id(), 1); - EXPECT_EQ(a1.begin(), 1); - EXPECT_EQ(a1.end(), 10); - EXPECT_EQ(a1.target_array_id(), 0); - EXPECT_EQ(a1.used_components(), 1); - EXPECT_EQ(a1.access_mask(), WRITEMASK_X); - - EXPECT_EQ(a1.remap_one_swizzle(0), 0); - EXPECT_EQ(a1.remap_one_swizzle(1), 1); - EXPECT_EQ(a1.remap_one_swizzle(2), 2); - EXPECT_EQ(a1.remap_one_swizzle(3), 3); - - EXPECT_EQ(a2.array_id(), 2); - EXPECT_EQ(a2.begin(), 6); - EXPECT_EQ(a2.end(), 10); - EXPECT_EQ(a2.target_array_id(), 1); - EXPECT_EQ(a2.used_components(), 1); - EXPECT_EQ(a2.access_mask(), WRITEMASK_X); - - EXPECT_EQ(a2.remap_one_swizzle(0), 0); - EXPECT_EQ(a2.remap_one_swizzle(1), 1); - EXPECT_EQ(a2.remap_one_swizzle(2), 2); - EXPECT_EQ(a2.remap_one_swizzle(3), 3); -} - -TEST_F(ArrayLiveRangeMerge, SimpleLiveRangeInverse) -{ - array_live_range a1(1, 5, 1, 5, WRITEMASK_X); - array_live_range a2(2, 10, 6, 10, WRITEMASK_X); - - array_live_range::merge(&a1, &a2); - - EXPECT_EQ(a1.array_id(), 1); - EXPECT_EQ(a1.begin(), 1); - EXPECT_EQ(a1.end(), 5); - EXPECT_EQ(a1.target_array_id(), 2); - EXPECT_EQ(a1.used_components(), 1); - EXPECT_EQ(a1.access_mask(), WRITEMASK_X); - - EXPECT_EQ(a1.remap_one_swizzle(0), 0); - EXPECT_EQ(a1.remap_one_swizzle(1), 1); - EXPECT_EQ(a1.remap_one_swizzle(2), 2); - EXPECT_EQ(a1.remap_one_swizzle(3), 3); - - EXPECT_EQ(a2.array_id(), 2); - EXPECT_EQ(a2.begin(), 1); - EXPECT_EQ(a2.end(), 10); - EXPECT_EQ(a2.target_array_id(), 0); - EXPECT_EQ(a2.used_components(), 1); - EXPECT_EQ(a2.access_mask(), WRITEMASK_X); - - EXPECT_EQ(a2.remap_one_swizzle(0), 0); - EXPECT_EQ(a2.remap_one_swizzle(1), 1); - EXPECT_EQ(a2.remap_one_swizzle(2), 2); - EXPECT_EQ(a2.remap_one_swizzle(3), 3); -} - - -TEST_F(ArrayLiveRangeMerge, Interleave_x_xyz) -{ - array_live_range a1(1, 10, 1, 10, WRITEMASK_X); - array_live_range a2(2, 9, 1, 10, WRITEMASK_XYZ); - - array_live_range::interleave(&a1, &a2); - - EXPECT_EQ(a1.array_id(), 1); - EXPECT_EQ(a1.begin(), 1); - EXPECT_EQ(a1.end(), 10); - EXPECT_EQ(a1.array_length(), 10u); - EXPECT_EQ(a1.target_array_id(), 0); - EXPECT_EQ(a1.used_components(), 4); - EXPECT_EQ(a1.access_mask(), WRITEMASK_XYZW); - - EXPECT_EQ(a1.remap_one_swizzle(0), 0); - EXPECT_EQ(a1.remap_one_swizzle(1), 1); - EXPECT_EQ(a1.remap_one_swizzle(2), 2); - EXPECT_EQ(a1.remap_one_swizzle(3), 3); - - EXPECT_EQ(a2.array_id(), 2); - EXPECT_EQ(a2.begin(), 1); - EXPECT_EQ(a2.end(), 10); - EXPECT_EQ(a2.target_array_id(), 1); - - EXPECT_EQ(a2.remap_one_swizzle(0), 1); - EXPECT_EQ(a2.remap_one_swizzle(1), 2); - EXPECT_EQ(a2.remap_one_swizzle(2), 3); - EXPECT_EQ(a2.remap_one_swizzle(3), -1); -} - -TEST_F(ArrayLiveRangeMerge, Interleave_xyz_x) -{ - array_live_range a1(1, 10, 1, 10, WRITEMASK_XYZ); - array_live_range a2(2, 9, 1, 10, WRITEMASK_X); - - array_live_range::interleave(&a1, &a2); - - EXPECT_EQ(a1.array_id(), 1); - EXPECT_EQ(a1.begin(), 1); - EXPECT_EQ(a1.end(), 10); - EXPECT_EQ(a1.array_length(), 10u); - EXPECT_EQ(a1.target_array_id(), 0); - EXPECT_EQ(a1.used_components(), 4); - EXPECT_EQ(a1.access_mask(), WRITEMASK_XYZW); - - EXPECT_EQ(a1.remap_one_swizzle(0), 0); - EXPECT_EQ(a1.remap_one_swizzle(1), 1); - EXPECT_EQ(a1.remap_one_swizzle(2), 2); - EXPECT_EQ(a1.remap_one_swizzle(3), 3); - - EXPECT_EQ(a2.array_id(), 2); - EXPECT_EQ(a2.begin(), 1); - EXPECT_EQ(a2.end(), 10); - EXPECT_EQ(a2.target_array_id(), 1); - - EXPECT_EQ(a2.remap_one_swizzle(0), 3); - EXPECT_EQ(a2.remap_one_swizzle(1), -1); - EXPECT_EQ(a2.remap_one_swizzle(2), -1); - EXPECT_EQ(a2.remap_one_swizzle(3), -1); -} - - -TEST_F(ArrayLiveRangeMerge, SimpleInterleave) -{ - array_live_range a1(1, 10, 1, 10, WRITEMASK_X); - array_live_range a2(2, 9, 1, 10, WRITEMASK_X); - - array_live_range::interleave(&a1, &a2); - - EXPECT_EQ(a1.array_id(), 1); - EXPECT_EQ(a1.begin(), 1); - EXPECT_EQ(a1.end(), 10); - EXPECT_EQ(a1.array_length(), 10u); - EXPECT_EQ(a1.target_array_id(), 0); - EXPECT_EQ(a1.used_components(), 2); - EXPECT_EQ(a1.access_mask(), WRITEMASK_XY); - - EXPECT_EQ(a1.remap_one_swizzle(0), 0); - EXPECT_EQ(a1.remap_one_swizzle(1), 1); - EXPECT_EQ(a1.remap_one_swizzle(2), 2); - EXPECT_EQ(a1.remap_one_swizzle(3), 3); - - EXPECT_EQ(a2.array_id(), 2); - EXPECT_EQ(a2.begin(), 1); - EXPECT_EQ(a2.end(), 10); - EXPECT_EQ(a2.target_array_id(), 1); - - EXPECT_EQ(a2.remap_one_swizzle(0), 1); - EXPECT_EQ(a2.remap_one_swizzle(1), -1); - EXPECT_EQ(a2.remap_one_swizzle(2), -1); - EXPECT_EQ(a2.remap_one_swizzle(3), -1); -} - - -TEST_F(ArrayLiveRangeMerge, SimpleInterleaveInverse) -{ - array_live_range a1(1, 8, 1, 10, WRITEMASK_X); - array_live_range a2(2, 9, 1, 10, WRITEMASK_X); - - array_live_range::interleave(&a1, &a2); - - EXPECT_EQ(a1.array_id(), 1); - EXPECT_EQ(a1.begin(), 1); - EXPECT_EQ(a1.end(), 10); - EXPECT_EQ(a1.target_array_id(), 2); - - EXPECT_EQ(a1.remap_one_swizzle(0), 1); - EXPECT_EQ(a1.remap_one_swizzle(1), -1); - EXPECT_EQ(a1.remap_one_swizzle(2), -1); - EXPECT_EQ(a1.remap_one_swizzle(3), -1); - - EXPECT_EQ(a2.array_id(), 2); - EXPECT_EQ(a2.target_array_id(), 0); - EXPECT_EQ(a2.begin(), 1); - EXPECT_EQ(a2.end(), 10); - EXPECT_EQ(a2.array_length(), 9u); - EXPECT_EQ(a2.used_components(), 2); - EXPECT_EQ(a2.access_mask(), WRITEMASK_XY); -} - - -TEST_F(ArrayLiveRangeMerge, InterleaveRiveRangeExtend) -{ - array_live_range a1(1, 10, 2, 9, WRITEMASK_X); - array_live_range a2(2, 9, 1, 10, WRITEMASK_X); - - array_live_range::interleave(&a1, &a2); - - EXPECT_EQ(a1.array_id(), 1); - EXPECT_EQ(a1.begin(), 1); - EXPECT_EQ(a1.end(), 10); - EXPECT_EQ(a1.array_length(), 10u); - EXPECT_EQ(a1.target_array_id(), 0); - EXPECT_EQ(a1.used_components(), 2); - EXPECT_EQ(a1.access_mask(), WRITEMASK_XY); - - EXPECT_EQ(a1.remap_one_swizzle(0), 0); - EXPECT_EQ(a1.remap_one_swizzle(1), 1); - EXPECT_EQ(a1.remap_one_swizzle(2), 2); - EXPECT_EQ(a1.remap_one_swizzle(3), 3); - - EXPECT_EQ(a2.array_id(), 2); - EXPECT_EQ(a2.begin(), 1); - EXPECT_EQ(a2.end(), 10); - EXPECT_EQ(a2.target_array_id(), 1); - - EXPECT_EQ(a2.remap_one_swizzle(0), 1); - EXPECT_EQ(a2.remap_one_swizzle(1), -1); - EXPECT_EQ(a2.remap_one_swizzle(2), -1); - EXPECT_EQ(a2.remap_one_swizzle(3), -1); -} - -TEST_F(ArrayLiveRangeMerge, InterleaveLiveRangeExtendInverse) -{ - array_live_range a1(1, 8, 2, 11, WRITEMASK_X); - array_live_range a2(2, 9, 1, 10, WRITEMASK_X); - - array_live_range::interleave(&a1, &a2); - - EXPECT_EQ(a1.array_id(), 1); - EXPECT_EQ(a1.begin(), 2); - EXPECT_EQ(a1.end(), 11); - EXPECT_EQ(a1.target_array_id(), 2); - EXPECT_EQ(a1.used_components(), 1); - EXPECT_EQ(a1.access_mask(), WRITEMASK_X); - - EXPECT_EQ(a1.remap_one_swizzle(0), 1); - EXPECT_EQ(a1.remap_one_swizzle(1), -1); - EXPECT_EQ(a1.remap_one_swizzle(2), -1); - EXPECT_EQ(a1.remap_one_swizzle(3), -1); - - EXPECT_EQ(a2.array_id(), 2); - EXPECT_EQ(a2.begin(), 1); - EXPECT_EQ(a2.end(), 11); - EXPECT_EQ(a2.target_array_id(), 0); - EXPECT_EQ(a2.used_components(), 2); - EXPECT_EQ(a2.access_mask(), WRITEMASK_XY); - - EXPECT_EQ(a2.remap_one_swizzle(0), 0); - EXPECT_EQ(a2.remap_one_swizzle(1), 1); - EXPECT_EQ(a2.remap_one_swizzle(2), 2); - EXPECT_EQ(a2.remap_one_swizzle(3), 3); -} - -TEST_F(ArrayLiveRangeMerge, InterleaveChained) -{ - array_live_range a1(1, 8, 2, 11, WRITEMASK_X); - array_live_range a2(2, 9, 1, 10, WRITEMASK_X); - array_live_range a3(3, 10, 1, 10, WRITEMASK_X); - - array_live_range::interleave(&a1, &a2); - array_live_range::interleave(&a2, &a3); - - EXPECT_EQ(a1.array_id(), 1); - EXPECT_EQ(a1.begin(), 2); - EXPECT_EQ(a1.end(), 11); - EXPECT_EQ(a1.target_array_id(), 2); - EXPECT_EQ(a1.used_components(), 1); - EXPECT_EQ(a1.access_mask(), WRITEMASK_X); - - EXPECT_EQ(a1.remap_one_swizzle(0), 2); - EXPECT_EQ(a1.remap_one_swizzle(1), -1); - EXPECT_EQ(a1.remap_one_swizzle(2), -1); - EXPECT_EQ(a1.remap_one_swizzle(3), -1); - - EXPECT_EQ(a2.array_id(), 2); - EXPECT_EQ(a2.begin(), 1); - EXPECT_EQ(a2.end(), 11); - EXPECT_EQ(a2.target_array_id(), 3); - EXPECT_EQ(a2.used_components(), 2); - EXPECT_EQ(a2.access_mask(), WRITEMASK_XY); - - EXPECT_EQ(a2.remap_one_swizzle(0), 1); - EXPECT_EQ(a2.remap_one_swizzle(1), 2); - EXPECT_EQ(a2.remap_one_swizzle(2), -1); - EXPECT_EQ(a2.remap_one_swizzle(3), -1); - - EXPECT_EQ(a3.array_id(), 3); - EXPECT_EQ(a3.begin(), 1); - EXPECT_EQ(a3.end(), 11); - EXPECT_EQ(a3.target_array_id(), 0); - EXPECT_EQ(a3.used_components(), 3); - EXPECT_EQ(a3.access_mask(), WRITEMASK_XYZ); - - EXPECT_EQ(a3.remap_one_swizzle(0), 0); - EXPECT_EQ(a3.remap_one_swizzle(1), 1); - EXPECT_EQ(a3.remap_one_swizzle(2), 2); - EXPECT_EQ(a3.remap_one_swizzle(3), 3); -} - -TEST_F(ArrayLiveRangeMerge, MergeInterleaveChained) -{ - array_live_range a1(1, 8, 1, 5, WRITEMASK_X); - array_live_range a2(2, 9, 6, 10, WRITEMASK_X); - array_live_range a3(3, 10, 1, 10, WRITEMASK_X); - - array_live_range::merge(&a1, &a2); - array_live_range::interleave(&a2, &a3); - - EXPECT_EQ(a1.array_id(), 1); - EXPECT_EQ(a1.begin(), 1); - EXPECT_EQ(a1.end(), 5); - EXPECT_EQ(a1.target_array_id(), 2); - EXPECT_EQ(a1.used_components(), 1); - EXPECT_EQ(a1.access_mask(), WRITEMASK_X); - - EXPECT_EQ(a1.remap_one_swizzle(0), 1); - EXPECT_EQ(a1.remap_one_swizzle(1), -1); - EXPECT_EQ(a1.remap_one_swizzle(2), -1); - EXPECT_EQ(a1.remap_one_swizzle(3), -1); - - EXPECT_EQ(a2.array_id(), 2); - EXPECT_EQ(a2.begin(), 1); - EXPECT_EQ(a2.end(), 10); - EXPECT_EQ(a2.target_array_id(), 3); - EXPECT_EQ(a2.used_components(), 1); - EXPECT_EQ(a2.access_mask(), WRITEMASK_X); - - EXPECT_EQ(a2.remap_one_swizzle(0), 1); - EXPECT_EQ(a2.remap_one_swizzle(1), -1); - EXPECT_EQ(a2.remap_one_swizzle(2), -1); - EXPECT_EQ(a2.remap_one_swizzle(3), -1); - - EXPECT_EQ(a3.array_id(), 3); - EXPECT_EQ(a3.begin(), 1); - EXPECT_EQ(a3.end(), 10); - EXPECT_EQ(a3.target_array_id(), 0); - EXPECT_EQ(a3.used_components(), 2); - EXPECT_EQ(a3.access_mask(), WRITEMASK_XY); - - EXPECT_EQ(a3.remap_one_swizzle(0), 0); - EXPECT_EQ(a3.remap_one_swizzle(1), 1); - EXPECT_EQ(a3.remap_one_swizzle(2), 2); - EXPECT_EQ(a3.remap_one_swizzle(3), 3); -} - -TEST_F(ArrayLiveRangeMerge, MergeMergeAndInterleave) -{ - array_live_range a1(1, 5, 1, 5, WRITEMASK_X); - array_live_range a2(2, 4, 6, 7, WRITEMASK_X); - array_live_range a3(3, 3, 1, 5, WRITEMASK_X); - array_live_range a4(4, 2, 6, 8, WRITEMASK_X); - - array_live_range::merge(&a1, &a2); - array_live_range::merge(&a3, &a4); - array_live_range::interleave(&a1, &a3); - - EXPECT_EQ(a1.array_id(), 1); - EXPECT_EQ(a1.begin(), 1); - EXPECT_EQ(a1.end(), 8); - EXPECT_EQ(a1.target_array_id(), 0); - EXPECT_EQ(a1.used_components(), 2); - EXPECT_EQ(a1.access_mask(), WRITEMASK_XY); - - EXPECT_EQ(a1.remap_one_swizzle(0), 0); - EXPECT_EQ(a1.remap_one_swizzle(1), 1); - EXPECT_EQ(a1.remap_one_swizzle(2), 2); - EXPECT_EQ(a1.remap_one_swizzle(3), 3); - - EXPECT_EQ(a2.array_id(), 2); - EXPECT_EQ(a2.begin(), 6); - EXPECT_EQ(a2.end(), 7); - EXPECT_EQ(a2.target_array_id(), 1); - EXPECT_EQ(a2.used_components(), 1); - EXPECT_EQ(a2.access_mask(), WRITEMASK_X); - - EXPECT_EQ(a2.remap_one_swizzle(0), 0); - EXPECT_EQ(a2.remap_one_swizzle(1), 1); - EXPECT_EQ(a2.remap_one_swizzle(2), 2); - EXPECT_EQ(a2.remap_one_swizzle(3), 3); - - EXPECT_EQ(a3.array_id(), 3); - EXPECT_EQ(a3.begin(), 1); - EXPECT_EQ(a3.end(), 8); - EXPECT_EQ(a3.target_array_id(), 1); - EXPECT_EQ(a3.used_components(), 1); - EXPECT_EQ(a3.access_mask(), WRITEMASK_X); - - EXPECT_EQ(a3.remap_one_swizzle(0), 1); - EXPECT_EQ(a3.remap_one_swizzle(1), -1); - EXPECT_EQ(a3.remap_one_swizzle(2), -1); - EXPECT_EQ(a3.remap_one_swizzle(3), -1); - - EXPECT_EQ(a4.array_id(), 4); - EXPECT_EQ(a4.begin(), 6); - EXPECT_EQ(a4.end(), 8); - EXPECT_EQ(a4.target_array_id(), 3); - EXPECT_EQ(a4.used_components(), 1); - EXPECT_EQ(a4.access_mask(), WRITEMASK_X); - - EXPECT_EQ(a4.remap_one_swizzle(0), 1); - EXPECT_EQ(a4.remap_one_swizzle(1), -1); - EXPECT_EQ(a4.remap_one_swizzle(2), -1); - EXPECT_EQ(a4.remap_one_swizzle(3), -1); - -} - - -TEST_F(ArrayLiveRangeMerge, MergeInterleaveMergeInterleaveChained) -{ - array_live_range a1(1, 8, 1, 5, WRITEMASK_X); - array_live_range a2(2, 9, 6, 10, WRITEMASK_X); - array_live_range a3(3, 10, 1, 10, WRITEMASK_X); - array_live_range a4(4, 11, 11, 20, WRITEMASK_XY); - array_live_range a5(5, 15, 5, 20, WRITEMASK_XY); - - array_live_range::merge(&a1, &a2); - array_live_range::interleave(&a2, &a3); // a2 -> a3 - array_live_range::merge(&a3, &a4); - array_live_range::interleave(&a4, &a5); // a4 -> a5 - - - EXPECT_EQ(a1.array_id(), 1); - EXPECT_EQ(a1.begin(), 1); - EXPECT_EQ(a1.end(), 5); - EXPECT_EQ(a1.target_array_id(), 2); - EXPECT_EQ(a1.used_components(), 1); - EXPECT_EQ(a1.access_mask(), WRITEMASK_X); - - EXPECT_EQ(a1.remap_one_swizzle(0), 3); - EXPECT_EQ(a1.remap_one_swizzle(1), -1); - EXPECT_EQ(a1.remap_one_swizzle(2), -1); - EXPECT_EQ(a1.remap_one_swizzle(3), -1); - - EXPECT_EQ(a2.array_id(), 2); - EXPECT_EQ(a2.begin(), 1); - EXPECT_EQ(a2.end(), 10); - EXPECT_EQ(a2.target_array_id(), 3); - EXPECT_EQ(a2.used_components(), 1); - EXPECT_EQ(a2.access_mask(), WRITEMASK_X); - - EXPECT_EQ(a2.remap_one_swizzle(0), 3); - EXPECT_EQ(a2.remap_one_swizzle(1), -1); - EXPECT_EQ(a2.remap_one_swizzle(2), -1); - EXPECT_EQ(a2.remap_one_swizzle(3), -1); - - EXPECT_EQ(a3.array_id(), 3); - EXPECT_EQ(a3.begin(), 1); - EXPECT_EQ(a3.end(), 10); - EXPECT_EQ(a3.target_array_id(), 4); - EXPECT_EQ(a3.used_components(), 2); - EXPECT_EQ(a3.access_mask(), WRITEMASK_XY); - - EXPECT_EQ(a3.remap_one_swizzle(0), 2); - EXPECT_EQ(a3.remap_one_swizzle(1), 3); - EXPECT_EQ(a3.remap_one_swizzle(2), -1); - EXPECT_EQ(a3.remap_one_swizzle(3), -1); - - EXPECT_EQ(a4.array_id(), 4); - EXPECT_EQ(a4.begin(), 1); - EXPECT_EQ(a4.end(), 20); - EXPECT_EQ(a4.target_array_id(), 5); - EXPECT_EQ(a4.used_components(), 2); - EXPECT_EQ(a4.access_mask(), WRITEMASK_XY); - - EXPECT_EQ(a4.remap_one_swizzle(0), 2); - EXPECT_EQ(a4.remap_one_swizzle(1), 3); - EXPECT_EQ(a4.remap_one_swizzle(2), -1); - EXPECT_EQ(a4.remap_one_swizzle(3), -1); - - EXPECT_EQ(a5.array_id(), 5); - EXPECT_EQ(a5.begin(), 1); - EXPECT_EQ(a5.end(), 20); - EXPECT_EQ(a5.target_array_id(), 0); - EXPECT_EQ(a5.used_components(), 4); - EXPECT_EQ(a5.access_mask(), WRITEMASK_XYZW); - - EXPECT_EQ(a5.remap_one_swizzle(0), 0); - EXPECT_EQ(a5.remap_one_swizzle(1), 1); - EXPECT_EQ(a5.remap_one_swizzle(2), 2); - EXPECT_EQ(a5.remap_one_swizzle(3), 3); -} - -using ArrayMergeTest=testing::Test; - -TEST_F(ArrayMergeTest, ArrayMergeTwoSwizzles) -{ - vector alt = { - {1, 4, 1, 5, WRITEMASK_X}, - {2, 4, 2, 5, WRITEMASK_X}, - }; - - int8_t expect_swizzle[] = {1, -1, -1, -1}; - vector expect = { - {}, - {1, expect_swizzle}, - }; - - vector result(alt.size() + 1); - - get_array_remapping(2, &alt[0], &result[0]); - - EXPECT_EQ(result[1], expect[0]); - EXPECT_EQ(result[2], expect[1]); - -} - -TEST_F(ArrayMergeTest, ArrayMergeFourSwizzles) -{ - vector alt = { - {1, 8, 1, 7, WRITEMASK_X}, - {2, 7, 2, 7, WRITEMASK_X}, - {3, 6, 3, 7, WRITEMASK_X}, - {4, 5, 4, 7, WRITEMASK_X}, - }; - int8_t expect_swizzle1[] = {1, -1, -1, -1}; - int8_t expect_swizzle2[] = {2, -1, -1, -1}; - int8_t expect_swizzle3[] = {3, -1, -1, -1}; - - vector expect = { - {}, - {1, expect_swizzle1}, - {1, expect_swizzle2}, - {1, expect_swizzle3}, - }; - - vector result(alt.size() + 1); - - get_array_remapping(4, &alt[0], &result[0]); - - EXPECT_EQ(result[1], expect[0]); - EXPECT_EQ(result[2], expect[1]); - EXPECT_EQ(result[3], expect[2]); - EXPECT_EQ(result[4], expect[3]); - -} - - -TEST_F(ArrayMergeTest, SimpleChainMerge) -{ - vector input = { - {1, 3, 1, 5, WRITEMASK_XYZW}, - {2, 2, 6, 7, WRITEMASK_XYZW}, - }; - - int8_t expect_swizzle[] = {0, 1, 2, 3}; - vector expect = { - {}, - {1, expect_swizzle}, - }; - - vector result(3); - get_array_remapping(2, &input[0], &result[0]); - - EXPECT_EQ(result[1], expect[0]); - EXPECT_EQ(result[2], expect[1]); -} - -TEST_F(ArrayMergeTest, MergeAndInterleave) -{ - vector input = { - {1, 5, 1, 5, WRITEMASK_X}, - {2, 4, 6, 7, WRITEMASK_X}, - {3, 3, 1, 5, WRITEMASK_X}, - {4, 2, 6, 7, WRITEMASK_X}, - }; - - int8_t expect_swizzle1[] = {0, 1, 2, 3}; - int8_t expect_swizzle2[] = {1, -1, -1, -1}; - int8_t expect_swizzle3[] = {1, -1, -1, -1}; - - vector expect = { - {}, - {1, expect_swizzle1}, - {1, expect_swizzle2}, - {1, expect_swizzle3} - }; - vector result(input.size() + 1); - get_array_remapping(input.size(), &input[0], &result[0]); - - EXPECT_EQ(result[1], expect[0]); - EXPECT_EQ(result[2], expect[1]); - EXPECT_EQ(result[3], expect[2]); - EXPECT_EQ(result[4], expect[3]); -} - -TEST_F(ArrayMergeTest, MergeAndInterleave2) -{ - vector input = { - {1, 5, 1, 5, WRITEMASK_X}, - {2, 4, 6, 7, WRITEMASK_X}, - {3, 3, 1, 8, WRITEMASK_XY}, - {4, 2, 6, 7, WRITEMASK_X}, - }; - - int8_t expect_swizzle1[] = {0, 1, 2, 3}; - int8_t expect_swizzle2[] = {1, 2, -1, -1}; - int8_t expect_swizzle3[] = {3, -1, -1, -1}; - - vector expect = { - {}, - {1, expect_swizzle1}, - {1, expect_swizzle2}, - {1, expect_swizzle3} - }; - vector result(input.size() + 1); - get_array_remapping(input.size(), &input[0], &result[0]); - - EXPECT_EQ(result[1], expect[0]); - EXPECT_EQ(result[2], expect[1]); - EXPECT_EQ(result[3], expect[2]); - EXPECT_EQ(result[4], expect[3]); -} - - -TEST_F(ArrayMergeTest, MergeAndInterleave3) -{ - vector input = { - {1, 5, 1, 5, WRITEMASK_X}, - {2, 4, 6, 7, WRITEMASK_XY}, - {3, 3, 1, 5, WRITEMASK_X} - }; - - int8_t expect_swizzle1[] = {0, 1, 2, 3}; - int8_t expect_swizzle2[] = {1, -1, -1, -1}; - - vector expect = { - {}, - {1, expect_swizzle1}, - {1, expect_swizzle2} - }; - vector result(input.size() + 1); - get_array_remapping(input.size(), &input[0], &result[0]); - - EXPECT_EQ(result[1], expect[0]); - EXPECT_EQ(result[2], expect[1]); - EXPECT_EQ(result[3], expect[2]); -} - -TEST_F(ArrayMergeTest, MergeAndInterleave4) -{ - vector input = { - {1, 7, 1, 5, WRITEMASK_X}, - {2, 6, 6, 7, WRITEMASK_XY}, - {3, 5, 1, 5, WRITEMASK_X}, - {4, 4, 8, 9, WRITEMASK_XYZ}, - {5, 3, 8, 9, WRITEMASK_W}, - {6, 2, 10, 11, WRITEMASK_XYZW}, - }; - - int8_t expect_swizzle1[] = {0, 1, 2, 3}; - int8_t expect_swizzle2[] = {1, -1, -1, -1}; - int8_t expect_swizzle3[] = {0, 1, 2, 3}; - int8_t expect_swizzle4[] = {-1, -1, -1, 3}; - int8_t expect_swizzle5[] = {0, 1, 2, 3}; - - vector expect = { - {}, - {1, expect_swizzle1}, - {1, expect_swizzle2}, - {1, expect_swizzle3}, /* W from below will be interleaved in */ - {1, expect_swizzle4}, - {1, expect_swizzle5} - }; - vector result(input.size() + 1); - get_array_remapping(input.size(), &input[0], &result[0]); - - EXPECT_EQ(result[1], expect[0]); - EXPECT_EQ(result[2], expect[1]); - EXPECT_EQ(result[3], expect[2]); - EXPECT_EQ(result[4], expect[3]); - EXPECT_EQ(result[5], expect[4]); - EXPECT_EQ(result[6], expect[5]); - -} - -TEST_F(ArrayMergeTest, MergeAndInterleave5) -{ - vector input = { - {1, 7, 1, 5, WRITEMASK_X}, - {2, 6, 1, 3, WRITEMASK_X}, - {3, 5, 4, 5, WRITEMASK_X}, - {4, 4, 6, 10, WRITEMASK_XY}, - {5, 8, 1, 10, WRITEMASK_XY} - }; - - /* 1. merge 3 into 2 - * 2. interleave 2 into 1 (x -> y) --- (y -> w) - * 3. merge 4 into 1 / - * 4. interleave 1 into 5 (x,y - z,w) - */ - - /* swizzle1 holds the summary mask */ - int8_t expect_swizzle1[] = {2, 3, -1, -1}; - int8_t expect_swizzle2[] = {3, -1, -1, -1}; - int8_t expect_swizzle3[] = {3, -1, -1, -1}; - int8_t expect_swizzle4[] = {2, 3, -1, -1}; - - vector expect = { - {5, expect_swizzle1}, - {5, expect_swizzle2}, - {5, expect_swizzle3}, - {5, expect_swizzle4}, - {} - }; - vector result(input.size() + 1); - get_array_remapping(input.size(), &input[0], &result[0]); - - EXPECT_EQ(result[1], expect[0]); - EXPECT_EQ(result[2], expect[1]); - EXPECT_EQ(result[3], expect[2]); - EXPECT_EQ(result[4], expect[3]); - EXPECT_EQ(result[5], expect[4]); - -} - -/* Test two arrays life time simple */ -TEST_F(LifetimeEvaluatorExactTest, TwoArraysSimple) -{ - const vector code = { - { TGSI_OPCODE_MOV , {MT(1, 1, WRITEMASK_XYZW)}, {MT(0, in0, "")}, {}, ARR()}, - { TGSI_OPCODE_MOV , {MT(2, 1, WRITEMASK_XYZW)}, {MT(0, in1, "")}, {}, ARR()}, - { TGSI_OPCODE_ADD , {MT(0,out0, WRITEMASK_XYZW)}, {MT(1,1,"xyzw"), MT(2,1,"xyzw")}, {}, ARR()}, - { TGSI_OPCODE_END} - }; - run (code, array_lt_expect({{1,2,0,2, WRITEMASK_XYZW}, {2,2,1,2, WRITEMASK_XYZW}})); -} - -/* Test two arrays life time simple */ -TEST_F(LifetimeEvaluatorExactTest, TwoArraysSimpleSwizzleX_Y) -{ - const vector code = { - { TGSI_OPCODE_MOV , {MT(1, 1, WRITEMASK_X)}, {MT(0, in0, "")}, {}, ARR()}, - { TGSI_OPCODE_MOV , {MT(2, 1, WRITEMASK_Y)}, {MT(0, in1, "")}, {}, ARR()}, - { TGSI_OPCODE_ADD , {MT(0,out0,1)}, {MT(1,1,"x"), MT(2,1,"y")}, {}, ARR()}, - { TGSI_OPCODE_END} - }; - run (code, array_lt_expect({{1, 2, 0, 2, WRITEMASK_X}, {2, 2, 1, 2, WRITEMASK_Y}})); -} - -/* Test array written before loop and read inside, must survive the loop */ -TEST_F(LifetimeEvaluatorExactTest, ArraysWriteBeforLoopReadInside) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in1}, {}}, - { TGSI_OPCODE_MOV, {MT(1, 1, WRITEMASK_X)}, {MT(0, in0, "")}, {}, ARR()}, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_ADD, {MT(0,1, WRITEMASK_X)}, {MT(1,1,"x"), {MT(0,1, "x")}}, {}, ARR()}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_END} - }; - run (code, array_lt_expect({{1, 1, 1, 4, WRITEMASK_X}})); -} - -/* Test array written conditionally in loop must survive the whole loop */ -TEST_F(LifetimeEvaluatorExactTest, ArraysConditionalWriteInNestedLoop) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in1}, {}}, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {1}, {}}, - { TGSI_OPCODE_MOV, {MT(1, 1, WRITEMASK_Z)}, {MT(0, in0, "")}, {}, ARR()}, - { TGSI_OPCODE_ENDIF }, - { TGSI_OPCODE_ADD, {MT(0,1, WRITEMASK_X)}, {MT(1,1,"z"), {MT(0,1, "x")}}, {}, ARR()}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_END} - }; - run (code, array_lt_expect({{1, 1, 1, 8, WRITEMASK_Z}})); -} - -/* Test array read conditionally in loop before write must - * survive the whole loop - */ -TEST_F(LifetimeEvaluatorExactTest, ArraysConditionalReadBeforeWriteInNestedLoop) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in1}, {}}, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {1}, {}}, - { TGSI_OPCODE_ADD, {MT(0,1, WRITEMASK_X)}, {MT(1,1,"z"), {MT(0,1, "x")}}, {}, ARR()}, - { TGSI_OPCODE_ENDIF }, - { TGSI_OPCODE_MOV, {MT(1, 1, WRITEMASK_Z)}, {MT(0, in0, "")}, {}, ARR()}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_END} - }; - run (code, array_lt_expect({{1, 1, 1, 8, WRITEMASK_Z}})); -} - - -/* Test array written conditionally in loop must survive the whole loop */ -TEST_F(LifetimeEvaluatorExactTest, ArraysConditionalWriteInNestedLoop2) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in1}, {}}, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {1}, {}}, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_MOV, {MT(1, 1, WRITEMASK_Z)}, {MT(0, in0, "")}, {}, ARR()}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_ENDIF }, - { TGSI_OPCODE_ADD, {MT(0,1, WRITEMASK_X)}, {MT(1,1,"z"), {MT(0,1, "x")}}, {}, ARR()}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_END} - }; - run (code, array_lt_expect({{1, 1, 1, 10, WRITEMASK_Z}})); -} - - -/* Test distinct loops */ -TEST_F(LifetimeEvaluatorExactTest, ArraysReadWriteInSeparateScopes) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in1}, {}}, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_MOV, {MT(1, 1, WRITEMASK_W)}, {MT(0, in0, "")}, {}, ARR()}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_ADD, {MT(0,1, WRITEMASK_X)}, {MT(1,1,"w"), {MT(0,1, "x")}}, {}, ARR()}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_END} - }; - run (code, array_lt_expect({{1, 1, 2, 6, WRITEMASK_W}})); -} - -class ArrayRemapTest: public MesaTestWithMemCtx { - -public: - void run (const vector& code, - const vector& expect, - vector array_sizes, - vector& remapping) const; - - -}; - -TEST_F(ArrayRemapTest, ApplyMerge) -{ - vector array_sizes{0, 12, 11, 10, 9, 8, 7}; - - int8_t set_swizzle3[] = {1, -1, -1, -1}; - int8_t set_swizzle5[] = {3, -1, -1, -1}; - int8_t set_no_reswizzle[] = {0, 1, 2, 3}; - - vector remapping = { - {}, - array_remapping(), - {1, set_no_reswizzle}, - {1, set_swizzle3}, - {1, set_no_reswizzle}, - {1, set_swizzle5}, - {1, set_no_reswizzle} - }; - - const vector code = { - { TGSI_OPCODE_MOV, {MT(1, 1, WRITEMASK_X)}, {MT(0, in0, "x")}, {}, ARR()}, - { TGSI_OPCODE_MOV, {MT(2, 2, WRITEMASK_XY)}, {MT(0, in0, "xy")}, {}, ARR()}, - { TGSI_OPCODE_MOV, {MT(3, 3, WRITEMASK_X)}, {MT(0, in0, "x")}, {}, ARR()}, - { TGSI_OPCODE_MOV, {MT(4, 4, WRITEMASK_XYZ)}, {MT(0, in0, "xyz")}, {}, ARR()}, - { TGSI_OPCODE_MOV, {MT(5, 5, WRITEMASK_X)}, {MT(0, in0, "x")}, {}, ARR()}, - { TGSI_OPCODE_MOV, {MT(6, 6, WRITEMASK_XYZW)}, {MT(0, in0, "xyzw")}, {}, ARR()}, - - { TGSI_OPCODE_ADD, {MT(0, out0, WRITEMASK_X)}, {MT(1, 1, "x"), MT(0, in0, "y")}, {}, ARR()}, - { TGSI_OPCODE_ADD, {MT(0, out0, WRITEMASK_YZ)}, {MT(2, 2, "xy"), MT(0, in0, "yz")}, {}, ARR()}, - { TGSI_OPCODE_MUL, {MT(0, out0, WRITEMASK_W)}, {MT(3, 3, "x"), MT(0, in0, "x")}, {}, ARR()}, - { TGSI_OPCODE_ADD, {MT(0, out1, WRITEMASK_XYZ)}, {MT(4, 4, "xyz"), MT(0, in0, "xyz")}, {}, ARR()}, - { TGSI_OPCODE_MAD, {MT(0, out1, WRITEMASK_W)}, {MT(5, 5, "x"), MT(3, 1, "x"), MT(1, 1, "x")}, {}, ARR()}, - { TGSI_OPCODE_ADD, {MT(0, out2, WRITEMASK_XYZW)}, {MT(6, 6, "xyzw"), MT(0, in0, "xyzw")}, {}, ARR()}, - - { TGSI_OPCODE_END} - }; - - const vector expect = { - { TGSI_OPCODE_MOV, {MT(1, 1, WRITEMASK_X)}, {MT(0, in0, "x")}, {}, ARR()}, - { TGSI_OPCODE_MOV, {MT(1, 2, WRITEMASK_XY)}, {MT(0, in0, "xy")}, {}, ARR()}, - { TGSI_OPCODE_MOV, {MT(1, 3, WRITEMASK_Y)}, {MT(0, in0, "xx")}, {}, ARR()}, - { TGSI_OPCODE_MOV, {MT(1, 4, WRITEMASK_XYZ)}, {MT(0, in0, "xyz")}, {}, ARR()}, - { TGSI_OPCODE_MOV, {MT(1, 5, WRITEMASK_W)}, {MT(0, in0, "xxxx")}, {}, ARR()}, - { TGSI_OPCODE_MOV, {MT(1, 6, WRITEMASK_XYZW)}, {MT(0, in0, "xyzw")}, {}, ARR()}, - - { TGSI_OPCODE_ADD, {MT(0, out0, WRITEMASK_X)}, {MT(1, 1, "x"), MT(0, in0, "y")}, {}, ARR()}, - { TGSI_OPCODE_ADD, {MT(0, out0, WRITEMASK_YZ)}, {MT(1, 2, "xy"), MT(0, in0, "yz")}, {}, ARR()}, - { TGSI_OPCODE_MUL, {MT(0, out0, WRITEMASK_W)}, {MT(1, 3, "y"), MT(0, in0, "xx")}, {}, ARR()}, - { TGSI_OPCODE_ADD, {MT(0, out1, WRITEMASK_XYZ)}, {MT(1, 4, "xyz"), MT(0, in0, "xyz")}, {}, ARR()}, - { TGSI_OPCODE_MAD, {MT(0, out1, WRITEMASK_W)}, {MT(1, 5, "w"), MT(1, 1, "yyyy"), MT(1, 1, "xxxx")}, {}, ARR()}, - { TGSI_OPCODE_ADD, {MT(0, out2, WRITEMASK_XYZW)}, {MT(1, 6, "xyzw"), MT(0, in0, "xyzw")}, {}, ARR()}, - { TGSI_OPCODE_END} - }; - - run(code, expect, array_sizes, remapping); - -} - -void ArrayRemapTest::run (const vector& code, - const vector& expect, - vector array_sizes, - vector& remapping) const -{ - FakeShader input(code); - FakeShader expect_shader(expect); - exec_list *program = input.get_program(mem_ctx); - - int n_arrays = remap_arrays(array_sizes.size() - 1, &array_sizes[0], - program, &remapping[0]); - - EXPECT_EQ(n_arrays, expect_shader.get_num_arrays()); - - FakeShader remapped_program(program); - - ASSERT_EQ(remapped_program.length(), expect_shader.length()); - - for (size_t i = 0; i < expect_shader.length(); i++) { - EXPECT_EQ(remapped_program.line(i), expect_shader.line(i)); - } - -} diff --git a/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp b/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp deleted file mode 100644 index 4b78ccb356b..00000000000 --- a/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp +++ /dev/null @@ -1,1846 +0,0 @@ -/* - * Copyright © 2017 Gert Wollny - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "tgsi/tgsi_ureg.h" -#include "tgsi/tgsi_info.h" -#include "mesa/program/prog_instruction.h" - -#include -#include -#include -#include - -#include "st_tests_common.h" - -using std::vector; -using std::pair; -using std::make_pair; -using std::transform; -using std::copy; - - -TEST_F(LifetimeEvaluatorExactTest, SimpleMoveAdd) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_UADD, {out0}, {1,in0}, {}}, - { TGSI_OPCODE_END} - }; - run(code, temp_lt_expect({{-1,-1}, {0,1}})); -} - -TEST_F(LifetimeEvaluatorExactTest, SimpleMoveAddMove) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_UADD, {2}, {1,in0}, {}}, - { TGSI_OPCODE_MOV, {out0}, {2}, {}}, - { TGSI_OPCODE_END} - }; - run(code, temp_lt_expect({{-1, -1}, {0,1}, {1,2}})); -} - -/* Test whether the texoffst are actually visited by the - * merge algorithm. Note that it is of no importance - * what instruction is actually used, the MockShader class - * does not consider the details of the operation, only - * the number of arguments is of importance. - */ -TEST_F(LifetimeEvaluatorExactTest, SimpleOpWithTexoffset) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_MOV, {2}, {in1}, {}}, - { TGSI_OPCODE_TEX, {out0}, {in0}, {1,2}}, - { TGSI_OPCODE_END} - }; - run(code, temp_lt_expect({{-1, -1}, {0,2}, {1,2}})); -} - -/* Simple register access involving a loop - * 1: must life up to then end of the loop - * 2: only needs to life from write to read - * 3: only needs to life from write to read outside the loop - */ -TEST_F(LifetimeEvaluatorExactTest, SimpleMoveInLoop) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_UADD, {2}, {1,in0}, {}}, - { TGSI_OPCODE_UADD, {3}, {1,2}, {}}, - { TGSI_OPCODE_UADD, {3}, {3,in1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {3}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,5}, {2,3}, {3,6}})); -} - -/* In loop if/else value written only in one path, and read later - * - value must survive the whole loop. - */ -TEST_F(LifetimeEvaluatorExactTest, MoveInIfInLoop) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in1}, {}}, - { TGSI_OPCODE_UADD, {2}, {1,in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_UADD, {3}, {1,2}, {}}, - { TGSI_OPCODE_UADD, {3}, {3,in1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {3}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,7}, {1,7}, {5,8}})); -} - -/* A non-dominant write within an IF can be ignored (if it is read - * later) - */ -TEST_F(LifetimeEvaluatorExactTest, NonDominantWriteinIfInLoop) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_IF, {}, {in1}, {}}, - { TGSI_OPCODE_MOV, {1}, {in1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_UADD, {2}, {1,in1}, {}}, - { TGSI_OPCODE_IF, {}, {2}, {}}, - { TGSI_OPCODE_BRK}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {2}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {1,5}, {5,10}})); -} - -/* In Nested loop if/else value written only in one path, and read later - * - value must survive the outer loop. - */ -TEST_F(LifetimeEvaluatorExactTest, MoveInIfInNestedLoop) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in1}, {} }, - { TGSI_OPCODE_UADD, {2}, {1,in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_UADD, {3}, {1,2}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {3}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,8}, {1,8}, {6,9}})); -} - -/* In loop if/else value written in both path, and read later - * - value must survive from first write to last read in loop - * for now we only check that the minimum life time is correct. - */ -TEST_F(LifetimeEvaluatorExactTest, WriteInIfAndElseInLoop) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {1}, {}}, - { TGSI_OPCODE_UADD, {2}, {1,in0}, {}}, - { TGSI_OPCODE_ELSE }, - { TGSI_OPCODE_MOV, {2}, {1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_UADD, {3}, {1,2}, {}}, - { TGSI_OPCODE_UADD, {3}, {3,in1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {3}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,9}, {3,7}, {7,10}})); -} - -/* Test that read before write in ELSE path is properly tracked: - * In loop if/else value written in both path but read in else path - * before write and also read later - value must survive the whole loop. - */ -TEST_F(LifetimeEvaluatorExactTest, WriteInIfAndElseReadInElseInLoop) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {1}, {}}, - { TGSI_OPCODE_UADD, {2}, {1,in0}, {}}, - { TGSI_OPCODE_ELSE }, - { TGSI_OPCODE_ADD, {2}, {1,2}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_UADD, {3}, {1,2}, {}}, - { TGSI_OPCODE_UADD, {3}, {3,in1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {3}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,9}, {1,9}, {7,10}})); -} - - -/* Test that a write in ELSE path only in loop is properly tracked: - * In loop if/else value written in else path and read outside - * - value must survive the whole loop. - */ -TEST_F(LifetimeEvaluatorExactTest, WriteInElseReadInLoop) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {1}, {}}, - { TGSI_OPCODE_UADD, {2}, {1,in0}, {}}, - { TGSI_OPCODE_ELSE }, - { TGSI_OPCODE_ADD, {3}, {1,2}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_UADD, {1}, {3,in1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,9}, {1,8}, {1,8}})); -} - -/* Test that tracking a second write in an ELSE path is not attributed - * to the IF path: In loop if/else value written in else path twice and - * read outside - value must survive the whole loop - */ -TEST_F(LifetimeEvaluatorExactTest, WriteInElseTwiceReadInLoop) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {1}, {}}, - { TGSI_OPCODE_UADD, {2}, {1,in0}, {}}, - { TGSI_OPCODE_ELSE }, - { TGSI_OPCODE_ADD, {3}, {1,2}, {}}, - { TGSI_OPCODE_ADD, {3}, {1,3}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_UADD, {1}, {3,in1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,10}, {1,9}, {1,9}})); -} - -/* Test that the IF and ELSE scopes from different IF/ELSE pairs are not - * merged: In loop if/else value written in if, and then in different else path - * and read outside - value must survive the whole loop - */ -TEST_F(LifetimeEvaluatorExactTest, WriteInOneIfandInAnotherElseInLoop) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {1}, {}}, - { TGSI_OPCODE_UADD, {2}, {1,in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_IF, {}, {1}, {}}, - { TGSI_OPCODE_ELSE }, - { TGSI_OPCODE_ADD, {2}, {1,1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_UADD, {1}, {2,in1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,11}, {1,10}})); -} - -/* Test that with a new loop the resolution of the IF/ELSE write conditionality - * is restarted: In first loop value is written in both if and else, in second - * loop value is written only in if - must survive the second loop. - * However, the tracking is currently not able to restrict the lifetime - * in the first loop, hence the "AtLeast" test. - */ -TEST_F(LifetimeEvaluatorAtLeastTest, UnconditionalInFirstLoopConditionalInSecond) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {1}, {}}, - { TGSI_OPCODE_UADD, {2}, {1,in0}, {}}, - { TGSI_OPCODE_ELSE }, - { TGSI_OPCODE_UADD, {2}, {1,in1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {1}, {}}, - { TGSI_OPCODE_ADD, {2}, {in0,1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_UADD, {1}, {2,in1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,14}, {3,13}})); -} - -/* Test that with a new loop the resolution of the IF/ELSE write conditionality - * is restarted, and also takes care of write before read in else scope: - * In first loop value is written in both if and else, in second loop value is - * also written in both, but first read in if - must survive the second loop. - * However, the tracking is currently not able to restrict the lifetime - * in the first loop, hence the "AtLeast" test. - */ -TEST_F(LifetimeEvaluatorAtLeastTest, UnconditionalInFirstLoopConditionalInSecond2) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {1}, {}}, - { TGSI_OPCODE_UADD, {2}, {1,in0}, {}}, - { TGSI_OPCODE_ELSE }, - { TGSI_OPCODE_UADD, {2}, {1,in1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in1}, {}}, - { TGSI_OPCODE_ADD, {2}, {2,1}, {}}, - { TGSI_OPCODE_ELSE }, - { TGSI_OPCODE_MOV, {2}, {1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_UADD, {1}, {2,in1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,16}, {3,15}})); -} - -/* In loop if/else read in one path before written in the same loop - * - value must survive the whole loop - */ -TEST_F(LifetimeEvaluatorExactTest, ReadInIfInLoopBeforeWrite) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_UADD, {2}, {1,3}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_UADD, {3}, {1,2}, {}}, - { TGSI_OPCODE_UADD, {3}, {3,in1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {3}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,7}, {1,7}, {1,8}})); -} - -/* In loop if/else read in one path before written in the same loop - * read after the loop, value must survivethe whole loop and - * to the read. - */ -TEST_F(LifetimeEvaluatorExactTest, ReadInLoopInIfBeforeWriteAndLifeToTheEnd) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MUL, {1}, {1,in1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_UADD, {1}, {1,in1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,6}})); -} - -/* In loop read before written in the same loop read after the loop, - * value must survive the whole loop and to the read. - * This is kind of undefined behaviour though ... - */ -TEST_F(LifetimeEvaluatorExactTest, ReadInLoopBeforeWriteAndLifeToTheEnd) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_MUL, {1}, {1,in1}, {}}, - { TGSI_OPCODE_UADD, {1}, {1,in1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,4}})); -} - -/* Test whether nesting IF/ELSE pairs within a loop is resolved: - * Write in all conditional branches if the inner nesting level and - * read after the outer IF/ELSE pair is closed. The lifetime doesn't have - * to be extended to the full loop. - */ -TEST_F(LifetimeEvaluatorExactTest, NestedIfInLoopAlwaysWriteButNotPropagated) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {3,14}})); -} - -/* Test that nested chaining of IF/ELSE scopes is resolved: - * Write in each IF branch, and open another IF/ELSE scope pair in the ELSE - * branch. At the last nesting level, the temporary is also written in the - * ELSE branch, hence the full constrict results in an unconditional write. - */ -TEST_F(LifetimeEvaluatorExactTest, DeeplyNestedIfElseInLoopResolved) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ADD, {2}, {1, in1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {2}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {2,18}, {18, 20}})); -} - -/* The complementary case of the above: Open deeply nested IF/ELSE clauses - * and only at the deepest nesting level the temporary is written in the IF - * branch, but for all ELSE scopes the value is also written. Like above, when - * the full construct has been executed, the temporary has been written - * unconditionally. - */ -TEST_F(LifetimeEvaluatorExactTest, DeeplyNestedIfElseInLoopResolved2) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ADD, {2}, {1, in1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {2}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {5,18}, {18, 20}})); -} - -/* Test that a write in an IF scope within IF scope where the temporary already - * can be ignored. - */ -TEST_F(LifetimeEvaluatorExactTest, NestedIfElseInLoopResolvedInOuterScope) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ADD, {2}, {1, in1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {2}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {2,9}, {9, 11}})); -} - -/* Here the read before write in the nested if is of no consequence to the - * life time because the variable was already written in the enclosing if-branch. - */ -TEST_F(LifetimeEvaluatorExactTest, NestedIfElseInLoopWithReadResolvedInOuterScope) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_ADD, {1}, {in0, 1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ADD, {2}, {1, in1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {2}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {2,9}, {9, 11}})); -} - -/* Here the nested if condition is of no consequence to the life time - * because the variable was already written in the enclosing else-branch. - */ -TEST_F(LifetimeEvaluatorExactTest, NestedIfElseInLoopResolvedInOuterScope2) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ADD, {2}, {1, in1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {2}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {2,9}, {9, 11}})); -} - -/* Test that tracking of IF/ELSE scopes does not unnessesarily cross loops, - * i.e. if the inner IF/ELSE pair is enclosed by a loop which is enclosed - * by another IF statement: The resolution of unconditionality of the write - * within the loop is not changed by the fact that the loop is enclosed by - * an IF scope. - */ -TEST_F(LifetimeEvaluatorExactTest, NestedIfInLoopAlwaysWriteParentIfOutsideLoop) -{ - const vector code = { - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {2}, {1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {2}, {in1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {out0}, {2}, {}}, - - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {3,12}, {12, 17}})); -} - -/* The value is written in a loop and in a nested IF, but - * not in all code paths, hence the value must survive the loop. - */ -TEST_F(LifetimeEvaluatorExactTest, NestedIfInLoopWriteNotAlways) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,13}})); -} - -/* Test that reading in an ELSE branach after writing is ignored: - * The value is written in a loop in both branches of if-else but also - * read in the else after writing, should have no effect on lifetime. - */ -TEST_F(LifetimeEvaluatorExactTest, IfElseWriteInLoopAlsoReadInElse) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {1}, {in1}, {}}, - { TGSI_OPCODE_MUL, {1}, {in0, 1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {2,7}})); -} - -/* Test that a write in an inner IF/ELSE pair is propagated to the outer - * ELSE branch: The value is written in a loop in both branches of a nested - * IF/ELSE pair, but only within the outer else, hence in summary the write is - * conditional within the loop. - */ -TEST_F(LifetimeEvaluatorExactTest, WriteInNestedIfElseOuterElseOnly) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_ADD, {1}, {in1, in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,10}})); -} - -/* Test that reads in an inner ELSE after write within the enclosing IF branch - * is of no consequence (i.e. check that the read in the ELSE branch is not - * attributed as read before write when the outer ELSE branch is scanned: - * Nested if-else in loop. The value is written in the outer if and else and - * read in one inner else, should limit lifetime. - */ -TEST_F(LifetimeEvaluatorExactTest, WriteUnconditionallyReadInNestedElse) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {out1}, {1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {2,10}})); -} - - -/* Nested if-else in loop. The value is written in a loop in both branches - * of if-else but also read in the second nested else before writing. - * Is conditional. - */ -TEST_F(LifetimeEvaluatorExactTest, NestedIfelseReadFirstInInnerElseInLoop) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {1}, {in1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_ADD, {1}, {in1, 1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,15}})); -} - -/* Test that read before write is properly tracked for nested IF branches. - * The value is written in a loop in both branches of IF/ELSE but also read in - * the second nested IF before writing - is conditional. - */ -TEST_F(LifetimeEvaluatorExactTest, NestedIfelseReadFirstInInnerIfInLoop) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {1}, {in1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_ADD, {1}, {in1, 1}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,15}})); -} - -/* Same as above, but for the secondary ELSE branch: - * The value is written in a loop in both branches of IF/ELSE but also read in - * the second nested ELSE branch before writing - is conditional. - */ -TEST_F(LifetimeEvaluatorExactTest, WriteInOneElseBranchReadFirstInOtherInLoop) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {1}, {in1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_ADD, {1}, {in1, 1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {2,11}})); -} - -/* Test that the "write is unconditional" resolution is not overwritten within - * a loop: The value is written in a loop in both branches of an IF/ELSE clause, - * hence the second IF doesn't make it conditional. - */ -TEST_F(LifetimeEvaluatorExactTest, WriteInIfElseBranchSecondIfInLoop) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {1}, {in1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {2,9}})); -} - -/* Within an IF clause within a loop test that if a write occured in both - * branches of a nested IF/ELSE clause, followed by the last read within the - * enclosing IF or ELSE clause, the combined read is registered as unconditional, - * i.e.that it doesn't extend its live range beyond that enclosing IF or ELSE - * clause. - */ -TEST_F(LifetimeEvaluatorExactTest, DeeplyNestedinLoop) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_UIF, {}, {in0}, {}}, - { TGSI_OPCODE_FSEQ, {1}, {in1,in2}, {}}, - { TGSI_OPCODE_UIF, {}, {1}, {}}, - { TGSI_OPCODE_MOV, {2}, {in1}, {}}, - { TGSI_OPCODE_ELSE }, - { TGSI_OPCODE_MOV, {2}, {in2}, {}}, - { TGSI_OPCODE_ENDIF }, - { TGSI_OPCODE_MOV, {3}, {2}, {}}, - { TGSI_OPCODE_ENDIF }, - { TGSI_OPCODE_ADD, {out0}, {3, in1}, {}}, - { TGSI_OPCODE_ENDLOOP } - }; - run (code, temp_lt_expect({{-1,-1}, {2,3}, {4, 8}, {0,11}})); -} - -/** Regression test for bug #104803, - * Read and write in if/else path outside loop and later read in conditional - * within a loop. The first write is to be considered the dominant write. - */ -TEST_F(LifetimeEvaluatorExactTest, IfElseWriteInBothOutsideLoopReadInElseInLoop) -{ - const vector code = { - { TGSI_OPCODE_IF, {}, {in0}, {} }, - { TGSI_OPCODE_MOV, {1}, {in0}, {} }, - { TGSI_OPCODE_ELSE, {}, {}, {} }, - { TGSI_OPCODE_MOV, {1}, {in1}, {} }, - { TGSI_OPCODE_ENDIF, {}, {}, {} }, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {} }, - { TGSI_OPCODE_MOV, {2}, {in1}, {} }, - { TGSI_OPCODE_ELSE, {}, {}, {} }, - { TGSI_OPCODE_MOV, {2}, {1}, {} }, - { TGSI_OPCODE_ENDIF, {}, {}, {} }, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {2}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {1,11}, {7, 12}})); -} - -/* A continue in the loop is not relevant */ -TEST_F(LifetimeEvaluatorExactTest, LoopWithWriteAfterContinue) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_CONT}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {4,6}})); -} - -/* Temporary used to in case must live up to the case - * statement where it is used, the switch we only keep - * for the actual SWITCH opcode like it is in tgsi_exec.c, the - * only current use case. - */ -TEST_F(LifetimeEvaluatorExactTest, UseSwitchCase) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_MOV, {2}, {in1}, {}}, - { TGSI_OPCODE_MOV, {3}, {in2}, {}}, - { TGSI_OPCODE_SWITCH, {}, {3}, {}}, - { TGSI_OPCODE_CASE, {}, {2}, {}}, - { TGSI_OPCODE_CASE, {}, {1}, {}}, - { TGSI_OPCODE_BRK}, - { TGSI_OPCODE_DEFAULT}, - { TGSI_OPCODE_ENDSWITCH}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,5}, {1,4}, {2,3}})); -} - -/* With two destinations, if one result is thrown away, the - * register must be kept past the writing instructions. - */ -TEST_F(LifetimeEvaluatorExactTest, WriteTwoOnlyUseOne) -{ - const vector code = { - { TGSI_OPCODE_DFRACEXP , {1,2}, {in0}, {}}, - { TGSI_OPCODE_ADD , {3}, {2,in0}, {}}, - { TGSI_OPCODE_MOV, {out1}, {3}, {}}, - { TGSI_OPCODE_END}, - - }; - run (code, temp_lt_expect({{-1,-1}, {0,1}, {0,1}, {1,2}})); -} - -/* If a break is in the loop, all variables written after the - * break and used outside the loop must be maintained for the - * whole loop - */ -TEST_F(LifetimeEvaluatorExactTest, LoopWithWriteAfterBreak) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_BRK}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,6}})); -} - -/* If a break is in the loop, all variables written after the - * break and used outside the loop must be maintained for the - * whole loop. The first break in the loop is the defining one. - */ -TEST_F(LifetimeEvaluatorExactTest, LoopWithWriteAfterBreak2Breaks) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_BRK}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_BRK}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,7}})); -} - -/* Loop with a break at the beginning and read/write in the post - * break loop scope. The value written and read within the loop - * can be limited to [write, read], but the value read outside the - * loop must survive the whole loop. This is the typical code for - * while and for loops, where the breaking condition is tested at - * the beginning. - */ -TEST_F(LifetimeEvaluatorExactTest, LoopWithWriteAndReadAfterBreak) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_BRK}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_MOV, {2}, {1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {2}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {4,5}, {0,7}})); -} - -/* Same as above, just make sure that the life time of the local variable - * in the outer loop (3) is not accidently promoted to the whole loop. - */ -TEST_F(LifetimeEvaluatorExactTest, NestedLoopWithWriteAndReadAfterBreak) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in1}, {}}, - { TGSI_OPCODE_BRK}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_BGNLOOP}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_BRK}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_MOV, {2}, {1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_ADD, {3}, {2,in0}, {}}, - { TGSI_OPCODE_ADD, {4}, {3,in2}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {4}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {8,9}, {0,13}, {11,12}, {0,14}})); -} - -/* If a break is in the loop inside a switch case, make sure it is - * interpreted as breaking that inner loop, i.e. the variable has to - * survive the loop. - */ -TEST_F(LifetimeEvaluatorExactTest, LoopWithWriteAfterBreakInSwitchInLoop) -{ - const vector code = { - { TGSI_OPCODE_SWITCH, {}, {in1}, {}}, - { TGSI_OPCODE_CASE, {}, {in1}, {}}, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_BRK}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_DEFAULT, {}, {}, {}}, - { TGSI_OPCODE_ENDSWITCH, {}, {}, {}}, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {2,10}})); -} - -/* Value written conditionally in one loop and read in another loop, - * and both of these loops are within yet another loop. Here the value - * has to survive the outer loop. - */ -TEST_F(LifetimeEvaluatorExactTest, LoopsWithDifferntScopesConditionalWrite) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,7}})); -} - -/* Value written and read in one loop and last read in another loop, - * Here the value has to survive both loops. - */ -TEST_F(LifetimeEvaluatorExactTest, LoopsWithDifferntScopesFirstReadBeforeWrite) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_MUL, {1}, {1,in0}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,5}})); -} - - -/* Value is written in one switch code path within a loop - * must survive the full loop. - */ -TEST_F(LifetimeEvaluatorExactTest, LoopWithWriteInSwitch) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_SWITCH, {}, {in0}, {} }, - { TGSI_OPCODE_CASE, {}, {in0}, {} }, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_BRK }, - { TGSI_OPCODE_DEFAULT }, - { TGSI_OPCODE_BRK }, - { TGSI_OPCODE_ENDSWITCH }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,9}})); -} - -/* Value written in one case, and read in other,in loop - * - must survive the loop. - */ -TEST_F(LifetimeEvaluatorExactTest, LoopWithReadWriteInSwitchDifferentCase) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_SWITCH, {}, {in0}, {} }, - { TGSI_OPCODE_CASE, {}, {in0}, {} }, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_BRK }, - { TGSI_OPCODE_DEFAULT }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_BRK }, - { TGSI_OPCODE_ENDSWITCH }, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,9}})); -} - -/* Value written in one case, and read in other,in loop - * - must survive the loop, even if the write case falls through. - */ -TEST_F(LifetimeEvaluatorExactTest, LoopWithReadWriteInSwitchDifferentCaseFallThrough) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_SWITCH, {}, {in0}, {} }, - { TGSI_OPCODE_CASE, {}, {in0}, {} }, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_DEFAULT }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_BRK }, - { TGSI_OPCODE_ENDSWITCH }, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,8}})); -} - -/* Here we read and write from an to the same temp in the same instruction, - * but the read is conditional (select operation), hence the lifetime must - * start with the first write. - */ -TEST_F(LifetimeEvaluatorExactTest, WriteSelectFromSelf) -{ - const vector code = { - { TGSI_OPCODE_USEQ, {5}, {in0,in1}, {}}, - { TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}}, - { TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}}, - { TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}}, - { TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}}, - { TGSI_OPCODE_FSLT, {2}, {1,in1}, {}}, - { TGSI_OPCODE_UIF, {}, {2}, {}}, - { TGSI_OPCODE_MOV, {3}, {in1}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {4}, {in1}, {}}, - { TGSI_OPCODE_MOV, {4}, {4}, {}}, - { TGSI_OPCODE_MOV, {3}, {4}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {out1}, {3}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {1,5}, {5,6}, {7,13}, {9,11}, {0,4}})); -} - -/* This test checks wheter the ENDSWITCH is handled properly if the - * last switch case/default doesn't stop with a BRK. - */ -TEST_F(LifetimeEvaluatorExactTest, LoopRWInSwitchCaseLastCaseWithoutBreak) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_SWITCH, {}, {in0}, {} }, - { TGSI_OPCODE_CASE, {}, {in0}, {} }, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_BRK }, - { TGSI_OPCODE_DEFAULT }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_ENDSWITCH }, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,8}})); -} - -/* Value read/write in same case, stays there */ -TEST_F(LifetimeEvaluatorExactTest, LoopWithReadWriteInSwitchSameCase) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_SWITCH, {}, {in0}, {} }, - { TGSI_OPCODE_CASE, {}, {in0}, {} }, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_BRK }, - { TGSI_OPCODE_DEFAULT }, - { TGSI_OPCODE_BRK }, - { TGSI_OPCODE_ENDSWITCH }, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {3,4}})); -} - -/* Value read/write in all cases, should only live from first - * write to last read, but currently the whole loop is used. - */ -TEST_F(LifetimeEvaluatorAtLeastTest, LoopWithReadWriteInSwitchSameCase) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_SWITCH, {}, {in0}, {}}, - { TGSI_OPCODE_CASE, {}, {in0}, {} }, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_BRK }, - { TGSI_OPCODE_DEFAULT }, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_BRK }, - { TGSI_OPCODE_ENDSWITCH }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {3,9}})); -} - -/* First read before first write with nested loops */ -TEST_F(LifetimeEvaluatorExactTest, LoopsWithDifferentScopesCondReadBeforeWrite) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,9}})); -} - -/* First read before first write wiredness with nested loops. - * Here the first read of 2 is logically before the first, dominant - * write, therfore, the 2 has to survive both loops. - */ -TEST_F(LifetimeEvaluatorExactTest, FirstWriteAtferReadInNestedLoop) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_MUL, {2}, {2,1}, {}}, - { TGSI_OPCODE_MOV, {3}, {2}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_ADD, {1}, {1,in1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {3}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,7}, {1,7}, {4,8}})); -} - - -#define DST(X, W) vector>(1, make_pair(X, W)) -#define SRC(X, S) vector>(1, make_pair(X, S)) -#define SRC2(X, S, Y, T) vector>({make_pair(X, S), make_pair(Y, T)}) - -/* Partial write to components: one component was written unconditionally - * but another conditionally, temporary must survive the whole loop. - * Test series for all components. - */ -TEST_F(LifetimeEvaluatorExactTest, LoopWithConditionalComponentWrite_X) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP}, - { TGSI_OPCODE_MOV, DST(1, WRITEMASK_Y), SRC(in1, "x"), {}, SWZ()}, - { TGSI_OPCODE_IF, {}, SRC(in0, "xxxx"), {}, SWZ()}, - { TGSI_OPCODE_MOV, DST(1, WRITEMASK_X), SRC(in1, "y"), {}, SWZ()}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, DST(2, WRITEMASK_XY), SRC(1, "xy"), {}, SWZ()}, - { TGSI_OPCODE_ENDLOOP}, - { TGSI_OPCODE_MOV, DST(out0, WRITEMASK_XYZW), SRC(2, "xyxy"), {}, SWZ()}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,6}, {5,7}})); -} - -TEST_F(LifetimeEvaluatorExactTest, LoopWithConditionalComponentWrite_Y) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP}, - { TGSI_OPCODE_MOV, DST(1, WRITEMASK_X), SRC(in1, "x"), {}, SWZ()}, - { TGSI_OPCODE_IF, {}, SRC(in0, "xxxx"), {}, SWZ()}, - { TGSI_OPCODE_MOV, DST(1, WRITEMASK_Y), SRC(in1, "y"), {}, SWZ()}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, DST(2, WRITEMASK_XY), SRC(1, "xy"), {}, SWZ()}, - { TGSI_OPCODE_ENDLOOP}, - { TGSI_OPCODE_MOV, DST(out0, WRITEMASK_XYZW), SRC(2, "xyxy"), {}, SWZ()}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,6}, {5,7}})); -} - -TEST_F(LifetimeEvaluatorExactTest, LoopWithConditionalComponentWrite_Z) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP}, - { TGSI_OPCODE_MOV, DST(1, WRITEMASK_X), SRC(in1, "x"), {}, SWZ()}, - { TGSI_OPCODE_IF, {}, SRC(in0, "xxxx"), {}, SWZ()}, - { TGSI_OPCODE_MOV, DST(1, WRITEMASK_Z), SRC(in1, "y"), {}, SWZ()}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, DST(2, WRITEMASK_XY), SRC(1, "xz"), {}, SWZ()}, - { TGSI_OPCODE_ENDLOOP}, - { TGSI_OPCODE_MOV, DST(out0, WRITEMASK_XYZW), SRC(2, "xyxy"), {}, SWZ()}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,6}, {5,7}})); -} - -TEST_F(LifetimeEvaluatorExactTest, LoopWithConditionalComponentWrite_W) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP}, - { TGSI_OPCODE_MOV, DST(1, WRITEMASK_X), SRC(in1, "x"), {}, SWZ()}, - { TGSI_OPCODE_IF, {}, SRC(in0, "xxxx"), {}, SWZ()}, - { TGSI_OPCODE_MOV, DST(1, WRITEMASK_W), SRC(in1, "y"), {}, SWZ()}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, DST(2, WRITEMASK_XY), SRC(1, "xw"), {}, SWZ()}, - { TGSI_OPCODE_ENDLOOP}, - { TGSI_OPCODE_MOV, DST(out0, WRITEMASK_XYZW), SRC(2, "xyxy"), {}, SWZ()}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,6}, {5,7}})); -} - -TEST_F(LifetimeEvaluatorExactTest, LoopWithConditionalComponentWrite_X_Read_Y_Before) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP}, - { TGSI_OPCODE_MOV, DST(1, WRITEMASK_X), SRC(in1, "x"), {}, SWZ()}, - { TGSI_OPCODE_IF, {}, SRC(in0, "xxxx"), {}, SWZ()}, - { TGSI_OPCODE_MOV, DST(2, WRITEMASK_XYZW), SRC(1, "yyyy"), {}, SWZ()}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, DST(1, WRITEMASK_YZW), SRC(2, "yyzw"), {}, SWZ()}, - { TGSI_OPCODE_ENDLOOP}, - { TGSI_OPCODE_ADD, DST(out0, WRITEMASK_XYZW), - SRC2(2, "yyzw", 1, "xyxy"), {}, SWZ()}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,7}, {0,7}})); -} - -/* The variable is conditionally read before first written, so - * it has to surive all the loops. - */ -TEST_F(LifetimeEvaluatorExactTest, FRaWSameInstructionInLoopAndCondition) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {} }, - { TGSI_OPCODE_ADD, {1}, {1,in0}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {1}, {in1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_END}, - - }; - run (code, temp_lt_expect({{-1,-1}, {0,7}})); -} - -/* If unconditionally first written and read in the same - * instruction, then the register must be kept for the - * one write, but not more (undefined behaviour) - */ -TEST_F(LifetimeEvaluatorExactTest, FRaWSameInstruction) -{ - const vector code = { - { TGSI_OPCODE_ADD, {1}, {1,in0}, {}}, - { TGSI_OPCODE_END}, - - }; - run (code, temp_lt_expect({{-1,-1}, {0,1}})); -} - -/* If unconditionally written and read in the same - * instruction, various times then the register must be - * kept past the last write, but not longer (undefined behaviour) - */ -TEST_F(LifetimeEvaluatorExactTest, FRaWSameInstructionMoreThenOnce) -{ - const vector code = { - { TGSI_OPCODE_ADD, {1}, {1,in0}, {}}, - { TGSI_OPCODE_ADD, {1}, {1,in0}, {}}, - { TGSI_OPCODE_MOV, {out0}, {in0}, {}}, - { TGSI_OPCODE_END}, - - }; - run (code, temp_lt_expect({{-1,-1}, {0,2}})); -} - -/* Register is only written. This should not happen, - * but to handle the case we want the register to life - * at least one instruction - */ -TEST_F(LifetimeEvaluatorExactTest, WriteOnly) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,1}})); -} - -/* Register is read in IF. - */ -TEST_F(LifetimeEvaluatorExactTest, SimpleReadForIf) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ADD, {out0}, {in0,in1}, {}}, - { TGSI_OPCODE_IF, {}, {1}, {}}, - { TGSI_OPCODE_ENDIF} - }; - run (code, temp_lt_expect({{-1,-1}, {0,2}})); -} - -TEST_F(LifetimeEvaluatorExactTest, WriteTwoReadOne) -{ - const vector code = { - { TGSI_OPCODE_DFRACEXP , {1,2}, {in0}, {}}, - { TGSI_OPCODE_ADD , {3}, {2,in0}, {}}, - { TGSI_OPCODE_MOV, {out1}, {3}, {}}, - { TGSI_OPCODE_END}, - }; - run (code, temp_lt_expect({{-1,-1}, {0,1}, {0,1}, {1,2}})); -} - -TEST_F(LifetimeEvaluatorExactTest, ReadOnly) -{ - const vector code = { - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_END}, - }; - run (code, temp_lt_expect({{-1,-1}, {-1,-1}})); -} - -/* Test handling of missing END marker -*/ -TEST_F(LifetimeEvaluatorExactTest, SomeScopesAndNoEndProgramId) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_IF, {}, {1}, {}}, - { TGSI_OPCODE_MOV, {2}, {1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_IF, {}, {1}, {}}, - { TGSI_OPCODE_MOV, {out0}, {2}, {}}, - { TGSI_OPCODE_ENDIF}, - }; - run (code, temp_lt_expect({{-1,-1}, {0,4}, {2,5}})); -} - -TEST_F(LifetimeEvaluatorExactTest, SerialReadWrite) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_MOV, {2}, {1}, {}}, - { TGSI_OPCODE_MOV, {3}, {2}, {}}, - { TGSI_OPCODE_MOV, {out0}, {3}, {}}, - { TGSI_OPCODE_END}, - }; - run (code, temp_lt_expect({{-1,-1}, {0,1}, {1,2}, {2,3}})); -} - -/* Check that two destination registers are used */ -TEST_F(LifetimeEvaluatorExactTest, TwoDestRegisters) -{ - const vector code = { - { TGSI_OPCODE_DFRACEXP , {1,2}, {in0}, {}}, - { TGSI_OPCODE_ADD, {out0}, {1,2}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,1}, {0,1}})); -} - -/* Check that writing within a loop in a conditional is propagated - * to the outer loop. - */ -TEST_F(LifetimeEvaluatorExactTest, WriteInLoopInConditionalReadOutside) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_BGNLOOP}, - { TGSI_OPCODE_MOV, {1}, {in1}, {}}, - { TGSI_OPCODE_ENDLOOP}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ADD, {2}, {1,in1}, {}}, - { TGSI_OPCODE_ENDLOOP}, - { TGSI_OPCODE_MOV, {out0}, {2}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,7}, {6,8}})); -} - -/* Check that a register written in a loop that is inside a conditional - * is not propagated past that loop if last read is also within the - * conditional -*/ -TEST_F(LifetimeEvaluatorExactTest, WriteInLoopInCondReadInCondOutsideLoop) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_BGNLOOP}, - { TGSI_OPCODE_MUL, {1}, {in2,in1}, {}}, - { TGSI_OPCODE_ENDLOOP}, - { TGSI_OPCODE_ADD, {2}, {1,in1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ENDLOOP}, - { TGSI_OPCODE_MOV, {out0}, {2}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {3,5}, {0,8}})); -} - -/* Check that a register read before written in a loop that is - * inside a conditional is propagated to the outer loop. - */ -TEST_F(LifetimeEvaluatorExactTest, ReadWriteInLoopInCondReadInCondOutsideLoop) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP}, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_BGNLOOP}, - { TGSI_OPCODE_MUL, {1}, {1,in1}, {}}, - { TGSI_OPCODE_ENDLOOP}, - { TGSI_OPCODE_ADD, {2}, {1,in1}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_ENDLOOP}, - { TGSI_OPCODE_MOV, {out0}, {2}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,7}, {0,8}})); -} - -/* With two destinations if one value is thrown away, we must - * ensure that the two output registers don't merge. In this test - * case the last access for 2 and 3 is in line 4, but 4 can only - * be merged with 3 because it is read,2 on the other hand is written - * to, and merging it with 4 would result in a bug. - */ -TEST_F(LifetimeEvaluatorExactTest, WritePastLastRead2) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_MOV, {2}, {in0}, {}}, - { TGSI_OPCODE_ADD, {3}, {1,2}, {}}, - { TGSI_OPCODE_DFRACEXP , {2,4}, {3}, {}}, - { TGSI_OPCODE_MOV, {out1}, {4}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,2}, {1,4}, {2,3}, {3,4}})); -} - -/* Check that three source registers are used */ -TEST_F(LifetimeEvaluatorExactTest, ThreeSourceRegisters) -{ - const vector code = { - { TGSI_OPCODE_DFRACEXP , {1,2}, {in0}, {}}, - { TGSI_OPCODE_ADD , {3}, {in0,in1}, {}}, - { TGSI_OPCODE_MAD, {out0}, {1,2,3}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,2}, {0,2}, {1,2}})); -} - -/* Check minimal lifetime for registers only written to */ -TEST_F(LifetimeEvaluatorExactTest, OverwriteWrittenOnlyTemps) -{ - const vector code = { - { TGSI_OPCODE_MOV , {1}, {in0}, {}}, - { TGSI_OPCODE_MOV , {2}, {in1}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,1}, {1,2}})); -} - -/* Same register is only written twice. This should not happen, - * but to handle the case we want the register to life - * at least past the last write instruction - */ -TEST_F(LifetimeEvaluatorExactTest, WriteOnlyTwiceSame) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,2}})); -} - -/* Dead code elimination should catch and remove the case - * when a variable is written after its last read, but - * we want the code to be aware of this case. - * The life time of this uselessly written variable is set - * to the instruction after the write, because - * otherwise it could be re-used too early. - */ -TEST_F(LifetimeEvaluatorExactTest, WritePastLastRead) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_MOV, {2}, {1}, {}}, - { TGSI_OPCODE_MOV, {1}, {2}, {}}, - { TGSI_OPCODE_END}, - - }; - run (code, temp_lt_expect({{-1,-1}, {0,3}, {1,2}})); -} - -/* If a break is in the loop, all variables written after the - * break and used outside the loop the variable must survive the - * outer loop - */ -TEST_F(LifetimeEvaluatorExactTest, NestedLoopWithWriteAfterBreak) -{ - const vector code = { - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_BGNLOOP }, - { TGSI_OPCODE_IF, {}, {in0}, {}}, - { TGSI_OPCODE_BRK}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {1}, {in0}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_MOV, {out0}, {1}, {}}, - { TGSI_OPCODE_ENDLOOP }, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,8}})); -} - - -#define MT(X,Y,Z) std::make_tuple(X,Y,Z) -/* Check lifetime estimation with a relative addressing in src. - * Note, since the lifetime estimation always extends the lifetime - * at to at least one instruction after the last write, for the - * test the last read must be at least two instructions after the - * last write to obtain a proper test. - */ - -TEST_F(LifetimeEvaluatorExactTest, ReadIndirectReladdr1) -{ - const vector code = { - { TGSI_OPCODE_MOV, {1}, {in1}, {}}, - { TGSI_OPCODE_MOV, {2}, {in0}, {}}, - { TGSI_OPCODE_MOV, {MT(3,0,0)}, {MT(2,1,0)}, {}, RA()}, - { TGSI_OPCODE_MOV, {out0}, {3}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,2}, {1,2}, {2,3}})); -} - -/* Check lifetime estimation with a relative addressing in src */ -TEST_F(LifetimeEvaluatorExactTest, ReadIndirectReladdr2) -{ - const vector code = { - { TGSI_OPCODE_MOV , {1}, {in1}, {}}, - { TGSI_OPCODE_MOV , {2}, {in0}, {}}, - { TGSI_OPCODE_MOV , {MT(3,0,0)}, {MT(4,0,1)}, {}, RA()}, - { TGSI_OPCODE_MOV , {out0}, {3}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,2}, {1,2},{2,3}})); -} - -/* Check lifetime estimation with a relative addressing in src */ -TEST_F(LifetimeEvaluatorExactTest, ReadIndirectTexOffsReladdr1) -{ - const vector code = { - { TGSI_OPCODE_MOV , {1}, {in1}, {}}, - { TGSI_OPCODE_MOV , {2}, {in0}, {}}, - { TGSI_OPCODE_MOV , {MT(3,0,0)}, {MT(in2,0,0)}, {MT(5,1,0)}, RA()}, - { TGSI_OPCODE_MOV , {out0}, {3}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,2}, {1,2}, {2,3}})); -} - -/* Check lifetime estimation with a relative addressing in src */ -TEST_F(LifetimeEvaluatorExactTest, ReadIndirectTexOffsReladdr2) -{ - const vector code = { - { TGSI_OPCODE_MOV , {1}, {in1}, {}}, - { TGSI_OPCODE_MOV , {2}, {in0}, {}}, - { TGSI_OPCODE_MOV , {MT(3,0,0)}, {MT(in2,0,0)}, {MT(2,0,1)}, RA()}, - { TGSI_OPCODE_MOV , {out0}, {3}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,2}, {1,2}, {2,3}})); -} - -/* Check lifetime estimation with a relative addressing in dst */ -TEST_F(LifetimeEvaluatorExactTest, WriteIndirectReladdr1) -{ - const vector code = { - { TGSI_OPCODE_MOV , {1}, {in0}, {}}, - { TGSI_OPCODE_MOV , {1}, {in1}, {}}, - { TGSI_OPCODE_MOV , {MT(5,1,0)}, {MT(in1,0,0)}, {}, RA()}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,2}})); -} - -/* Check lifetime estimation with a relative addressing in dst */ -TEST_F(LifetimeEvaluatorExactTest, WriteIndirectReladdr2) -{ - const vector code = { - { TGSI_OPCODE_MOV , {1}, {in0}, {}}, - { TGSI_OPCODE_MOV , {2}, {in1}, {}}, - { TGSI_OPCODE_MOV , {MT(5,0,1)}, {MT(in1,0,0)}, {}, RA()}, - { TGSI_OPCODE_MOV , {out0}, {in0}, {}}, - { TGSI_OPCODE_MOV , {out1}, {2}, {}}, - { TGSI_OPCODE_END} - }; - run (code, temp_lt_expect({{-1,-1}, {0,2}, {1,4}})); -} - -/* Test remapping table of registers. The tests don't assume - * that the sorting algorithm used to sort the lifetimes - * based on their 'begin' is stable. - */ -TEST_F(RegisterRemappingTest, RegisterRemapping1) -{ - vector lt({{-1,-1}, - {0,1}, - {0,2}, - {1,2}, - {2,10}, - {3,5}, - {5,10} - }); - - vector expect({0,1,2,1,1,2,2}); - run(lt, expect); -} - -TEST_F(RegisterRemappingTest, RegisterRemapping2) -{ - vector lt({{-1,-1}, - {0,1}, - {0,2}, - {3,4}, - {4,5}, - }); - vector expect({0,1,2,1,1}); - run(lt, expect); -} - -TEST_F(RegisterRemappingTest, RegisterRemappingMergeAllToOne) -{ - vector lt({{-1,-1}, - {0,1}, - {1,2}, - {2,3}, - {3,4}, - }); - vector expect({0,1,1,1,1}); - run(lt, expect); -} - -TEST_F(RegisterRemappingTest, RegisterRemappingIgnoreUnused) -{ - vector lt({{-1,-1}, - {0,1}, - {1,2}, - {2,3}, - {-1,-1}, - {3,4}, - }); - vector expect({0,1,1,1,4,1}); - run(lt, expect); -} - -TEST_F(RegisterRemappingTest, RegisterRemappingMergeZeroLifetimeRegisters) -{ - vector lt({{-1,-1}, - {0,1}, - {1,2}, - {2,3}, - {3,3}, - {3,4}, - }); - vector expect({0,1,1,1,1,1}); - run(lt, expect); -} - -TEST_F(RegisterLifetimeAndRemappingTest, LifetimeAndRemapping) -{ - const vector code = { - { TGSI_OPCODE_USEQ, {5}, {in0,in1}, {}}, - { TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}}, - { TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}}, - { TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}}, - { TGSI_OPCODE_UCMP, {1}, {5,in1,1}, {}}, - { TGSI_OPCODE_FSLT, {2}, {1,in1}, {}}, - { TGSI_OPCODE_UIF, {}, {2}, {}}, - { TGSI_OPCODE_MOV, {3}, {in1}, {}}, - { TGSI_OPCODE_ELSE}, - { TGSI_OPCODE_MOV, {4}, {in1}, {}}, - { TGSI_OPCODE_MOV, {4}, {4}, {}}, - { TGSI_OPCODE_MOV, {3}, {4}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {out1}, {3}, {}}, - { TGSI_OPCODE_END} - }; - run (code, vector({0,1,5,5,1,5})); -} - -TEST_F(RegisterLifetimeAndRemappingTest, LifetimeAndRemappingWithUnusedReadOnlyIgnored) -{ - const vector code = { - { TGSI_OPCODE_USEQ, {1}, {in0,in1}, {}}, - { TGSI_OPCODE_UCMP, {2}, {1,in1,2}, {}}, - { TGSI_OPCODE_UCMP, {4}, {2,in1,1}, {}}, - { TGSI_OPCODE_ADD, {5}, {2,4}, {}}, - { TGSI_OPCODE_UIF, {}, {7}, {}}, - { TGSI_OPCODE_ADD, {8}, {5,4}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {out1}, {8}, {}}, - { TGSI_OPCODE_END} - }; - /* lt: 1: 0-2,2: 1-3 3: u 4: 2-5 5: 3-5 6: u 7: 0-(-1),8: 5-7 */ - run (code, vector({0,1,2,3,1,2,6,7,1})); -} - -TEST_F(RegisterLifetimeAndRemappingTest, LifetimeAndRemappingWithUnusedReadOnlyRemappedTo) -{ - const vector code = { - { TGSI_OPCODE_USEQ, {1}, {in0,in1}, {}}, - { TGSI_OPCODE_UIF, {}, {7}, {}}, - { TGSI_OPCODE_UCMP, {2}, {1,in1,2}, {}}, - { TGSI_OPCODE_UCMP, {4}, {2,in1,1}, {}}, - { TGSI_OPCODE_ADD, {5}, {2,4}, {}}, - { TGSI_OPCODE_ADD, {8}, {5,4}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {out1}, {8}, {}}, - { TGSI_OPCODE_END} - }; - /* lt: 1: 0-3,2: 2-4 3: u 4: 3-5 5: 4-5 6: u 7: 1-1,8: 5-7 */ - run (code, vector({0,1,2,3,1,2,6,7,1})); -} - -TEST_F(RegisterLifetimeAndRemappingTest, LifetimeAndRemappingWithUnusedReadOnlyRemapped) -{ - const vector code = { - { TGSI_OPCODE_USEQ, {0}, {in0,in1}, {}}, - { TGSI_OPCODE_UCMP, {2}, {0,in1,2}, {}}, - { TGSI_OPCODE_UCMP, {4}, {2,in1,0}, {}}, - { TGSI_OPCODE_UIF, {}, {7}, {}}, - { TGSI_OPCODE_ADD, {5}, {4,4}, {}}, - { TGSI_OPCODE_ADD, {8}, {5,4}, {}}, - { TGSI_OPCODE_ENDIF}, - { TGSI_OPCODE_MOV, {out1}, {8}, {}}, - { TGSI_OPCODE_END} - }; - /* lt: 0: 0-2 1: u 2: 1-2 3: u 4: 2-5 5: 4-5 6: u 7:ro 8: 5-7 */ - run (code, vector({0,1,2,3,0,2,6,7,0})); -}