diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 28562a79701..1b05b6bd955 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -112,6 +112,7 @@ LIBGLSL_FILES = \
 	glsl/lower_packed_varyings.cpp \
 	glsl/lower_named_interface_blocks.cpp \
 	glsl/lower_packing_builtins.cpp \
+	glsl/lower_precision.cpp \
 	glsl/lower_subroutine.cpp \
 	glsl/lower_tess_level.cpp \
 	glsl/lower_texture_projection.cpp \
diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp
index 293437acfcd..b13e1fe826b 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -2232,7 +2232,12 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
    shader->Version = state->language_version;
    shader->IsES = state->es_shader;
 
+   struct gl_shader_compiler_options *options =
+      &ctx->Const.ShaderCompilerOptions[shader->Stage];
+
    if (!state->error && !shader->ir->is_empty()) {
+      if (options->LowerPrecision && shader->Stage == MESA_SHADER_FRAGMENT)
+         lower_precision(shader->ir);
       assign_subroutine_indexes(state);
       lower_subroutine(shader->ir, state);
       opt_shader_and_create_symbol_table(ctx, state->symbols, shader);
diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp
index d8fbc804ff0..e6baf2add06 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -2559,6 +2559,9 @@ nir_visitor::visit(ir_texture *ir)
    case GLSL_TYPE_FLOAT:
       instr->dest_type = nir_type_float;
       break;
+   case GLSL_TYPE_FLOAT16:
+      instr->dest_type = nir_type_float16;
+      break;
    case GLSL_TYPE_INT:
       instr->dest_type = nir_type_int;
       break;
diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
index 20766db3354..94412884765 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -188,4 +188,6 @@ ir_variable *compare_index_block(ir_builder::ir_factory &body,
 bool lower_64bit_integer_instructions(exec_list *instructions,
                                       unsigned what_to_lower);
 
+bool lower_precision(exec_list *instructions);
+
 #endif /* GLSL_IR_OPTIMIZATION_H */
diff --git a/src/compiler/glsl/lower_precision.cpp b/src/compiler/glsl/lower_precision.cpp
new file mode 100644
index 00000000000..2a944f73c96
--- /dev/null
+++ b/src/compiler/glsl/lower_precision.cpp
@@ -0,0 +1,618 @@
+/*
+ * Copyright © 2019 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_precision.cpp
+ */
+
+#include "main/macros.h"
+#include "compiler/glsl_types.h"
+#include "ir.h"
+#include "ir_builder.h"
+#include "ir_optimization.h"
+#include "ir_rvalue_visitor.h"
+#include "util/half_float.h"
+#include "util/set.h"
+#include <vector>
+
+namespace {
+
+class find_precision_visitor : public ir_rvalue_enter_visitor {
+public:
+   find_precision_visitor();
+   ~find_precision_visitor();
+
+   virtual void handle_rvalue(ir_rvalue **rvalue);
+
+   bool progress;
+
+   /* Set of rvalues that can be lowered. This will be filled in by
+    * find_lowerable_rvalues_visitor. Only the root node of a lowerable section
+    * will be added to this set.
+    */
+   struct set *lowerable_rvalues;
+};
+
+class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor {
+public:
+   enum can_lower_state {
+      UNKNOWN,
+      CANT_LOWER,
+      SHOULD_LOWER,
+   };
+
+   enum parent_relation {
+      /* The parent performs a further operation involving the result from the
+       * child and can be lowered along with it.
+       */
+      COMBINED_OPERATION,
+      /* The parent instruction’s operation is independent of the child type so
+       * the child should be lowered separately.
+       */
+      INDEPENDENT_OPERATION,
+   };
+
+   struct stack_entry {
+      ir_instruction *instr;
+      enum can_lower_state state;
+      /* List of child rvalues that can be lowered. When this stack entry is
+       * popped, if this node itself can’t be lowered then all of the children
+       * are root nodes to lower so we will add them to lowerable_rvalues.
+       * Otherwise if this node can also be lowered then we won’t add the
+       * children because we only want to add the topmost lowerable nodes to
+       * lowerable_rvalues and the children will be lowered as part of lowering
+       * this node.
+       */
+      std::vector<ir_instruction *> lowerable_children;
+   };
+
+   find_lowerable_rvalues_visitor(struct set *result);
+
+   static void stack_enter(class ir_instruction *ir, void *data);
+   static void stack_leave(class ir_instruction *ir, void *data);
+
+   virtual ir_visitor_status visit(ir_constant *ir);
+   virtual ir_visitor_status visit(ir_dereference_variable *ir);
+
+   virtual ir_visitor_status visit_enter(ir_dereference_record *ir);
+   virtual ir_visitor_status visit_enter(ir_dereference_array *ir);
+   virtual ir_visitor_status visit_enter(ir_texture *ir);
+   virtual ir_visitor_status visit_enter(ir_expression *ir);
+
+   virtual ir_visitor_status visit_leave(ir_assignment *ir);
+   virtual ir_visitor_status visit_leave(ir_call *ir);
+
+   static can_lower_state handle_precision(const glsl_type *type,
+                                           int precision);
+
+   static parent_relation get_parent_relation(ir_instruction *parent,
+                                              ir_instruction *child);
+
+   std::vector<stack_entry> stack;
+   struct set *lowerable_rvalues;
+
+   void pop_stack_entry();
+   void add_lowerable_children(const stack_entry &entry);
+};
+
+class lower_precision_visitor : public ir_rvalue_visitor {
+public:
+   virtual void handle_rvalue(ir_rvalue **rvalue);
+   virtual ir_visitor_status visit_enter(ir_dereference_array *);
+   virtual ir_visitor_status visit_enter(ir_dereference_record *);
+   virtual ir_visitor_status visit_enter(ir_call *ir);
+   virtual ir_visitor_status visit_enter(ir_texture *ir);
+   virtual ir_visitor_status visit_leave(ir_expression *);
+};
+
+bool
+can_lower_type(const glsl_type *type)
+{
+   /* Don’t lower any expressions involving non-float types except bool and
+    * texture samplers. This will rule out operations that change the type such
+    * as conversion to ints. Instead it will end up lowering the arguments
+    * instead and adding a final conversion to float32. We want to handle
+    * boolean types so that it will do comparisons as 16-bit.
+    */
+
+   switch (type->base_type) {
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL:
+   case GLSL_TYPE_SAMPLER:
+      return true;
+
+   default:
+      return false;
+   }
+}
+
+find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res)
+{
+   lowerable_rvalues = res;
+   callback_enter = stack_enter;
+   callback_leave = stack_leave;
+   data_enter = this;
+   data_leave = this;
+}
+
+void
+find_lowerable_rvalues_visitor::stack_enter(class ir_instruction *ir,
+                                            void *data)
+{
+   find_lowerable_rvalues_visitor *state =
+      (find_lowerable_rvalues_visitor *) data;
+
+   /* Add a new stack entry for this instruction */
+   stack_entry entry;
+
+   entry.instr = ir;
+   entry.state = state->in_assignee ? CANT_LOWER : UNKNOWN;
+
+   state->stack.push_back(entry);
+}
+
+void
+find_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry &entry)
+{
+   /* We can’t lower this node so if there were any pending children then they
+    * are all root lowerable nodes and we should add them to the set.
+    */
+   for (auto &it : entry.lowerable_children)
+      _mesa_set_add(lowerable_rvalues, it);
+}
+
+void
+find_lowerable_rvalues_visitor::pop_stack_entry()
+{
+   const stack_entry &entry = stack.end()[-1];
+
+   if (stack.size() >= 2) {
+      /* Combine this state into the parent state, unless the parent operation
+       * doesn’t have any relation to the child operations
+       */
+      stack_entry &parent = stack.end()[-2];
+      parent_relation rel = get_parent_relation(parent.instr, entry.instr);
+
+      if (rel == COMBINED_OPERATION) {
+         switch (entry.state) {
+         case CANT_LOWER:
+            parent.state = CANT_LOWER;
+            break;
+         case SHOULD_LOWER:
+            if (parent.state == UNKNOWN)
+               parent.state = SHOULD_LOWER;
+            break;
+         case UNKNOWN:
+            break;
+         }
+      }
+   }
+
+   if (entry.state == SHOULD_LOWER) {
+      ir_rvalue *rv = entry.instr->as_rvalue();
+
+      if (rv == NULL) {
+         add_lowerable_children(entry);
+      } else if (stack.size() >= 2) {
+         stack_entry &parent = stack.end()[-2];
+
+         switch (get_parent_relation(parent.instr, rv)) {
+         case COMBINED_OPERATION:
+            /* We only want to add the toplevel lowerable instructions to the
+             * lowerable set. Therefore if there is a parent then instead of
+             * adding this instruction to the set we will queue depending on
+             * the result of the parent instruction.
+             */
+            parent.lowerable_children.push_back(entry.instr);
+            break;
+         case INDEPENDENT_OPERATION:
+            _mesa_set_add(lowerable_rvalues, rv);
+            break;
+         }
+      } else {
+         /* This is a toplevel node so add it directly to the lowerable
+          * set.
+          */
+         _mesa_set_add(lowerable_rvalues, rv);
+      }
+   } else if (entry.state == CANT_LOWER) {
+      add_lowerable_children(entry);
+   }
+
+   stack.pop_back();
+}
+
+void
+find_lowerable_rvalues_visitor::stack_leave(class ir_instruction *ir,
+                                            void *data)
+{
+   find_lowerable_rvalues_visitor *state =
+      (find_lowerable_rvalues_visitor *) data;
+
+   state->pop_stack_entry();
+}
+
+enum find_lowerable_rvalues_visitor::can_lower_state
+find_lowerable_rvalues_visitor::handle_precision(const glsl_type *type,
+                                                 int precision)
+{
+   if (!can_lower_type(type))
+      return CANT_LOWER;
+
+   switch (precision) {
+   case GLSL_PRECISION_NONE:
+      return UNKNOWN;
+   case GLSL_PRECISION_HIGH:
+      return CANT_LOWER;
+   case GLSL_PRECISION_MEDIUM:
+   case GLSL_PRECISION_LOW:
+      return SHOULD_LOWER;
+   }
+
+   return CANT_LOWER;
+}
+
+enum find_lowerable_rvalues_visitor::parent_relation
+find_lowerable_rvalues_visitor::get_parent_relation(ir_instruction *parent,
+                                                    ir_instruction *child)
+{
+   /* If the parent is a dereference instruction then the only child could be
+    * for example an array dereference and that should be lowered independently
+    * of the parent.
+    */
+   if (parent->as_dereference())
+      return INDEPENDENT_OPERATION;
+
+   /* The precision of texture sampling depends on the precision of the sampler.
+    * The rest of the arguments don’t matter so we can treat it as an
+    * independent operation.
+    */
+   if (parent->as_texture())
+      return INDEPENDENT_OPERATION;
+
+   return COMBINED_OPERATION;
+}
+
+ir_visitor_status
+find_lowerable_rvalues_visitor::visit(ir_constant *ir)
+{
+   stack_enter(ir, this);
+
+   if (!can_lower_type(ir->type))
+      stack.end()[-1].state = CANT_LOWER;
+
+   stack_leave(ir, this);
+
+   return visit_continue;
+}
+
+ir_visitor_status
+find_lowerable_rvalues_visitor::visit(ir_dereference_variable *ir)
+{
+   stack_enter(ir, this);
+
+   if (stack.end()[-1].state == UNKNOWN)
+      stack.end()[-1].state = handle_precision(ir->type, ir->precision());
+
+   stack_leave(ir, this);
+
+   return visit_continue;
+}
+
+ir_visitor_status
+find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record *ir)
+{
+   ir_hierarchical_visitor::visit_enter(ir);
+
+   if (stack.end()[-1].state == UNKNOWN)
+      stack.end()[-1].state = handle_precision(ir->type, ir->precision());
+
+   return visit_continue;
+}
+
+ir_visitor_status
+find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array *ir)
+{
+   ir_hierarchical_visitor::visit_enter(ir);
+
+   if (stack.end()[-1].state == UNKNOWN)
+      stack.end()[-1].state = handle_precision(ir->type, ir->precision());
+
+   return visit_continue;
+}
+
+ir_visitor_status
+find_lowerable_rvalues_visitor::visit_enter(ir_texture *ir)
+{
+   ir_hierarchical_visitor::visit_enter(ir);
+
+   if (stack.end()[-1].state == UNKNOWN) {
+      /* The precision of the sample value depends on the precision of the
+       * sampler.
+       */
+      stack.end()[-1].state = handle_precision(ir->type,
+                                               ir->sampler->precision());
+   }
+
+   return visit_continue;
+}
+
+ir_visitor_status
+find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
+{
+   ir_hierarchical_visitor::visit_enter(ir);
+
+   if (!can_lower_type(ir->type))
+      stack.end()[-1].state = CANT_LOWER;
+
+   /* Don't lower precision for derivative calculations */
+   if (ir->operation == ir_unop_dFdx ||
+       ir->operation == ir_unop_dFdx_coarse ||
+       ir->operation == ir_unop_dFdx_fine ||
+       ir->operation == ir_unop_dFdy ||
+       ir->operation == ir_unop_dFdy_coarse ||
+       ir->operation == ir_unop_dFdy_fine) {
+      stack.end()[-1].state = CANT_LOWER;
+   }
+
+   return visit_continue;
+}
+
+ir_visitor_status
+find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
+{
+   ir_hierarchical_visitor::visit_leave(ir);
+
+   /* Special case for handling temporary variables generated by the compiler
+    * for function calls. If we assign to one of these using a function call
+    * that has a lowerable return type then we can assume the temporary
+    * variable should have a medium precision too.
+    */
+
+   /* Do nothing if the return type is void. */
+   if (!ir->return_deref)
+      return visit_continue;
+
+   ir_variable *var = ir->return_deref->variable_referenced();
+
+   assert(var->data.mode == ir_var_temporary);
+
+   can_lower_state lower_state =
+      handle_precision(var->type, ir->callee->return_precision);
+
+   if (lower_state == SHOULD_LOWER) {
+      /* There probably shouldn’t be any situations where multiple ir_call
+       * instructions write to the same temporary?
+       */
+      assert(var->data.precision == GLSL_PRECISION_NONE);
+      var->data.precision = GLSL_PRECISION_MEDIUM;
+   } else {
+      var->data.precision = GLSL_PRECISION_HIGH;
+   }
+
+   return visit_continue;
+}
+
+ir_visitor_status
+find_lowerable_rvalues_visitor::visit_leave(ir_assignment *ir)
+{
+   ir_hierarchical_visitor::visit_leave(ir);
+
+   /* Special case for handling temporary variables generated by the compiler.
+    * If we assign to one of these using a lowered precision then we can assume
+    * the temporary variable should have a medium precision too.
+    */
+   ir_variable *var = ir->lhs->variable_referenced();
+
+   if (var->data.mode == ir_var_temporary) {
+      if (_mesa_set_search(lowerable_rvalues, ir->rhs)) {
+         /* Only override the precision if this is the first assignment. For
+          * temporaries such as the ones generated for the ?: operator there
+          * can be multiple assignments with different precisions. This way we
+          * get the highest precision of all of the assignments.
+          */
+         if (var->data.precision == GLSL_PRECISION_NONE)
+            var->data.precision = GLSL_PRECISION_MEDIUM;
+      } else if (!ir->rhs->as_constant()) {
+         var->data.precision = GLSL_PRECISION_HIGH;
+      }
+   }
+
+   return visit_continue;
+}
+
+void
+find_lowerable_rvalues(exec_list *instructions,
+                       struct set *result)
+{
+   find_lowerable_rvalues_visitor v(result);
+
+   visit_list_elements(&v, instructions);
+
+   assert(v.stack.empty());
+}
+
+static ir_rvalue *
+convert_precision(int op, ir_rvalue *ir)
+{
+   unsigned base_type = (op == ir_unop_f2fmp ?
+                        GLSL_TYPE_FLOAT16 : GLSL_TYPE_FLOAT);
+   const glsl_type *desired_type;
+   desired_type = glsl_type::get_instance(base_type,
+                             ir->type->vector_elements,
+                             ir->type->matrix_columns);
+
+   void *mem_ctx = ralloc_parent(ir);
+   return new(mem_ctx) ir_expression(op, desired_type, ir, NULL);
+}
+
+void
+lower_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+   ir_rvalue *ir = *rvalue;
+
+   if (ir == NULL)
+      return;
+
+   if (ir->as_dereference()) {
+      if (!ir->type->is_boolean())
+         *rvalue = convert_precision(ir_unop_f2fmp, ir);
+   } else if (ir->type->is_float()) {
+      ir->type = glsl_type::get_instance(GLSL_TYPE_FLOAT16,
+                                         ir->type->vector_elements,
+                                         ir->type->matrix_columns,
+                                         ir->type->explicit_stride,
+                                         ir->type->interface_row_major);
+
+      ir_constant *const_ir = ir->as_constant();
+
+      if (const_ir) {
+         ir_constant_data value;
+
+         for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
+            value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]);
+
+         const_ir->value = value;
+      }
+   }
+}
+
+ir_visitor_status
+lower_precision_visitor::visit_enter(ir_dereference_record *ir)
+{
+   /* We don’t want to lower the variable */
+   return visit_continue_with_parent;
+}
+
+ir_visitor_status
+lower_precision_visitor::visit_enter(ir_dereference_array *ir)
+{
+   /* We don’t want to convert the array index or the variable. If the array
+    * index itself is lowerable that will be handled separately.
+    */
+   return visit_continue_with_parent;
+}
+
+ir_visitor_status
+lower_precision_visitor::visit_enter(ir_call *ir)
+{
+   /* We don’t want to convert the arguments. These will be handled separately.
+    */
+   return visit_continue_with_parent;
+}
+
+ir_visitor_status
+lower_precision_visitor::visit_enter(ir_texture *ir)
+{
+   /* We don’t want to convert the arguments. These will be handled separately.
+    */
+   return visit_continue_with_parent;
+}
+
+ir_visitor_status
+lower_precision_visitor::visit_leave(ir_expression *ir)
+{
+   ir_rvalue_visitor::visit_leave(ir);
+
+   /* If the expression is a conversion operation to or from bool then fix the
+    * operation.
+    */
+   switch (ir->operation) {
+   case ir_unop_b2f:
+      ir->operation = ir_unop_b2f16;
+      break;
+   case ir_unop_f2b:
+      ir->operation = ir_unop_f162b;
+      break;
+   default:
+      break;
+   }
+
+   return visit_continue;
+}
+
+void
+find_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+   /* The set of lowerable rvalues has already been filled in by
+    * find_lowerable_rvalues_visitor.
+    * Only an rvalue that is found in that set is rewritten here, by
+    * running lower_precision_visitor over it to add the conversions.
+    */
+   if (*rvalue == NULL)
+      return;
+
+   struct set_entry *entry = _mesa_set_search(lowerable_rvalues, *rvalue);
+
+   if (!entry)
+      return;
+
+   _mesa_set_remove(lowerable_rvalues, entry);
+
+   /* If the entire expression is just a variable dereference then trying to
+    * lower it will just directly add pointless to and from conversions without
+    * any actual operation in-between. Although these will eventually get
+    * optimised out, avoiding generating them here also avoids breaking inout
+    * parameters to functions.
+    */
+   if ((*rvalue)->as_dereference())
+      return;
+
+   lower_precision_visitor v;
+
+   (*rvalue)->accept(&v);
+   v.handle_rvalue(rvalue);
+
+   /* We don’t need to add the final conversion if the final type has been
+    * converted to bool
+    */
+   if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL)
+      *rvalue = convert_precision(ir_unop_f162f, *rvalue);
+
+   progress = true;
+}
+
+find_precision_visitor::find_precision_visitor()
+   : progress(false),
+     lowerable_rvalues(_mesa_pointer_set_create(NULL))
+{
+}
+
+find_precision_visitor::~find_precision_visitor()
+{
+   _mesa_set_destroy(lowerable_rvalues, NULL);
+}
+
+}
+
+bool
+lower_precision(exec_list *instructions)
+{
+   find_precision_visitor v;
+
+   find_lowerable_rvalues(instructions, v.lowerable_rvalues);
+
+   visit_list_elements(&v, instructions);
+
+   return v.progress;
+}
diff --git a/src/compiler/glsl/meson.build b/src/compiler/glsl/meson.build
index 3f03d27511d..8985da79b3b 100644
--- a/src/compiler/glsl/meson.build
+++ b/src/compiler/glsl/meson.build
@@ -162,6 +162,7 @@ files_libglsl = files(
   'lower_packed_varyings.cpp',
   'lower_named_interface_blocks.cpp',
   'lower_packing_builtins.cpp',
+  'lower_precision.cpp',
   'lower_subroutine.cpp',
   'lower_tess_level.cpp',
   'lower_texture_projection.cpp',
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 8977a50eaeb..80253cf8296 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3179,6 +3179,12 @@ struct gl_shader_compiler_options
     * be lowered for transform feedback
     **/
 
+   /**
+    * If we can lower the precision of variables based on precision
+    * qualifiers
+    */
+   GLboolean LowerPrecision;
+
    /**
     * \name Forms of indirect addressing the driver cannot do.
     */
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 85e249a377e..0c03c2fc221 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -338,6 +338,12 @@ void st_init_limits(struct pipe_screen *screen,
          if (screen->get_param(screen, PIPE_CAP_PSIZ_CLAMPED))
             options->LowerBuiltinVariablesXfb |= VARYING_BIT_PSIZ;
       }
+
+      /* Initialize lower precision shader compiler option based on
+       * the value of PIPE_SHADER_CAP_FP16.
+       */
+      options->LowerPrecision =
+         screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_FP16);
    }
 
    c->MaxUserAssignableUniformLocations =