mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 00:00:11 +01:00
glsl: Rebalance expression trees that are reduction operations.
The intention of this pass was to give us better instruction scheduling opportunities, but it unexpectedly reduced some instruction counts as well: total instructions in shared programs: 1666639 -> 1666073 (-0.03%) instructions in affected programs: 54612 -> 54046 (-1.04%) (and trades 4 SIMD16 programs in SS3)
This commit is contained in:
parent
d300f3f51a
commit
1d9f74eda7
4 changed files with 303 additions and 0 deletions
|
|
@ -96,6 +96,7 @@ LIBGLSL_FILES = \
|
|||
$(GLSL_SRCDIR)/opt_function_inlining.cpp \
|
||||
$(GLSL_SRCDIR)/opt_if_simplification.cpp \
|
||||
$(GLSL_SRCDIR)/opt_noop_swizzle.cpp \
|
||||
$(GLSL_SRCDIR)/opt_rebalance_tree.cpp \
|
||||
$(GLSL_SRCDIR)/opt_redundant_jumps.cpp \
|
||||
$(GLSL_SRCDIR)/opt_structure_splitting.cpp \
|
||||
$(GLSL_SRCDIR)/opt_swizzle_swizzle.cpp \
|
||||
|
|
|
|||
|
|
@ -1569,6 +1569,7 @@ do_common_optimization(exec_list *ir, bool linked,
|
|||
progress = do_constant_variable_unlinked(ir) || progress;
|
||||
progress = do_constant_folding(ir) || progress;
|
||||
progress = do_cse(ir) || progress;
|
||||
progress = do_rebalance_tree(ir) || progress;
|
||||
progress = do_algebraic(ir, native_integers) || progress;
|
||||
progress = do_lower_jumps(ir) || progress;
|
||||
progress = do_vec_index_to_swizzle(ir) || progress;
|
||||
|
|
|
|||
|
|
@ -71,6 +71,7 @@ bool do_common_optimization(exec_list *ir, bool linked,
|
|||
const struct gl_shader_compiler_options *options,
|
||||
bool native_integers);
|
||||
|
||||
bool do_rebalance_tree(exec_list *instructions);
|
||||
bool do_algebraic(exec_list *instructions, bool native_integers);
|
||||
bool do_constant_folding(exec_list *instructions);
|
||||
bool do_constant_variable(exec_list *instructions);
|
||||
|
|
|
|||
300
src/glsl/opt_rebalance_tree.cpp
Normal file
300
src/glsl/opt_rebalance_tree.cpp
Normal file
|
|
@ -0,0 +1,300 @@
|
|||
/*
|
||||
* Copyright © 2014 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file opt_rebalance_tree.cpp
|
||||
*
|
||||
* Rebalances a reduction expression tree.
|
||||
*
|
||||
* For reduction operations (e.g., x + y + z + w) we generate an expression
|
||||
* tree like
|
||||
*
|
||||
* +
|
||||
* / \
|
||||
* + w
|
||||
* / \
|
||||
* + z
|
||||
* / \
|
||||
* x y
|
||||
*
|
||||
* which we can rebalance into
|
||||
*
|
||||
* +
|
||||
* / \
|
||||
* / \
|
||||
* + +
|
||||
* / \ / \
|
||||
* x y z w
|
||||
*
|
||||
* to get a better instruction scheduling.
|
||||
*
|
||||
* See "Tree Rebalancing in Optimal Editor Time and Space" by Quentin F. Stout
|
||||
* and Bette L. Warren.
|
||||
*
|
||||
* Also see http://penguin.ewu.edu/~trolfe/DSWpaper/ for a very readable
|
||||
* explanation of the of the tree_to_vine() (rightward rotation) and
|
||||
* vine_to_tree() (leftward rotation) algorithms.
|
||||
*/
|
||||
|
||||
#include "ir.h"
|
||||
#include "ir_visitor.h"
|
||||
#include "ir_rvalue_visitor.h"
|
||||
#include "ir_optimization.h"
|
||||
|
||||
/* The DSW algorithm generates a degenerate tree (really, a linked list) in
|
||||
* tree_to_vine(). We'd rather not leave a binary expression with only one
|
||||
* operand, so trivial modifications (the ternary operators below) are needed
|
||||
* to ensure that we only rotate around the ir_expression nodes of the tree.
|
||||
*/
|
||||
static unsigned
|
||||
tree_to_vine(ir_expression *root)
|
||||
{
|
||||
unsigned size = 0;
|
||||
ir_rvalue *vine_tail = root;
|
||||
ir_rvalue *remainder = root->operands[1];
|
||||
|
||||
while (remainder != NULL) {
|
||||
ir_expression *remainder_temp = remainder->as_expression();
|
||||
ir_expression *remainder_left = remainder_temp ?
|
||||
remainder_temp->operands[0]->as_expression() : NULL;
|
||||
|
||||
if (remainder_left == NULL) {
|
||||
/* move vine_tail down one */
|
||||
vine_tail = remainder;
|
||||
remainder = remainder->as_expression() ?
|
||||
((ir_expression *)remainder)->operands[1] : NULL;
|
||||
size++;
|
||||
} else {
|
||||
/* rotate */
|
||||
ir_expression *tempptr = remainder_left;
|
||||
((ir_expression *)remainder)->operands[0] = tempptr->operands[1];
|
||||
tempptr->operands[1] = remainder;
|
||||
remainder = tempptr;
|
||||
((ir_expression *)vine_tail)->operands[1] = tempptr;
|
||||
}
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
static void
|
||||
compression(ir_expression *root, unsigned count)
|
||||
{
|
||||
ir_expression *scanner = root;
|
||||
|
||||
for (unsigned i = 0; i < count; i++) {
|
||||
ir_expression *child = (ir_expression *)scanner->operands[1];
|
||||
scanner->operands[1] = child->operands[1];
|
||||
scanner = (ir_expression *)scanner->operands[1];
|
||||
child->operands[1] = scanner->operands[0];
|
||||
scanner->operands[0] = child;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
vine_to_tree(ir_expression *root, unsigned size)
|
||||
{
|
||||
int n = size - 1;
|
||||
for (int m = n / 2; m > 0; m = n / 2) {
|
||||
compression(root, m);
|
||||
n -= m + 1;
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
class ir_rebalance_visitor : public ir_rvalue_enter_visitor {
|
||||
public:
|
||||
ir_rebalance_visitor()
|
||||
{
|
||||
progress = false;
|
||||
}
|
||||
|
||||
void handle_rvalue(ir_rvalue **rvalue);
|
||||
|
||||
bool progress;
|
||||
};
|
||||
|
||||
struct is_reduction_data {
|
||||
ir_expression_operation operation;
|
||||
const glsl_type *type;
|
||||
unsigned num_expr;
|
||||
bool is_reduction;
|
||||
bool contains_constant;
|
||||
};
|
||||
|
||||
} /* anonymous namespace */
|
||||
|
||||
static bool
|
||||
is_reduction_operation(ir_expression_operation operation)
|
||||
{
|
||||
switch (operation) {
|
||||
case ir_binop_add:
|
||||
case ir_binop_mul:
|
||||
case ir_binop_bit_and:
|
||||
case ir_binop_bit_xor:
|
||||
case ir_binop_bit_or:
|
||||
case ir_binop_logic_and:
|
||||
case ir_binop_logic_xor:
|
||||
case ir_binop_logic_or:
|
||||
case ir_binop_min:
|
||||
case ir_binop_max:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Note that this function does not attempt to recognize that reduction trees
|
||||
* are already balanced.
|
||||
*
|
||||
* We return false from this function for a number of reasons other than an
|
||||
* expression tree not being a mathematical reduction. Namely,
|
||||
*
|
||||
* - if the tree contains multiple constants that we may be able to combine.
|
||||
* - if the tree contains matrices:
|
||||
* - they might contain vec4's with many constant components that we can
|
||||
* simplify after splitting.
|
||||
* - applying the matrix chain ordering optimization is more than just
|
||||
* balancing an expression tree.
|
||||
* - if the tree contains operations on multiple types.
|
||||
* - if the tree contains ir_dereference_{array,record}, since foo[a+b] + c
|
||||
* would trick the visiting pass.
|
||||
*/
|
||||
static void
|
||||
is_reduction(ir_instruction *ir, void *data)
|
||||
{
|
||||
struct is_reduction_data *ird = (struct is_reduction_data *)data;
|
||||
if (!ird->is_reduction)
|
||||
return;
|
||||
|
||||
/* We don't want to balance a tree that contains multiple constants, since
|
||||
* we'll be able to constant fold them if they're not in separate subtrees.
|
||||
*/
|
||||
if (ir->as_constant()) {
|
||||
if (ird->contains_constant) {
|
||||
ird->is_reduction = false;
|
||||
}
|
||||
ird->contains_constant = true;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Array/record dereferences have subtrees that are not part of the expr
|
||||
* tree we're balancing. Skip trees containing them.
|
||||
*/
|
||||
if (ir->ir_type == ir_type_dereference_array ||
|
||||
ir->ir_type == ir_type_dereference_record) {
|
||||
ird->is_reduction = false;
|
||||
return;
|
||||
}
|
||||
|
||||
ir_expression *expr = ir->as_expression();
|
||||
if (!expr)
|
||||
return;
|
||||
|
||||
/* Non-constant matrices might still contain constant vec4 that we can
|
||||
* constant fold once split up. Handling matrices will need some more
|
||||
* work.
|
||||
*/
|
||||
if (expr->type->is_matrix()) {
|
||||
ird->is_reduction = false;
|
||||
return;
|
||||
}
|
||||
|
||||
if (ird->type != NULL && ird->type != expr->type) {
|
||||
ird->is_reduction = false;
|
||||
return;
|
||||
}
|
||||
ird->type = expr->type;
|
||||
|
||||
ird->num_expr++;
|
||||
if (is_reduction_operation(expr->operation)) {
|
||||
if (ird->operation != 0 && ird->operation != expr->operation)
|
||||
ird->is_reduction = false;
|
||||
ird->operation = expr->operation;
|
||||
} else {
|
||||
ird->is_reduction = false;
|
||||
}
|
||||
}
|
||||
|
||||
static ir_rvalue *
|
||||
handle_expression(ir_expression *expr)
|
||||
{
|
||||
struct is_reduction_data ird;
|
||||
ird.operation = (ir_expression_operation)0;
|
||||
ird.type = NULL;
|
||||
ird.num_expr = 0;
|
||||
ird.is_reduction = true;
|
||||
ird.contains_constant = false;
|
||||
|
||||
visit_tree(expr, is_reduction, (void *)&ird);
|
||||
|
||||
if (ird.is_reduction && ird.num_expr > 2) {
|
||||
ir_constant z = ir_constant(0.0f);
|
||||
ir_expression pseudo_root = ir_expression(ir_binop_add, &z, expr);
|
||||
|
||||
unsigned size = tree_to_vine(&pseudo_root);
|
||||
vine_to_tree(&pseudo_root, size);
|
||||
|
||||
expr = (ir_expression *)pseudo_root.operands[1];
|
||||
}
|
||||
return expr;
|
||||
}
|
||||
|
||||
void
|
||||
ir_rebalance_visitor::handle_rvalue(ir_rvalue **rvalue)
|
||||
{
|
||||
if (!*rvalue)
|
||||
return;
|
||||
|
||||
ir_expression *expr = (*rvalue)->as_expression();
|
||||
if (!expr || !is_reduction_operation(expr->operation))
|
||||
return;
|
||||
|
||||
ir_rvalue *new_rvalue = handle_expression(expr);
|
||||
|
||||
/* If we failed to rebalance the tree (e.g., because it wasn't a reduction,
|
||||
* or some other set of cases) new_rvalue will point to the same root as
|
||||
* before.
|
||||
*
|
||||
* Similarly, if the tree rooted at *rvalue was a reduction and was already
|
||||
* balanced, the algorithm will rearrange the tree but will ultimately
|
||||
* return an identical tree, so this check will handle that as well and
|
||||
* will not set progress = true.
|
||||
*/
|
||||
if (new_rvalue == *rvalue)
|
||||
return;
|
||||
|
||||
*rvalue = new_rvalue;
|
||||
this->progress = true;
|
||||
}
|
||||
|
||||
bool
|
||||
do_rebalance_tree(exec_list *instructions)
|
||||
{
|
||||
ir_rebalance_visitor v;
|
||||
|
||||
v.run(instructions);
|
||||
|
||||
return v.progress;
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue