glsl: Use array deref for access to vector components

We've assumed that we could lower per-component vector access from

  vec[i] = scalar

to

  vec = ir_triop_vector_insert(vec, scalar, i)

but with SSBOs (and compute shader SLM and tesselation outputs) this is
no longer valid. If a vector is "externally visible", multiple threads
can write independent components simultaneously. With lowering to
ir_triop_vector_insert, each thread read the entire vector, changes one
component, then writes out the entire vector. This is racy.

Instead of generating a ir_binop_vector_extract when we see v[i], we
generate ir_dereference_array. We then add a lowering pass to lower the
ir_dereference_array to ir_binop_vector_extract for rvalues and for to
vector_insert for lvalues in a separate lowering pass.

The resulting IR is the same as before, but we now have a window between
ast->ir conversion and the lowering pass where v[i] appears in the IR as
an array deref. This lets us run lowering passes that lower the vector
access to I/O (eg for SSBO load/store) before we lower the per-component
access to full vector writes.

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Signed-off-by: Kristian Høgsberg Kristensen <krh@bitplanet.net>
This commit is contained in:
Kristian Høgsberg Kristensen 2015-11-04 14:58:54 -08:00
parent 60dd5287ff
commit 96b22fb080
10 changed files with 138 additions and 68 deletions

View file

@ -176,6 +176,7 @@ LIBGLSL_FILES = \
lower_vec_index_to_cond_assign.cpp \
lower_vec_index_to_swizzle.cpp \
lower_vector.cpp \
lower_vector_derefs.cpp \
lower_vector_insert.cpp \
lower_vertex_id.cpp \
lower_output_reads.cpp \

View file

@ -319,10 +319,9 @@ _mesa_ast_array_index_to_hir(void *mem_ctx,
* expression.
*/
if (array->type->is_array()
|| array->type->is_matrix()) {
|| array->type->is_matrix()
|| array->type->is_vector()) {
return new(mem_ctx) ir_dereference_array(array, idx);
} else if (array->type->is_vector()) {
return new(mem_ctx) ir_expression(ir_binop_vector_extract, array, idx);
} else if (array->type->is_error()) {
return array;
} else {

View file

@ -256,18 +256,10 @@ verify_parameter_modes(_mesa_glsl_parse_state *state,
actual->variable_referenced()->name);
return false;
} else if (!actual->is_lvalue()) {
/* Even though ir_binop_vector_extract is not an l-value, let it
* slop through. generate_call will handle it correctly.
*/
ir_expression *const expr = ((ir_rvalue *) actual)->as_expression();
if (expr == NULL
|| expr->operation != ir_binop_vector_extract
|| !expr->operands[0]->is_lvalue()) {
_mesa_glsl_error(&loc, state,
"function parameter '%s %s' is not an lvalue",
mode, formal->name);
return false;
}
_mesa_glsl_error(&loc, state,
"function parameter '%s %s' is not an lvalue",
mode, formal->name);
return false;
}
}
@ -376,12 +368,8 @@ fix_parameter(void *mem_ctx, ir_rvalue *actual, const glsl_type *formal_type,
ir_rvalue *lhs = actual;
if (expr != NULL && expr->operation == ir_binop_vector_extract) {
rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert,
expr->operands[0]->type,
expr->operands[0]->clone(mem_ctx, NULL),
rhs,
expr->operands[1]->clone(mem_ctx, NULL));
lhs = expr->operands[0]->clone(mem_ctx, NULL);
lhs == new(mem_ctx) ir_dereference_array(expr->operands[0]->clone(mem_ctx, NULL),
expr->operands[1]->clone(mem_ctx, NULL));
}
ir_assignment *const assignment_2 = new(mem_ctx) ir_assignment(lhs, rhs);

View file

@ -850,43 +850,6 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
{
void *ctx = state;
bool error_emitted = (lhs->type->is_error() || rhs->type->is_error());
ir_rvalue *extract_channel = NULL;
/* If the assignment LHS comes back as an ir_binop_vector_extract
* expression, move it to the RHS as an ir_triop_vector_insert.
*/
if (lhs->ir_type == ir_type_expression) {
ir_expression *const lhs_expr = lhs->as_expression();
if (unlikely(lhs_expr->operation == ir_binop_vector_extract)) {
ir_rvalue *new_rhs =
validate_assignment(state, lhs_loc, lhs,
rhs, is_initializer);
if (new_rhs == NULL) {
return lhs;
} else {
/* This converts:
* - LHS: (expression float vector_extract <vec> <channel>)
* - RHS: <scalar>
* into:
* - LHS: <vec>
* - RHS: (expression vec2 vector_insert <vec> <channel> <scalar>)
*
* The LHS type is now a vector instead of a scalar. Since GLSL
* allows assignments to be used as rvalues, we need to re-extract
* the channel from assignment_temp when returning the rvalue.
*/
extract_channel = lhs_expr->operands[1];
rhs = new(ctx) ir_expression(ir_triop_vector_insert,
lhs_expr->operands[0]->type,
lhs_expr->operands[0],
new_rhs,
extract_channel);
lhs = lhs_expr->operands[0]->clone(ctx, NULL);
}
}
}
ir_variable *lhs_var = lhs->variable_referenced();
if (lhs_var)
@ -984,12 +947,6 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
}
ir_rvalue *rvalue = new(ctx) ir_dereference_variable(var);
if (extract_channel) {
rvalue = new(ctx) ir_expression(ir_binop_vector_extract,
rvalue,
extract_channel->clone(ctx, NULL));
}
*out_rvalue = rvalue;
} else {
if (!error_emitted)

View file

@ -129,6 +129,7 @@ void lower_packed_varyings(void *mem_ctx,
unsigned locations_used, ir_variable_mode mode,
unsigned gs_input_vertices, gl_shader *shader);
bool lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index);
bool lower_vector_derefs(gl_shader *shader);
void lower_named_interface_blocks(void *mem_ctx, gl_shader *shader);
bool optimize_redundant_jumps(exec_list *instructions);
bool optimize_split_arrays(exec_list *instructions, bool linked);

View file

@ -110,9 +110,10 @@ ir_validate::visit(ir_dereference_variable *ir)
ir_visitor_status
ir_validate::visit_enter(class ir_dereference_array *ir)
{
if (!ir->array->type->is_array() && !ir->array->type->is_matrix()) {
printf("ir_dereference_array @ %p does not specify an array or a "
"matrix\n",
if (!ir->array->type->is_array() && !ir->array->type->is_matrix() &&
!ir->array->type->is_vector()) {
printf("ir_dereference_array @ %p does not specify an array, a vector "
"or a matrix\n",
(void *) ir);
ir->print();
printf("\n");

View file

@ -4451,6 +4451,8 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
if (ctx->Const.ShaderCompilerOptions[i].LowerBufferInterfaceBlocks)
lower_ubo_reference(prog->_LinkedShaders[i]);
lower_vector_derefs(prog->_LinkedShaders[i]);
}
done:

View file

@ -390,7 +390,19 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
case ir_type_dereference_array: {
ir_dereference_array *deref_array = (ir_dereference_array *) deref;
unsigned array_stride;
if (deref_array->array->type->is_matrix() && *row_major) {
if (deref_array->array->type->is_vector()) {
/* We get this when storing or loading a component out of a vector
* with a non-constant index. This happens for v[i] = f where v is
* a vector (or m[i][j] = f where m is a matrix). If we don't
* lower that here, it gets turned into v = vector_insert(v, i,
* f), which loads the entire vector, modifies one component and
* then write the entire thing back. That breaks if another
* thread or SIMD channel is modifying the same vector.
*/
array_stride = 4;
if (deref_array->array->type->is_double())
array_stride *= 2;
} else if (deref_array->array->type->is_matrix() && *row_major) {
/* When loading a vector out of a row major matrix, the
* step between the columns (vectors) is the size of a
* float, while the step between the rows (elements of a

View file

@ -0,0 +1,104 @@
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "ir.h"
#include "ir_builder.h"
#include "ir_rvalue_visitor.h"
#include "ir_optimization.h"
using namespace ir_builder;
namespace {
class vector_deref_visitor : public ir_rvalue_enter_visitor {
public:
vector_deref_visitor()
: progress(false)
{
}
virtual ~vector_deref_visitor()
{
}
virtual void handle_rvalue(ir_rvalue **rv);
virtual ir_visitor_status visit_enter(ir_assignment *ir);
bool progress;
};
} /* anonymous namespace */
ir_visitor_status
vector_deref_visitor::visit_enter(ir_assignment *ir)
{
if (!ir->lhs || ir->lhs->ir_type != ir_type_dereference_array)
return ir_rvalue_enter_visitor::visit_enter(ir);
ir_dereference_array *const deref = (ir_dereference_array *) ir->lhs;
if (!deref->array->type->is_vector())
return ir_rvalue_enter_visitor::visit_enter(ir);
ir_dereference *const new_lhs = (ir_dereference *) deref->array;
ir->set_lhs(new_lhs);
ir_constant *old_index_constant = deref->array_index->constant_expression_value();
void *mem_ctx = ralloc_parent(ir);
if (!old_index_constant) {
ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert,
new_lhs->type,
new_lhs->clone(mem_ctx, NULL),
ir->rhs,
deref->array_index);
ir->write_mask = (1 << new_lhs->type->vector_elements) - 1;
} else {
ir->write_mask = 1 << old_index_constant->get_int_component(0);
}
return ir_rvalue_enter_visitor::visit_enter(ir);
}
void
vector_deref_visitor::handle_rvalue(ir_rvalue **rv)
{
if (*rv == NULL || (*rv)->ir_type != ir_type_dereference_array)
return;
ir_dereference_array *const deref = (ir_dereference_array *) *rv;
if (!deref->array->type->is_vector())
return;
void *mem_ctx = ralloc_parent(deref);
*rv = new(mem_ctx) ir_expression(ir_binop_vector_extract,
deref->array,
deref->array_index);
}
bool
lower_vector_derefs(gl_shader *shader)
{
vector_deref_visitor v;
visit_list_elements(&v, shader->ir);
return v.progress;
}

View file

@ -197,6 +197,11 @@ process_assignment(void *ctx, ir_assignment *ir, exec_list *assignments)
if (entry->lhs != var)
continue;
/* Skip if the assignment we're trying to eliminate isn't a plain
* variable deref. */
if (entry->ir->lhs->ir_type != ir_type_dereference_variable)
continue;
int remove = entry->unused & ir->write_mask;
if (debug) {
printf("%s 0x%01x - 0x%01x = 0x%01x\n",