mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 00:00:11 +01:00
glsl: Use array deref for access to vector components
We've assumed that we could lower per-component vector access from vec[i] = scalar to vec = ir_triop_vector_insert(vec, scalar, i) but with SSBOs (and compute shader SLM and tesselation outputs) this is no longer valid. If a vector is "externally visible", multiple threads can write independent components simultaneously. With lowering to ir_triop_vector_insert, each thread read the entire vector, changes one component, then writes out the entire vector. This is racy. Instead of generating a ir_binop_vector_extract when we see v[i], we generate ir_dereference_array. We then add a lowering pass to lower the ir_dereference_array to ir_binop_vector_extract for rvalues and for to vector_insert for lvalues in a separate lowering pass. The resulting IR is the same as before, but we now have a window between ast->ir conversion and the lowering pass where v[i] appears in the IR as an array deref. This lets us run lowering passes that lower the vector access to I/O (eg for SSBO load/store) before we lower the per-component access to full vector writes. Reviewed-by: Jordan Justen <jordan.l.justen@intel.com> Signed-off-by: Kristian Høgsberg Kristensen <krh@bitplanet.net>
This commit is contained in:
parent
60dd5287ff
commit
96b22fb080
10 changed files with 138 additions and 68 deletions
|
|
@ -176,6 +176,7 @@ LIBGLSL_FILES = \
|
|||
lower_vec_index_to_cond_assign.cpp \
|
||||
lower_vec_index_to_swizzle.cpp \
|
||||
lower_vector.cpp \
|
||||
lower_vector_derefs.cpp \
|
||||
lower_vector_insert.cpp \
|
||||
lower_vertex_id.cpp \
|
||||
lower_output_reads.cpp \
|
||||
|
|
|
|||
|
|
@ -319,10 +319,9 @@ _mesa_ast_array_index_to_hir(void *mem_ctx,
|
|||
* expression.
|
||||
*/
|
||||
if (array->type->is_array()
|
||||
|| array->type->is_matrix()) {
|
||||
|| array->type->is_matrix()
|
||||
|| array->type->is_vector()) {
|
||||
return new(mem_ctx) ir_dereference_array(array, idx);
|
||||
} else if (array->type->is_vector()) {
|
||||
return new(mem_ctx) ir_expression(ir_binop_vector_extract, array, idx);
|
||||
} else if (array->type->is_error()) {
|
||||
return array;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -256,18 +256,10 @@ verify_parameter_modes(_mesa_glsl_parse_state *state,
|
|||
actual->variable_referenced()->name);
|
||||
return false;
|
||||
} else if (!actual->is_lvalue()) {
|
||||
/* Even though ir_binop_vector_extract is not an l-value, let it
|
||||
* slop through. generate_call will handle it correctly.
|
||||
*/
|
||||
ir_expression *const expr = ((ir_rvalue *) actual)->as_expression();
|
||||
if (expr == NULL
|
||||
|| expr->operation != ir_binop_vector_extract
|
||||
|| !expr->operands[0]->is_lvalue()) {
|
||||
_mesa_glsl_error(&loc, state,
|
||||
"function parameter '%s %s' is not an lvalue",
|
||||
mode, formal->name);
|
||||
return false;
|
||||
}
|
||||
_mesa_glsl_error(&loc, state,
|
||||
"function parameter '%s %s' is not an lvalue",
|
||||
mode, formal->name);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -376,12 +368,8 @@ fix_parameter(void *mem_ctx, ir_rvalue *actual, const glsl_type *formal_type,
|
|||
|
||||
ir_rvalue *lhs = actual;
|
||||
if (expr != NULL && expr->operation == ir_binop_vector_extract) {
|
||||
rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert,
|
||||
expr->operands[0]->type,
|
||||
expr->operands[0]->clone(mem_ctx, NULL),
|
||||
rhs,
|
||||
expr->operands[1]->clone(mem_ctx, NULL));
|
||||
lhs = expr->operands[0]->clone(mem_ctx, NULL);
|
||||
lhs == new(mem_ctx) ir_dereference_array(expr->operands[0]->clone(mem_ctx, NULL),
|
||||
expr->operands[1]->clone(mem_ctx, NULL));
|
||||
}
|
||||
|
||||
ir_assignment *const assignment_2 = new(mem_ctx) ir_assignment(lhs, rhs);
|
||||
|
|
|
|||
|
|
@ -850,43 +850,6 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
|
|||
{
|
||||
void *ctx = state;
|
||||
bool error_emitted = (lhs->type->is_error() || rhs->type->is_error());
|
||||
ir_rvalue *extract_channel = NULL;
|
||||
|
||||
/* If the assignment LHS comes back as an ir_binop_vector_extract
|
||||
* expression, move it to the RHS as an ir_triop_vector_insert.
|
||||
*/
|
||||
if (lhs->ir_type == ir_type_expression) {
|
||||
ir_expression *const lhs_expr = lhs->as_expression();
|
||||
|
||||
if (unlikely(lhs_expr->operation == ir_binop_vector_extract)) {
|
||||
ir_rvalue *new_rhs =
|
||||
validate_assignment(state, lhs_loc, lhs,
|
||||
rhs, is_initializer);
|
||||
|
||||
if (new_rhs == NULL) {
|
||||
return lhs;
|
||||
} else {
|
||||
/* This converts:
|
||||
* - LHS: (expression float vector_extract <vec> <channel>)
|
||||
* - RHS: <scalar>
|
||||
* into:
|
||||
* - LHS: <vec>
|
||||
* - RHS: (expression vec2 vector_insert <vec> <channel> <scalar>)
|
||||
*
|
||||
* The LHS type is now a vector instead of a scalar. Since GLSL
|
||||
* allows assignments to be used as rvalues, we need to re-extract
|
||||
* the channel from assignment_temp when returning the rvalue.
|
||||
*/
|
||||
extract_channel = lhs_expr->operands[1];
|
||||
rhs = new(ctx) ir_expression(ir_triop_vector_insert,
|
||||
lhs_expr->operands[0]->type,
|
||||
lhs_expr->operands[0],
|
||||
new_rhs,
|
||||
extract_channel);
|
||||
lhs = lhs_expr->operands[0]->clone(ctx, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ir_variable *lhs_var = lhs->variable_referenced();
|
||||
if (lhs_var)
|
||||
|
|
@ -984,12 +947,6 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
|
|||
}
|
||||
ir_rvalue *rvalue = new(ctx) ir_dereference_variable(var);
|
||||
|
||||
if (extract_channel) {
|
||||
rvalue = new(ctx) ir_expression(ir_binop_vector_extract,
|
||||
rvalue,
|
||||
extract_channel->clone(ctx, NULL));
|
||||
}
|
||||
|
||||
*out_rvalue = rvalue;
|
||||
} else {
|
||||
if (!error_emitted)
|
||||
|
|
|
|||
|
|
@ -129,6 +129,7 @@ void lower_packed_varyings(void *mem_ctx,
|
|||
unsigned locations_used, ir_variable_mode mode,
|
||||
unsigned gs_input_vertices, gl_shader *shader);
|
||||
bool lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index);
|
||||
bool lower_vector_derefs(gl_shader *shader);
|
||||
void lower_named_interface_blocks(void *mem_ctx, gl_shader *shader);
|
||||
bool optimize_redundant_jumps(exec_list *instructions);
|
||||
bool optimize_split_arrays(exec_list *instructions, bool linked);
|
||||
|
|
|
|||
|
|
@ -110,9 +110,10 @@ ir_validate::visit(ir_dereference_variable *ir)
|
|||
ir_visitor_status
|
||||
ir_validate::visit_enter(class ir_dereference_array *ir)
|
||||
{
|
||||
if (!ir->array->type->is_array() && !ir->array->type->is_matrix()) {
|
||||
printf("ir_dereference_array @ %p does not specify an array or a "
|
||||
"matrix\n",
|
||||
if (!ir->array->type->is_array() && !ir->array->type->is_matrix() &&
|
||||
!ir->array->type->is_vector()) {
|
||||
printf("ir_dereference_array @ %p does not specify an array, a vector "
|
||||
"or a matrix\n",
|
||||
(void *) ir);
|
||||
ir->print();
|
||||
printf("\n");
|
||||
|
|
|
|||
|
|
@ -4451,6 +4451,8 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
|
||||
if (ctx->Const.ShaderCompilerOptions[i].LowerBufferInterfaceBlocks)
|
||||
lower_ubo_reference(prog->_LinkedShaders[i]);
|
||||
|
||||
lower_vector_derefs(prog->_LinkedShaders[i]);
|
||||
}
|
||||
|
||||
done:
|
||||
|
|
|
|||
|
|
@ -390,7 +390,19 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
|
|||
case ir_type_dereference_array: {
|
||||
ir_dereference_array *deref_array = (ir_dereference_array *) deref;
|
||||
unsigned array_stride;
|
||||
if (deref_array->array->type->is_matrix() && *row_major) {
|
||||
if (deref_array->array->type->is_vector()) {
|
||||
/* We get this when storing or loading a component out of a vector
|
||||
* with a non-constant index. This happens for v[i] = f where v is
|
||||
* a vector (or m[i][j] = f where m is a matrix). If we don't
|
||||
* lower that here, it gets turned into v = vector_insert(v, i,
|
||||
* f), which loads the entire vector, modifies one component and
|
||||
* then write the entire thing back. That breaks if another
|
||||
* thread or SIMD channel is modifying the same vector.
|
||||
*/
|
||||
array_stride = 4;
|
||||
if (deref_array->array->type->is_double())
|
||||
array_stride *= 2;
|
||||
} else if (deref_array->array->type->is_matrix() && *row_major) {
|
||||
/* When loading a vector out of a row major matrix, the
|
||||
* step between the columns (vectors) is the size of a
|
||||
* float, while the step between the rows (elements of a
|
||||
|
|
|
|||
104
src/glsl/lower_vector_derefs.cpp
Normal file
104
src/glsl/lower_vector_derefs.cpp
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
/*
|
||||
* Copyright © 2013 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#include "ir.h"
|
||||
#include "ir_builder.h"
|
||||
#include "ir_rvalue_visitor.h"
|
||||
#include "ir_optimization.h"
|
||||
|
||||
using namespace ir_builder;
|
||||
|
||||
namespace {
|
||||
|
||||
class vector_deref_visitor : public ir_rvalue_enter_visitor {
|
||||
public:
|
||||
vector_deref_visitor()
|
||||
: progress(false)
|
||||
{
|
||||
}
|
||||
|
||||
virtual ~vector_deref_visitor()
|
||||
{
|
||||
}
|
||||
|
||||
virtual void handle_rvalue(ir_rvalue **rv);
|
||||
virtual ir_visitor_status visit_enter(ir_assignment *ir);
|
||||
|
||||
bool progress;
|
||||
};
|
||||
|
||||
} /* anonymous namespace */
|
||||
|
||||
ir_visitor_status
|
||||
vector_deref_visitor::visit_enter(ir_assignment *ir)
|
||||
{
|
||||
if (!ir->lhs || ir->lhs->ir_type != ir_type_dereference_array)
|
||||
return ir_rvalue_enter_visitor::visit_enter(ir);
|
||||
|
||||
ir_dereference_array *const deref = (ir_dereference_array *) ir->lhs;
|
||||
if (!deref->array->type->is_vector())
|
||||
return ir_rvalue_enter_visitor::visit_enter(ir);
|
||||
|
||||
ir_dereference *const new_lhs = (ir_dereference *) deref->array;
|
||||
ir->set_lhs(new_lhs);
|
||||
|
||||
ir_constant *old_index_constant = deref->array_index->constant_expression_value();
|
||||
void *mem_ctx = ralloc_parent(ir);
|
||||
if (!old_index_constant) {
|
||||
ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert,
|
||||
new_lhs->type,
|
||||
new_lhs->clone(mem_ctx, NULL),
|
||||
ir->rhs,
|
||||
deref->array_index);
|
||||
ir->write_mask = (1 << new_lhs->type->vector_elements) - 1;
|
||||
} else {
|
||||
ir->write_mask = 1 << old_index_constant->get_int_component(0);
|
||||
}
|
||||
|
||||
return ir_rvalue_enter_visitor::visit_enter(ir);
|
||||
}
|
||||
|
||||
void
|
||||
vector_deref_visitor::handle_rvalue(ir_rvalue **rv)
|
||||
{
|
||||
if (*rv == NULL || (*rv)->ir_type != ir_type_dereference_array)
|
||||
return;
|
||||
|
||||
ir_dereference_array *const deref = (ir_dereference_array *) *rv;
|
||||
if (!deref->array->type->is_vector())
|
||||
return;
|
||||
|
||||
void *mem_ctx = ralloc_parent(deref);
|
||||
*rv = new(mem_ctx) ir_expression(ir_binop_vector_extract,
|
||||
deref->array,
|
||||
deref->array_index);
|
||||
}
|
||||
|
||||
bool
|
||||
lower_vector_derefs(gl_shader *shader)
|
||||
{
|
||||
vector_deref_visitor v;
|
||||
|
||||
visit_list_elements(&v, shader->ir);
|
||||
|
||||
return v.progress;
|
||||
}
|
||||
|
|
@ -197,6 +197,11 @@ process_assignment(void *ctx, ir_assignment *ir, exec_list *assignments)
|
|||
if (entry->lhs != var)
|
||||
continue;
|
||||
|
||||
/* Skip if the assignment we're trying to eliminate isn't a plain
|
||||
* variable deref. */
|
||||
if (entry->ir->lhs->ir_type != ir_type_dereference_variable)
|
||||
continue;
|
||||
|
||||
int remove = entry->unused & ir->write_mask;
|
||||
if (debug) {
|
||||
printf("%s 0x%01x - 0x%01x = 0x%01x\n",
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue