/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/*
 * This lowering pass converts loads and stores of input/output variables
 * into actual input/output intrinsics.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"

#include "util/u_math.h"

struct lower_io_state {
   void *dead_ctx;
   nir_builder builder;
   int (*type_size)(const struct glsl_type *type, bool);
   nir_variable_mode modes;
   nir_lower_io_options options;
   struct set variable_names;
};

static const char *
add_variable_name(struct lower_io_state *state, const char *name)
{
   if (!name)
      return NULL;

   bool found = false;
   struct set_entry *entry =
      _mesa_set_search_or_add(&state->variable_names, name, &found);
   if (!found)
      entry->key = (void *)ralloc_strdup(state->builder.shader, name);
   return entry->key;
}

/**
 * Some inputs and outputs are arrayed, meaning that there is an extra level
 * of array indexing to handle mismatches between the shader interface and the
 * dispatch pattern of the shader. For instance, geometry shaders are
 * executed per-primitive while their inputs and outputs are specified
 * per-vertex, so all inputs and outputs have to be additionally indexed with
 * the vertex index within the primitive.
 */
bool
nir_is_arrayed_io(const nir_variable *var, mesa_shader_stage stage)
{
   if (var->data.patch || !glsl_type_is_array(var->type))
      return false;

   if (var->data.per_view) {
      /* Nested arrayed outputs (both per-view and per-{vertex,primitive})
       * are unsupported.
       */
      assert(stage == MESA_SHADER_VERTEX);
      assert(var->data.mode == nir_var_shader_out);
      return true;
   }

   if (stage == MESA_SHADER_MESH) {
      /* NV_mesh_shader: this is a flat array for the whole workgroup. */
      if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES)
         return var->data.per_primitive;
   }

   if (var->data.mode == nir_var_shader_in) {
      if (var->data.per_vertex) {
         assert(stage == MESA_SHADER_FRAGMENT);
         return true;
      }

      return stage == MESA_SHADER_GEOMETRY ||
             stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_TESS_EVAL;
   }

   if (var->data.mode == nir_var_shader_out)
      return stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_MESH;

   return false;
}
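
/* Illustrative example (not part of the pass): in a GLSL geometry shader,
 * an input declared as
 *
 *    in vec4 color[3];   // one vec4 per vertex of a triangle
 *
 * is "arrayed": the outer [3] indexes the incoming vertex rather than being
 * part of the variable's own type, so nir_is_arrayed_io() returns true and
 * the lowering emits load_per_vertex_input with a separate vertex index.
 */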

/* Add `offset_diff_bytes` bytes to the offset used by `intr`. Takes the
 * offset_shift used by `intr` (if any) into account and, if needed, adjusts
 * it in order to be able to represent the resulting offset in full precision.
 */
nir_io_offset
nir_io_offset_iadd(nir_builder *b, nir_intrinsic_instr *intr,
                   int offset_diff_bytes)
{
   unsigned offset_diff;
   unsigned base_shift;
   unsigned offset_shift;

   if (nir_intrinsic_has_offset_shift(intr)) {
      unsigned cur_offset_shift = nir_intrinsic_offset_shift(intr);

      if (util_is_aligned(offset_diff_bytes, (uintmax_t)1 << cur_offset_shift)) {
         /* If the byte offset is properly aligned, we can just shift it and
          * keep the current offset_shift.
          */
         offset_diff = offset_diff_bytes >> cur_offset_shift;
         base_shift = 0;
         offset_shift = cur_offset_shift;
      } else {
         /* TODO: add support for adjusting the base index. */
         assert(!nir_intrinsic_has_base(intr) || nir_intrinsic_base(intr) == 0);

         /* Otherwise, we have to lower offset_shift in order to not lose
          * precision. We also have to shift the original base offset left to
          * make sure it uses the same units.
          */
         offset_shift = ffs(offset_diff_bytes) - 1;
         offset_diff = offset_diff_bytes >> offset_shift;
         base_shift = cur_offset_shift - offset_shift;
      }
   } else {
      offset_diff = offset_diff_bytes;
      base_shift = 0;
      offset_shift = 0;
   }

   nir_src *base_offset_src = nir_get_io_offset_src(intr);
   assert(base_offset_src);

   nir_def *base_offset = base_offset_src->ssa;
   nir_def *offset =
      nir_iadd_imm(b, nir_ishl_imm(b, base_offset, base_shift), offset_diff);

   return (nir_io_offset){
      .def = offset,
      .shift = offset_shift,
   };
}
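
/* Worked example (illustrative): suppose `intr` uses offset_shift == 2
 * (i.e. the offset source is in dword units) and we add 6 bytes. 6 is not
 * dword-aligned, so the shift is lowered to ffs(6) - 1 == 1 (2-byte units):
 * the existing offset is shifted left by 2 - 1 == 1 and 6 >> 1 == 3 is
 * added, which represents the same byte offset without losing precision.
 */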

/* Set the offset src and offset_shift of `intr` to `offset`. */
void
nir_set_io_offset(nir_intrinsic_instr *intr, nir_io_offset offset)
{
   nir_src *offset_src = nir_get_io_offset_src(intr);
   assert(offset_src);

   if (offset_src->ssa) {
      nir_src_rewrite(offset_src, offset.def);
   } else {
      *offset_src = nir_src_for_ssa(offset.def);
   }

   if (nir_intrinsic_has_offset_shift(intr)) {
      /* TODO: add support for adjusting the base index. */
      assert(!nir_intrinsic_has_base(intr) || nir_intrinsic_base(intr) == 0);

      nir_intrinsic_set_offset_shift(intr, offset.shift);
   } else {
      assert(offset.shift == 0);
   }
}

void
nir_add_io_offset(nir_builder *b, nir_intrinsic_instr *intr,
                  int offset_diff_bytes)
{
   nir_set_io_offset(intr, nir_io_offset_iadd(b, intr, offset_diff_bytes));
}
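
/* Usage sketch (illustrative, not taken from a real caller): to rebase an
 * I/O intrinsic by one 16-byte vec4 slot, a caller positioned at the
 * intrinsic could do
 *
 *    b->cursor = nir_before_instr(&intr->instr);
 *    nir_add_io_offset(b, intr, 16);
 *
 * and the offset source and offset_shift are updated consistently.
 */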

static bool
uses_high_dvec2_semantic(struct lower_io_state *state,
                         const nir_variable *var)
{
   return state->builder.shader->info.stage == MESA_SHADER_VERTEX &&
          (state->options & nir_lower_io_lower_64bit_to_32_new) &&
          var->data.mode == nir_var_shader_in &&
          glsl_type_is_dual_slot(glsl_without_array(var->type));
}

static unsigned
get_number_of_slots(struct lower_io_state *state,
                    const nir_variable *var)
{
   const struct glsl_type *type = var->type;

   if (nir_is_arrayed_io(var, state->builder.shader->info.stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   /* NV_mesh_shader:
    * PRIMITIVE_INDICES is a flat array, not a proper arrayed output,
    * as opposed to D3D-style mesh shaders where it's addressed by
    * the primitive index.
    * Prevent assigning several slots to primitive indices,
    * to avoid some issues.
    */
   if (state->builder.shader->info.stage == MESA_SHADER_MESH &&
       var->data.location == VARYING_SLOT_PRIMITIVE_INDICES &&
       !nir_is_arrayed_io(var, state->builder.shader->info.stage))
      return 1;

   return state->type_size(type, var->data.bindless) /
          (uses_high_dvec2_semantic(state, var) ? 2 : 1);
}
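
/* Example (illustrative): with a vec4-based type_size callback, a varying
 * declared as `out vec4 data[4]` occupies 4 slots. A dvec4 vertex input
 * under nir_lower_io_lower_64bit_to_32_new has a type_size of 2 vec4 slots,
 * but counts as a single slot here because each slot is a dual slot in that
 * mode (see uses_high_dvec2_semantic).
 */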

static nir_def *
get_io_offset(nir_builder *b, nir_deref_instr *deref,
              nir_def **array_index,
              int (*type_size)(const struct glsl_type *, bool),
              unsigned *component, bool bts)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   assert(path.path[0]->deref_type == nir_deref_type_var);
   nir_deref_instr **p = &path.path[1];

   /* For arrayed I/O (e.g., per-vertex input arrays in geometry shader
    * inputs), skip the outermost array index. Process the rest normally.
    */
   if (array_index != NULL) {
      assert((*p)->deref_type == nir_deref_type_array);
      *array_index = (*p)->arr.index.ssa;
      p++;
   }

   if (path.path[0]->var->data.compact && nir_src_is_const((*p)->arr.index)) {
      assert((*p)->deref_type == nir_deref_type_array);
      assert(glsl_type_is_scalar((*p)->type));

      /* We always lower indirect dereferences for "compact" array vars. */
      const unsigned index = nir_src_as_uint((*p)->arr.index);
      const unsigned total_offset = *component + index;
      const unsigned slot_offset = total_offset / 4;
      *component = total_offset % 4;
      return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
   }

   /* Just emit code and let constant-folding go to town */
   nir_def *offset = nir_imm_int(b, 0);

   for (; *p; p++) {
      if ((*p)->deref_type == nir_deref_type_array) {
         unsigned size = type_size((*p)->type, bts);

         nir_def *mul =
            nir_amul_imm(b, (*p)->arr.index.ssa, size);

         offset = nir_iadd(b, offset, mul);
      } else if ((*p)->deref_type == nir_deref_type_struct) {
         /* p starts at path[1], so this is safe */
         nir_deref_instr *parent = *(p - 1);

         unsigned field_offset = 0;
         for (unsigned i = 0; i < (*p)->strct.index; i++) {
            field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
         }
         offset = nir_iadd_imm(b, offset, field_offset);
      } else {
         UNREACHABLE("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   return offset;
}
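
/* Example (illustrative): for `out struct { vec4 a; float b[3]; } s;` and a
 * deref chain s.b[i] with a vec4-based type_size, the struct step adds the
 * size of the preceding field `a` (1 slot) and the array step adds
 * i * type_size(float) == i slots, so the emitted offset is 1 + i.
 */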

static bool
is_medium_precision(const nir_shader *shader, const nir_variable *var)
{
   if (shader->options->io_options & nir_io_mediump_is_32bit)
      return false;

   return var->data.precision == GLSL_PRECISION_MEDIUM ||
          var->data.precision == GLSL_PRECISION_LOW;
}

static enum glsl_interp_mode
get_interp_mode(const nir_variable *var)
{
   unsigned interp_mode = var->data.interpolation;

   /* INTERP_MODE_NONE is an artifact of OpenGL. Change it to SMOOTH
    * to enable CSE between load_barycentric_pixel(NONE->SMOOTH) and
    * load_barycentric_pixel(SMOOTH), which also enables IO vectorization when
    * one component originally had NONE and an adjacent component had SMOOTH.
    *
    * Color varyings must preserve NONE. NONE for colors means that
    * glShadeModel determines the interpolation mode.
    */
   if (var->data.location != VARYING_SLOT_COL0 &&
       var->data.location != VARYING_SLOT_COL1 &&
       var->data.location != VARYING_SLOT_BFC0 &&
       var->data.location != VARYING_SLOT_BFC1 &&
       interp_mode == INTERP_MODE_NONE)
      return INTERP_MODE_SMOOTH;

   return interp_mode;
}

static nir_def *
simplify_offset_src(nir_builder *b, nir_def *offset, unsigned num_slots)
{
   /* Force index = 0 for any indirect access to array[1]. The only valid
    * index into a 1-slot variable is 0, so a non-constant offset can be
    * replaced outright; without this, indirect indexing of 1-element arrays
    * trips an assertion later.
    */
   if (num_slots == 1 &&
       offset->parent_instr->type != nir_instr_type_load_const)
      return nir_imm_int(b, 0);

   return offset;
}

static nir_def *
emit_load(struct lower_io_state *state,
          nir_def *array_index, nir_variable *var, nir_def *offset,
          unsigned component, unsigned num_components, unsigned bit_size,
          nir_alu_type dest_type, bool high_dvec2)
{
   nir_builder *b = &state->builder;
   const nir_shader *nir = b->shader;
   nir_variable_mode mode = var->data.mode;
   nir_def *barycentric = NULL;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_shader_in:
      if (nir->info.stage == MESA_SHADER_FRAGMENT &&
          (state->options & nir_lower_io_use_interpolated_input_intrinsics) &&
          var->data.interpolation != INTERP_MODE_FLAT &&
          !var->data.per_primitive) {
         if (var->data.interpolation == INTERP_MODE_EXPLICIT ||
             var->data.per_vertex) {
            assert(array_index != NULL);
            op = nir_intrinsic_load_input_vertex;
         } else {
            assert(array_index == NULL);

            nir_intrinsic_op bary_op;
            if (var->data.sample)
               bary_op = nir_intrinsic_load_barycentric_sample;
            else if (var->data.centroid)
               bary_op = nir_intrinsic_load_barycentric_centroid;
            else
               bary_op = nir_intrinsic_load_barycentric_pixel;

            barycentric = nir_load_barycentric(&state->builder, bary_op,
                                               get_interp_mode(var));
            op = nir_intrinsic_load_interpolated_input;
         }
      } else {
         if (var->data.per_primitive)
            op = nir_intrinsic_load_per_primitive_input;
         else if (array_index)
            op = nir_intrinsic_load_per_vertex_input;
         else
            op = nir_intrinsic_load_input;
      }
      break;
   case nir_var_shader_out:
      if (!array_index)
         op = nir_intrinsic_load_output;
      else if (var->data.per_primitive)
         op = nir_intrinsic_load_per_primitive_output;
      else if (var->data.per_view)
         op = nir_intrinsic_load_per_view_output;
      else
         op = nir_intrinsic_load_per_vertex_output;
      break;
   case nir_var_uniform:
      op = nir_intrinsic_load_uniform;
      break;
   default:
      UNREACHABLE("Unknown variable mode");
   }

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader, op);
   load->num_components = num_components;
   load->name = add_variable_name(state, var->name);

   nir_intrinsic_set_base(load, var->data.driver_location);
   if (nir_intrinsic_has_range(load)) {
      const struct glsl_type *type = var->type;
      if (array_index)
         type = glsl_get_array_element(type);

      unsigned var_size = state->type_size(type, var->data.bindless);
      if (var_size)
         nir_intrinsic_set_range(load, var_size);
      else
         nir_intrinsic_set_range(load, ~0);
   }

   if (mode == nir_var_shader_in || mode == nir_var_shader_out)
      nir_intrinsic_set_component(load, component);

   if (nir_intrinsic_has_access(load))
      nir_intrinsic_set_access(load, var->data.access);

   nir_intrinsic_set_dest_type(load, dest_type);

   if (load->intrinsic != nir_intrinsic_load_uniform) {
      int location = var->data.location;
      unsigned num_slots = get_number_of_slots(state, var);

      /* Maximum values in nir_io_semantics. */
      assert(num_slots <= 63);
      assert(location >= 0 && location + num_slots <= NUM_TOTAL_VARYING_SLOTS);

      nir_io_semantics semantics = { 0 };
      semantics.location = location;
      semantics.num_slots = num_slots;
      semantics.fb_fetch_output = var->data.fb_fetch_output;
      if (semantics.fb_fetch_output) {
         semantics.fb_fetch_output_coherent =
            !!(var->data.access & ACCESS_COHERENT);
      }
      semantics.medium_precision = is_medium_precision(b->shader, var);
      semantics.high_dvec2 = high_dvec2;
      /* "per_vertex" is misnamed. It means "explicit interpolation with
       * the original vertex order", which is a stricter version of
       * INTERP_MODE_EXPLICIT.
       */
      semantics.interp_explicit_strict = var->data.per_vertex;
      nir_intrinsic_set_io_semantics(load, semantics);

      offset = simplify_offset_src(b, offset, num_slots);
   }

   if (array_index) {
      load->src[0] = nir_src_for_ssa(array_index);
      load->src[1] = nir_src_for_ssa(offset);
   } else if (barycentric) {
      load->src[0] = nir_src_for_ssa(barycentric);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }

   nir_def_init(&load->instr, &load->def, num_components, bit_size);
   nir_builder_instr_insert(b, &load->instr);

   return &load->def;
}
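
/* Illustrative result (pseudo-NIR, not literal compiler output): lowering a
 * load of a smooth fragment-shader input `in vec2 uv` yields roughly
 *
 *    ssa_1 = load_barycentric_pixel (interp_mode=smooth)
 *    ssa_2 = load_interpolated_input (ssa_1, offset=0) (component=0, ...)
 *
 * i.e. a barycentric setup intrinsic feeding load_interpolated_input.
 */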

static nir_def *
lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
           nir_def *array_index, nir_variable *var, nir_def *offset,
           unsigned component, const struct glsl_type *type)
{
   const bool lower_double = !glsl_type_is_integer(type) &&
                             (state->options & nir_lower_io_lower_64bit_float_to_32);
   if (intrin->def.bit_size == 64 &&
       (lower_double || (state->options & (nir_lower_io_lower_64bit_to_32_new |
                                           nir_lower_io_lower_64bit_to_32)))) {
      nir_builder *b = &state->builder;
      bool use_high_dvec2_semantic = uses_high_dvec2_semantic(state, var);

      /* Each slot is a dual slot, so divide the offset within the variable
       * by 2.
       */
      if (use_high_dvec2_semantic)
         offset = nir_ushr_imm(b, offset, 1);

      const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);

      nir_def *comp64[4];
      assert(component == 0 || component == 2);
      unsigned dest_comp = 0;
      bool high_dvec2 = false;
      while (dest_comp < intrin->def.num_components) {
         const unsigned num_comps =
            MIN2(intrin->def.num_components - dest_comp,
                 (4 - component) / 2);

         nir_def *data32 =
            emit_load(state, array_index, var, offset, component,
                      num_comps * 2, 32, nir_type_uint32, high_dvec2);
         for (unsigned i = 0; i < num_comps; i++) {
            comp64[dest_comp + i] =
               nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2)));
         }

         /* Only the first load has a component offset */
         component = 0;
         dest_comp += num_comps;

         if (use_high_dvec2_semantic) {
            /* Increment the offset when we wrap around the dual slot. */
            if (high_dvec2)
               offset = nir_iadd_imm(b, offset, slot_size);
            high_dvec2 = !high_dvec2;
         } else {
            offset = nir_iadd_imm(b, offset, slot_size);
         }
      }

      return nir_vec(b, comp64, intrin->def.num_components);
   } else if (intrin->def.bit_size == 1) {
      /* Booleans are 32-bit */
      assert(glsl_type_is_boolean(type));
      return nir_b2b1(&state->builder,
                      emit_load(state, array_index, var, offset, component,
                                intrin->def.num_components, 32,
                                nir_type_bool32, false));
   } else {
      return emit_load(state, array_index, var, offset, component,
                       intrin->def.num_components,
                       intrin->def.bit_size,
                       nir_get_nir_type_for_glsl_type(type), false);
   }
}
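
/* Worked example (illustrative): loading a dvec3 splits into two 32-bit
 * loads. With component == 0, the first iteration loads 4 dwords and packs
 * them into the first two doubles with nir_pack_64_2x32; the second loads
 * 2 dwords from the next slot for the third double, and nir_vec()
 * reassembles the dvec3.
 */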

static void
emit_store(struct lower_io_state *state, nir_def *data,
           nir_def *array_index, nir_variable *var, nir_def *offset,
           unsigned component, unsigned num_components,
           nir_component_mask_t write_mask, nir_alu_type src_type)
{
   nir_builder *b = &state->builder;

   assert(var->data.mode == nir_var_shader_out);
   nir_intrinsic_op op;
   if (!array_index)
      op = nir_intrinsic_store_output;
   else if (var->data.per_view)
      op = nir_intrinsic_store_per_view_output;
   else if (var->data.per_primitive)
      op = nir_intrinsic_store_per_primitive_output;
   else
      op = nir_intrinsic_store_per_vertex_output;

   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(state->builder.shader, op);
   store->num_components = num_components;
   store->name = add_variable_name(state, var->name);

   store->src[0] = nir_src_for_ssa(data);

   const struct glsl_type *type = var->type;
   if (array_index)
      type = glsl_get_array_element(type);
   unsigned var_size = state->type_size(type, var->data.bindless);
   nir_intrinsic_set_base(store, var->data.driver_location);
   nir_intrinsic_set_range(store, var_size);
   nir_intrinsic_set_component(store, component);
   nir_intrinsic_set_src_type(store, src_type);

   nir_intrinsic_set_write_mask(store, write_mask);

   if (nir_intrinsic_has_access(store))
      nir_intrinsic_set_access(store, var->data.access);

   if (array_index)
      store->src[1] = nir_src_for_ssa(array_index);

   unsigned num_slots = get_number_of_slots(state, var);

   offset = simplify_offset_src(b, offset, num_slots);
   store->src[array_index ? 2 : 1] = nir_src_for_ssa(offset);

   unsigned gs_streams = 0;
   if (state->builder.shader->info.stage == MESA_SHADER_GEOMETRY) {
      if (var->data.stream & NIR_STREAM_PACKED) {
         gs_streams = var->data.stream & ~NIR_STREAM_PACKED;
      } else {
         assert(var->data.stream < 4);
         gs_streams = 0;
         for (unsigned i = 0; i < num_components; ++i)
            gs_streams |= var->data.stream << (2 * i);
      }
   }

   int location = var->data.location;

   /* Maximum values in nir_io_semantics. */
   assert(num_slots <= 63);
   assert(location >= 0 && location + num_slots <= NUM_TOTAL_VARYING_SLOTS);

   nir_io_semantics semantics = { 0 };
   semantics.location = location;
   semantics.num_slots = num_slots;
   semantics.dual_source_blend_index = var->data.index;
   semantics.gs_streams = gs_streams;
   semantics.medium_precision = is_medium_precision(b->shader, var);
   semantics.per_view = var->data.per_view;

   nir_intrinsic_set_io_semantics(store, semantics);

   nir_builder_instr_insert(b, &store->instr);
}

static void
lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
            nir_def *array_index, nir_variable *var, nir_def *offset,
            unsigned component, const struct glsl_type *type)
{
   const bool lower_double = !glsl_type_is_integer(type) &&
                             (state->options & nir_lower_io_lower_64bit_float_to_32);
   if (intrin->src[1].ssa->bit_size == 64 &&
       (lower_double || (state->options & (nir_lower_io_lower_64bit_to_32 |
                                           nir_lower_io_lower_64bit_to_32_new)))) {
      nir_builder *b = &state->builder;

      const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);

      assert(component == 0 || component == 2);
      unsigned src_comp = 0;
      nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
      while (src_comp < intrin->num_components) {
         const unsigned num_comps =
            MIN2(intrin->num_components - src_comp,
                 (4 - component) / 2);

         if (write_mask & BITFIELD_MASK(num_comps)) {
            nir_def *data =
               nir_channels(b, intrin->src[1].ssa,
                            BITFIELD_RANGE(src_comp, num_comps));
            nir_def *data32 = nir_bitcast_vector(b, data, 32);

            uint32_t write_mask32 = 0;
            for (unsigned i = 0; i < num_comps; i++) {
               if (write_mask & BITFIELD_MASK(num_comps) & (1 << i))
                  write_mask32 |= 3 << (i * 2);
            }

            emit_store(state, data32, array_index, var, offset,
                       component, data32->num_components, write_mask32,
                       nir_type_uint32);
         }

         /* Only the first store has a component offset */
         component = 0;
         src_comp += num_comps;
         write_mask >>= num_comps;
         offset = nir_iadd_imm(b, offset, slot_size);
      }
   } else if (intrin->src[1].ssa->bit_size == 1) {
      /* Booleans are 32-bit */
      assert(glsl_type_is_boolean(type));
      nir_def *b32_val = nir_b2b32(&state->builder, intrin->src[1].ssa);
      emit_store(state, b32_val, array_index, var, offset,
                 component, intrin->num_components,
                 nir_intrinsic_write_mask(intrin),
                 nir_type_bool32);
   } else {
      emit_store(state, intrin->src[1].ssa, array_index, var, offset,
                 component, intrin->num_components,
                 nir_intrinsic_write_mask(intrin),
                 nir_get_nir_type_for_glsl_type(type));
   }
}
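
/* Worked example (illustrative): storing a dvec3 with write mask 0b101
 * (components x and z). The first slot pair covers x/y: only x is written,
 * so the 32-bit write mask becomes 0b0011. After shifting, the second
 * iteration covers z with mask 0b1, which expands to 0b11 for its dwords.
 */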

static nir_def *
lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
                     nir_variable *var, nir_def *offset, unsigned component,
                     const struct glsl_type *type)
{
   nir_builder *b = &state->builder;
   assert(var->data.mode == nir_var_shader_in);

   /* Ignore interpolateAt() for flat variables - flat is flat. Lower
    * interpolateAtVertex() for explicit variables.
    */
   if (var->data.interpolation == INTERP_MODE_FLAT ||
       var->data.interpolation == INTERP_MODE_EXPLICIT) {
      nir_def *vertex_index = NULL;

      if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
         assert(intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex);
         vertex_index = intrin->src[1].ssa;
      }

      return lower_load(intrin, state, vertex_index, var, offset, component, type);
   }

   /* None of the supported APIs allow interpolation on 64-bit things */
   assert(intrin->def.bit_size <= 32);

   nir_intrinsic_op bary_op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_interp_deref_at_centroid:
      bary_op = nir_intrinsic_load_barycentric_centroid;
      break;
   case nir_intrinsic_interp_deref_at_sample:
      bary_op = nir_intrinsic_load_barycentric_at_sample;
      break;
   case nir_intrinsic_interp_deref_at_offset:
      bary_op = nir_intrinsic_load_barycentric_at_offset;
      break;
   default:
      UNREACHABLE("Bogus interpolateAt() intrinsic.");
   }

   nir_intrinsic_instr *bary_setup =
      nir_intrinsic_instr_create(state->builder.shader, bary_op);

   nir_def_init(&bary_setup->instr, &bary_setup->def, 2, 32);
   nir_intrinsic_set_interp_mode(bary_setup, get_interp_mode(var));

   if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
       intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
       intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex)
      bary_setup->src[0] = nir_src_for_ssa(intrin->src[1].ssa);

   nir_builder_instr_insert(b, &bary_setup->instr);

   nir_io_semantics semantics = { 0 };
   semantics.location = var->data.location;
   semantics.num_slots = get_number_of_slots(state, var);
   semantics.medium_precision = is_medium_precision(b->shader, var);

   offset = simplify_offset_src(b, offset, semantics.num_slots);

   nir_def *load =
      nir_load_interpolated_input(&state->builder,
                                  intrin->def.num_components,
                                  intrin->def.bit_size,
                                  &bary_setup->def,
                                  offset,
                                  .base = var->data.driver_location,
                                  .component = component,
                                  .io_semantics = semantics);

   return load;
}

/**
 * Convert a compact view index emitted by nir_lower_multiview to an absolute
 * view index.
 */
static nir_def *
uncompact_view_index(nir_builder *b, nir_src compact_index_src)
{
   /* We require nir_lower_io_vars_to_temporaries when using absolute view
    * indices, which ensures the index is constant.
    */
   assert(nir_src_is_const(compact_index_src));
   unsigned compact_index = nir_src_as_uint(compact_index_src);

   unsigned view_index;
   uint32_t view_mask = b->shader->info.view_mask;
   for (unsigned i = 0; i <= compact_index; i++) {
      view_index = u_bit_scan(&view_mask);
   }

   return nir_imm_int(b, view_index);
}
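
/* Worked example (illustrative): with view_mask = 0b1010 (views 1 and 3
 * active), compact index 0 maps to view 1 and compact index 1 maps to
 * view 3, since u_bit_scan() pops the lowest set bit on each iteration.
 */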

static bool
nir_lower_io_block(nir_block *block,
                   struct lower_io_state *state)
{
   nir_builder *b = &state->builder;
   const nir_shader_compiler_options *options = b->shader->options;
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
      case nir_intrinsic_store_deref:
         /* We can lower the IO for this NIR intrinsic */
         break;

      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
      case nir_intrinsic_interp_deref_at_vertex:
         /* We can optionally lower these to load_interpolated_input */
         if (state->options & nir_lower_io_use_interpolated_input_intrinsics ||
             options->lower_interpolate_at)
            break;
         FALLTHROUGH;

      default:
         /* We can't lower the IO for this NIR intrinsic, so skip it */
         continue;
      }

      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
      if (!nir_deref_mode_is_one_of(deref, state->modes))
         continue;

      nir_variable *var = nir_deref_instr_get_variable(deref);

      b->cursor = nir_before_instr(instr);

      const bool is_arrayed = nir_is_arrayed_io(var, b->shader->info.stage);

      nir_def *offset;
      nir_def *array_index = NULL;
      unsigned component_offset = var->data.location_frac;
      bool bindless_type_size = var->data.mode == nir_var_shader_in ||
                                var->data.mode == nir_var_shader_out ||
                                var->data.bindless;

      if (nir_deref_instr_is_known_out_of_bounds(deref)) {
         /* Section 5.11 (Out-of-Bounds Accesses) of the GLSL 4.60 spec says:
          *
          *    In the subsections described above for array, vector, matrix
          *    and structure accesses, any out-of-bounds access produced
          *    undefined behavior....
          *    Out-of-bounds reads return undefined values, which
          *    include values from other variables of the active program
          *    or zero.
          *    Out-of-bounds writes may be discarded or overwrite
          *    other variables of the active program.
          *
          * GL_KHR_robustness and GL_ARB_robustness encourage us to return
          * zero for reads.
          *
          * Otherwise get_io_offset would return an out-of-bounds offset,
          * which may result in out-of-bounds loads/stores of inputs/outputs
          * that could cause issues in drivers down the line.
          */
         if (intrin->intrinsic != nir_intrinsic_store_deref) {
            nir_def *zero =
               nir_imm_zero(b, intrin->def.num_components,
                            intrin->def.bit_size);
            nir_def_rewrite_uses(&intrin->def, zero);
         }

         nir_instr_remove(&intrin->instr);
         progress = true;
         continue;
      }

      offset = get_io_offset(b, deref, is_arrayed ? &array_index : NULL,
                             state->type_size, &component_offset,
                             bindless_type_size);

      if (!options->compact_view_index && array_index && var->data.per_view)
         array_index = uncompact_view_index(b, nir_src_for_ssa(array_index));

      nir_def *replacement = NULL;

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
         replacement = lower_load(intrin, state, array_index, var, offset,
                                  component_offset, deref->type);
         break;

      case nir_intrinsic_store_deref:
         lower_store(intrin, state, array_index, var, offset,
                     component_offset, deref->type);
         break;

      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
      case nir_intrinsic_interp_deref_at_vertex:
         assert(array_index == NULL);
         replacement = lower_interpolate_at(intrin, state, var, offset,
                                            component_offset, deref->type);
         break;

      default:
         continue;
      }

      if (replacement) {
         nir_def_rewrite_uses(&intrin->def, replacement);
      }
      nir_instr_remove(&intrin->instr);
      progress = true;
   }

   return progress;
}

static bool
nir_lower_io_impl(nir_function_impl *impl,
                  nir_variable_mode modes,
                  int (*type_size)(const struct glsl_type *, bool),
                  nir_lower_io_options options)
{
   struct lower_io_state state;
   bool progress = false;

   state.builder = nir_builder_create(impl);
   state.dead_ctx = ralloc_context(NULL);
   state.modes = modes;
   state.type_size = type_size;
   state.options = options;
   _mesa_set_init(&state.variable_names, state.dead_ctx,
                  _mesa_hash_string, _mesa_key_string_equal);

   ASSERTED nir_variable_mode supported_modes =
      nir_var_shader_in | nir_var_shader_out | nir_var_uniform;
   assert(!(modes & ~supported_modes));

   nir_foreach_block(block, impl) {
      progress |= nir_lower_io_block(block, &state);
   }

   ralloc_free(state.dead_ctx);

   nir_progress(true, impl, nir_metadata_none);

   return progress;
}

/** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics
 *
 * This pass is intended to be used for cross-stage shader I/O and driver-
 * managed uniforms to turn deref-based access into a simpler model using
 * locations or offsets. For fragment shader inputs, it can optionally turn
 * load_deref into an explicit interpolation using barycentrics coming from
 * one of the load_barycentric_* intrinsics. This pass requires that all
 * deref chains are complete and contain no casts.
 */
bool
nir_lower_io(nir_shader *shader, nir_variable_mode modes,
             int (*type_size)(const struct glsl_type *, bool),
             nir_lower_io_options options)
{
   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      progress |= nir_lower_io_impl(impl, modes, type_size, options);
   }

   return progress;
}
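
/* Usage sketch (illustrative, driver-dependent): a backend that consumes
 * vec4 slots might invoke the pass as
 *
 *    NIR_PASS(_, nir, nir_lower_io,
 *             nir_var_shader_in | nir_var_shader_out,
 *             type_size_vec4,
 *             nir_lower_io_lower_64bit_to_32);
 *
 * where type_size_vec4 is a callback like the static helper defined later
 * in this file.
 */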

/**
 * Return the offset source number for a load/store intrinsic or -1 if
 * there's no offset.
 */
int
nir_get_io_offset_src_number(const nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_primitive_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_task_payload:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_push_constant:
   case nir_intrinsic_load_kernel_input:
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_2x32:
   case nir_intrinsic_load_global_constant:
   case nir_intrinsic_load_global_etna:
   case nir_intrinsic_load_scratch:
   case nir_intrinsic_load_fs_input_interp_deltas:
   case nir_intrinsic_shared_atomic:
   case nir_intrinsic_shared_atomic_swap:
   case nir_intrinsic_task_payload_atomic:
   case nir_intrinsic_task_payload_atomic_swap:
   case nir_intrinsic_global_atomic:
   case nir_intrinsic_global_atomic_2x32:
   case nir_intrinsic_global_atomic_swap:
   case nir_intrinsic_global_atomic_swap_2x32:
   case nir_intrinsic_load_coefficients_agx:
   case nir_intrinsic_load_shared_block_intel:
   case nir_intrinsic_load_global_block_intel:
   case nir_intrinsic_load_shared_uniform_block_intel:
   case nir_intrinsic_load_global_constant_uniform_block_intel:
   case nir_intrinsic_load_shared2_amd:
   case nir_intrinsic_load_const_ir3:
   case nir_intrinsic_load_shared_ir3:
      return 0;
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ubo_vec4:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_input_vertex:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_per_view_output:
   case nir_intrinsic_load_per_primitive_output:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_global_amd:
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_shared:
   case nir_intrinsic_store_task_payload:
   case nir_intrinsic_store_global:
   case nir_intrinsic_store_global_2x32:
   case nir_intrinsic_store_global_etna:
   case nir_intrinsic_store_scratch:
   case nir_intrinsic_ssbo_atomic:
   case nir_intrinsic_ssbo_atomic_swap:
   case nir_intrinsic_ldc_nv:
   case nir_intrinsic_ldcx_nv:
   case nir_intrinsic_load_ssbo_block_intel:
   case nir_intrinsic_store_global_block_intel:
   case nir_intrinsic_store_shared_block_intel:
   case nir_intrinsic_load_ubo_uniform_block_intel:
   case nir_intrinsic_load_ssbo_uniform_block_intel:
   case nir_intrinsic_load_buffer_amd:
   case nir_intrinsic_store_shared2_amd:
   case nir_intrinsic_store_shared_ir3:
   case nir_intrinsic_load_ssbo_intel:
      return 1;
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_per_vertex_output:
   case nir_intrinsic_store_per_view_output:
   case nir_intrinsic_store_per_primitive_output:
   case nir_intrinsic_load_attribute_pan:
   case nir_intrinsic_store_ssbo_block_intel:
   case nir_intrinsic_store_buffer_amd:
   case nir_intrinsic_store_ssbo_intel:
   case nir_intrinsic_store_global_amd:
   case nir_intrinsic_global_atomic_amd:
      return 2;
   case nir_intrinsic_load_ssbo_ir3:
      /* This intrinsic has 2 offsets (src1 bytes, src2 dwords); we return
       * the dwords one for opt_offsets.
       */
      return 2;
   case nir_intrinsic_store_ssbo_ir3:
      /* This intrinsic has 2 offsets (src2 bytes, src3 dwords); we return
       * the dwords one for opt_offsets.
       */
      return 3;
   case nir_intrinsic_global_atomic_swap_amd:
      return 3;
   default:
      return -1;
   }
}

/**
 * Return the offset source for a load/store intrinsic.
 */
nir_src *
nir_get_io_offset_src(nir_intrinsic_instr *instr)
{
   const int idx = nir_get_io_offset_src_number(instr);
   return idx >= 0 ? &instr->src[idx] : NULL;
}

/**
 * Return the index or handle source number for a load/store intrinsic or -1
 * if there's no index or handle.
 */
int
nir_get_io_index_src_number(const nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_get_ubo_size:
   case nir_intrinsic_get_ssbo_size:
   case nir_intrinsic_load_input_vertex:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_per_view_output:
   case nir_intrinsic_load_per_primitive_output:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_global_amd:
   case nir_intrinsic_global_atomic_amd:
   case nir_intrinsic_global_atomic_swap_amd:
   case nir_intrinsic_ldc_nv:
   case nir_intrinsic_ldcx_nv:
   case nir_intrinsic_load_ssbo_intel:
   case nir_intrinsic_load_ssbo_block_intel:
   case nir_intrinsic_store_global_block_intel:
   case nir_intrinsic_store_shared_block_intel:
   case nir_intrinsic_load_ubo_uniform_block_intel:
   case nir_intrinsic_load_ssbo_uniform_block_intel:
#define IMG_CASE(name)                      \
   case nir_intrinsic_image_##name:         \
   case nir_intrinsic_bindless_image_##name
   IMG_CASE(load):
   IMG_CASE(store):
   IMG_CASE(sparse_load):
   IMG_CASE(atomic):
   IMG_CASE(atomic_swap):
   IMG_CASE(size):
   IMG_CASE(levels):
   IMG_CASE(samples):
   IMG_CASE(texel_address):
   IMG_CASE(samples_identical):
   IMG_CASE(descriptor_amd):
   IMG_CASE(format):
   IMG_CASE(order):
   IMG_CASE(fragment_mask_load_amd):
      return 0;
#undef IMG_CASE
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_per_vertex_output:
   case nir_intrinsic_store_per_view_output:
   case nir_intrinsic_store_per_primitive_output:
   case nir_intrinsic_store_ssbo_block_intel:
   case nir_intrinsic_store_ssbo_intel:
   case nir_intrinsic_store_global_amd:
      return 1;
   default:
      return -1;
   }
}

/**
 * Return the index or handle source for a load/store intrinsic.
 */
nir_src *
nir_get_io_index_src(nir_intrinsic_instr *instr)
{
   const int idx = nir_get_io_index_src_number(instr);
   return idx >= 0 ? &instr->src[idx] : NULL;
}

/**
 * Return the array index source number for an arrayed load/store intrinsic
 * or -1 if there's no array index.
 */
int
nir_get_io_arrayed_index_src_number(const nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_per_view_output:
   case nir_intrinsic_load_per_primitive_output:
      return 0;
   case nir_intrinsic_store_per_vertex_output:
   case nir_intrinsic_store_per_view_output:
   case nir_intrinsic_store_per_primitive_output:
      return 1;
   default:
      return -1;
   }
}

bool
nir_is_output_load(nir_intrinsic_instr *intr)
{
   return intr->intrinsic == nir_intrinsic_load_output ||
          intr->intrinsic == nir_intrinsic_load_per_vertex_output ||
          intr->intrinsic == nir_intrinsic_load_per_primitive_output ||
          intr->intrinsic == nir_intrinsic_load_per_view_output;
}

/**
 * Return the array index source for an arrayed load/store intrinsic.
 */
nir_src *
nir_get_io_arrayed_index_src(nir_intrinsic_instr *instr)
{
   const int idx = nir_get_io_arrayed_index_src_number(instr);
   return idx >= 0 ? &instr->src[idx] : NULL;
}

static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}
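
/* Example (illustrative): with this callback, a float or a vec4 counts as
 * one slot and a mat4 counts as four, since glsl_count_attribute_slots()
 * counts vec4-sized attribute slots.
 */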

/**
 * This runs all compiler passes needed to lower IO, lower indirect IO access,
 * set transform feedback info in IO intrinsics, and clean up the IR.
 *
 * \param renumber_vs_inputs
 *    Set to true if holes between VS inputs should be removed, which is safe
 *    to do in any shader linker that can handle that. Set to false if you
 *    want to keep holes between VS inputs, which is recommended to do in
 *    gallium drivers so as not to break the mapping of vertex elements to
 *    VS inputs expected by gallium frontends.
 */
void
nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs)
{
   if (mesa_shader_stage_is_compute(nir->info.stage) ||
       nir->info.stage == MESA_SHADER_TASK)
      return;

   bool lower_indirect_inputs =
      nir->info.stage != MESA_SHADER_MESH &&
      !(nir->options->support_indirect_inputs & BITFIELD_BIT(nir->info.stage));

   /* Transform feedback requires that indirect outputs are lowered. */
   bool lower_indirect_outputs =
      !(nir->options->support_indirect_outputs & BITFIELD_BIT(nir->info.stage)) ||
      nir->xfb_info;

   /* TODO: This is a hack until a better solution is available.
    * For all shaders except TCS, lower all outputs to temps because:
    * - there can be output loads (nobody expects those outside of TCS)
    * - drivers don't expect outputs that are only written inside control flow
    *
    * "lower_indirect_outputs = true" causes all outputs to be lowered to
    * temps, which lowers indirect stores, eliminates output loads, and moves
    * all output stores to the end of the shader (or to GS emits).
    */
   if (nir->info.stage != MESA_SHADER_TESS_CTRL)
      lower_indirect_outputs = true;

   /* TODO: Sorting variables by location is required due to some bug
    * in nir_lower_io_vars_to_temporaries. If variables are not sorted,
    * dEQP-GLES31.functional.separate_shader.random.0 fails.
    *
    * This isn't needed if nir_assign_io_var_locations is called because it
    * also sorts variables. However, if IO is lowered sooner than that, we
    * must sort explicitly here to get what nir_assign_io_var_locations does.
    */
   unsigned varying_var_mask =
      (nir->info.stage != MESA_SHADER_VERTEX &&
       nir->info.stage != MESA_SHADER_MESH ? nir_var_shader_in : 0) |
      (nir->info.stage != MESA_SHADER_FRAGMENT ? nir_var_shader_out : 0);
   nir_sort_variables_by_location(nir, varying_var_mask);

   if (lower_indirect_outputs) {
      NIR_PASS(_, nir, nir_lower_io_vars_to_temporaries,
               nir_shader_get_entrypoint(nir), true, false);

      /* We need to lower all the copy_deref's introduced by
       * nir_lower_io_vars_to_temporaries before calling nir_lower_io.
       */
      NIR_PASS(_, nir, nir_split_var_copies);
      NIR_PASS(_, nir, nir_lower_var_copies);
      NIR_PASS(_, nir, nir_lower_global_vars_to_local);

      /* This is partially redundant with nir_lower_io_vars_to_temporaries.
       * The problem is that nir_lower_io_vars_to_temporaries doesn't handle
       * TCS.
       */
      if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
         NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_shader_out,
                  UINT32_MAX);
      }
   }

   /* The correct lower_64bit_to_32 flag is required by st/mesa depending
    * on whether the GLSL linker lowers IO or not. Setting the wrong flag
    * would break 64-bit vertex attribs for GLSL.
    */
   NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out | nir_var_shader_in,
            type_size_vec4,
            (renumber_vs_inputs ? nir_lower_io_lower_64bit_to_32_new
                                : nir_lower_io_lower_64bit_to_32) |
               nir_lower_io_use_interpolated_input_intrinsics);

   /* nir_io_add_const_offset_to_base needs actual constants. */
   NIR_PASS(_, nir, nir_opt_constant_folding);
   NIR_PASS(_, nir, nir_io_add_const_offset_to_base,
            nir_var_shader_in | nir_var_shader_out);

   /* This must be called after nir_io_add_const_offset_to_base. */
   if (lower_indirect_inputs)
      NIR_PASS(_, nir, nir_lower_io_indirect_loads, nir_var_shader_in);

   /* Lower and remove dead derefs and variables to clean up the IR. */
   NIR_PASS(_, nir, nir_lower_vars_to_ssa);
   NIR_PASS(_, nir, nir_opt_dce);
   NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);

   /* If IO is lowered before var->data.driver_location is assigned, driver
    * locations are all 0, which means IO bases are all 0. It's not necessary
    * to set driver_location before lowering IO because the only thing that
    * identifies outputs is their semantic, and IO bases can always be
    * computed from the semantics.
    *
    * This assigns IO bases from scratch, using IO semantics to tell which
    * intrinsics refer to the same IO. If the bases already exist, they
    * will be reassigned, sorted by the semantic, and all holes removed.
    * This kind of canonicalizes all bases.
    *
    * This must be done after DCE to remove dead load_input intrinsics.
    */
   bool recompute_inputs =
      (nir->info.stage != MESA_SHADER_VERTEX || renumber_vs_inputs) &&
      nir->info.stage != MESA_SHADER_MESH;
   NIR_PASS(_, nir, nir_recompute_io_bases,
            (recompute_inputs ? nir_var_shader_in : 0) | nir_var_shader_out);

   if (nir->xfb_info)
      NIR_PASS(_, nir, nir_io_add_intrinsic_xfb_info);

   if (nir->options->lower_mediump_io)
      nir->options->lower_mediump_io(nir);

   nir->info.io_lowered = true;
}