/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

/*
 * This lowering pass converts loads and stores of input/output variables
 * into the corresponding input/output intrinsics.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"

#include "util/u_math.h"

struct lower_io_state {
   void *dead_ctx;
   nir_builder builder;
   int (*type_size)(const struct glsl_type *type, bool);
   nir_variable_mode modes;
   nir_lower_io_options options;
};

static nir_intrinsic_op
ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_ssbo_##O;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
   OP(atomic_fadd)
   OP(atomic_fmin)
   OP(atomic_fmax)
   OP(atomic_fcomp_swap)
#undef OP
   default:
      unreachable("Invalid SSBO atomic");
   }
}
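
/*
 * For reference, OP(atomic_add) above expands to
 *
 *    case nir_intrinsic_deref_atomic_add: return nir_intrinsic_ssbo_atomic_add;
 *
 * and the global_/shared_ variants below follow the same pattern.
 */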

static nir_intrinsic_op
global_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_global_##O;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
   OP(atomic_fadd)
   OP(atomic_fmin)
   OP(atomic_fmax)
   OP(atomic_fcomp_swap)
#undef OP
   default:
      unreachable("Invalid global atomic");
   }
}

static nir_intrinsic_op
shared_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_shared_##O;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
   OP(atomic_fadd)
   OP(atomic_fmin)
   OP(atomic_fmax)
   OP(atomic_fcomp_swap)
#undef OP
   default:
      unreachable("Invalid shared atomic");
   }
}
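
/*
 * Example use of nir_assign_var_locations() below (a sketch; "type_size_vec4"
 * stands in for whatever driver-specific slot-counting callback is also
 * passed to nir_lower_io and is not defined in this file):
 *
 *    unsigned num_input_slots = 0;
 *    nir_assign_var_locations(nir, nir_var_shader_in,
 *                             &num_input_slots, type_size_vec4);
 */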

void
nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
                         unsigned *size,
                         int (*type_size)(const struct glsl_type *, bool))
{
   unsigned location = 0;

   nir_foreach_variable_with_modes(var, shader, mode) {
      var->data.driver_location = location;
      bool bindless_type_size = var->data.mode == nir_var_shader_in ||
                                var->data.mode == nir_var_shader_out ||
                                var->data.bindless;
      location += type_size(var->type, bindless_type_size);
   }

   *size = location;
}

/**
 * Return true if the given variable is a per-vertex input/output array
 * (such as geometry shader inputs).
 */
bool
nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage)
{
   if (var->data.patch || !glsl_type_is_array(var->type))
      return false;

   if (var->data.mode == nir_var_shader_in)
      return stage == MESA_SHADER_GEOMETRY ||
             stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_TESS_EVAL;

   if (var->data.mode == nir_var_shader_out)
      return stage == MESA_SHADER_TESS_CTRL;

   return false;
}

static unsigned get_number_of_slots(struct lower_io_state *state,
                                    const nir_variable *var)
{
   const struct glsl_type *type = var->type;

   if (nir_is_per_vertex_io(var, state->builder.shader->info.stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   return state->type_size(type, var->data.bindless);
}

static nir_ssa_def *
get_io_offset(nir_builder *b, nir_deref_instr *deref,
              nir_ssa_def **vertex_index,
              int (*type_size)(const struct glsl_type *, bool),
              unsigned *component, bool bts)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   assert(path.path[0]->deref_type == nir_deref_type_var);
   nir_deref_instr **p = &path.path[1];

   /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
    * outermost array index separate.  Process the rest normally.
    */
   if (vertex_index != NULL) {
      assert((*p)->deref_type == nir_deref_type_array);
      *vertex_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
      p++;
   }

   if (path.path[0]->var->data.compact) {
      assert((*p)->deref_type == nir_deref_type_array);
      assert(glsl_type_is_scalar((*p)->type));

      /* We always lower indirect dereferences for "compact" array vars. */
      const unsigned index = nir_src_as_uint((*p)->arr.index);
      const unsigned total_offset = *component + index;
      const unsigned slot_offset = total_offset / 4;
      *component = total_offset % 4;
      return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
   }

   /* Just emit code and let constant-folding go to town */
   nir_ssa_def *offset = nir_imm_int(b, 0);

   for (; *p; p++) {
      if ((*p)->deref_type == nir_deref_type_array) {
         unsigned size = type_size((*p)->type, bts);

         nir_ssa_def *mul =
            nir_amul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);

         offset = nir_iadd(b, offset, mul);
      } else if ((*p)->deref_type == nir_deref_type_struct) {
         /* p starts at path[1], so this is safe */
         nir_deref_instr *parent = *(p - 1);

         unsigned field_offset = 0;
         for (unsigned i = 0; i < (*p)->strct.index; i++) {
            field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
         }
         offset = nir_iadd_imm(b, offset, field_offset);
      } else {
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   return offset;
}

static nir_ssa_def *
emit_load(struct lower_io_state *state,
          nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
          unsigned component, unsigned num_components, unsigned bit_size,
          nir_alu_type dest_type)
{
   nir_builder *b = &state->builder;
   const nir_shader *nir = b->shader;
   nir_variable_mode mode = var->data.mode;
   nir_ssa_def *barycentric = NULL;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_shader_in:
      if (nir->info.stage == MESA_SHADER_FRAGMENT &&
          nir->options->use_interpolated_input_intrinsics &&
          var->data.interpolation != INTERP_MODE_FLAT) {
         if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
            assert(vertex_index != NULL);
            op = nir_intrinsic_load_input_vertex;
         } else {
            assert(vertex_index == NULL);

            nir_intrinsic_op bary_op;
            if (var->data.sample ||
                (state->options & nir_lower_io_force_sample_interpolation))
               bary_op = nir_intrinsic_load_barycentric_sample;
            else if (var->data.centroid)
               bary_op = nir_intrinsic_load_barycentric_centroid;
            else
               bary_op = nir_intrinsic_load_barycentric_pixel;

            barycentric = nir_load_barycentric(&state->builder, bary_op,
                                               var->data.interpolation);
            op = nir_intrinsic_load_interpolated_input;
         }
      } else {
         op = vertex_index ? nir_intrinsic_load_per_vertex_input :
                             nir_intrinsic_load_input;
      }
      break;
   case nir_var_shader_out:
      op = vertex_index ? nir_intrinsic_load_per_vertex_output :
                          nir_intrinsic_load_output;
      break;
   case nir_var_uniform:
      op = nir_intrinsic_load_uniform;
      break;
   default:
      unreachable("Unknown variable mode");
   }

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader, op);
   load->num_components = num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   if (mode == nir_var_shader_in || mode == nir_var_shader_out)
      nir_intrinsic_set_component(load, component);

   if (load->intrinsic == nir_intrinsic_load_uniform)
      nir_intrinsic_set_range(load,
                              state->type_size(var->type, var->data.bindless));

   if (load->intrinsic == nir_intrinsic_load_input ||
       load->intrinsic == nir_intrinsic_load_input_vertex ||
       load->intrinsic == nir_intrinsic_load_uniform)
      nir_intrinsic_set_dest_type(load, dest_type);

   if (load->intrinsic != nir_intrinsic_load_uniform) {
      nir_io_semantics semantics = {0};
      semantics.location = var->data.location;
      semantics.num_slots = get_number_of_slots(state, var);
      semantics.fb_fetch_output = var->data.fb_fetch_output;
      semantics.medium_precision =
         var->data.precision == GLSL_PRECISION_MEDIUM ||
         var->data.precision == GLSL_PRECISION_LOW;
      nir_intrinsic_set_io_semantics(load, semantics);
   }

   if (vertex_index) {
      load->src[0] = nir_src_for_ssa(vertex_index);
      load->src[1] = nir_src_for_ssa(offset);
   } else if (barycentric) {
      load->src[0] = nir_src_for_ssa(barycentric);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }

   nir_ssa_dest_init(&load->instr, &load->dest,
                     num_components, bit_size, NULL);
   nir_builder_instr_insert(b, &load->instr);

   return &load->dest.ssa;
}

static nir_ssa_def *
lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
           nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
           unsigned component, const struct glsl_type *type)
{
   assert(intrin->dest.is_ssa);
   if (intrin->dest.ssa.bit_size == 64 &&
       (state->options & nir_lower_io_lower_64bit_to_32)) {
      nir_builder *b = &state->builder;

      const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);

      nir_ssa_def *comp64[4];
      assert(component == 0 || component == 2);
      unsigned dest_comp = 0;
      while (dest_comp < intrin->dest.ssa.num_components) {
         const unsigned num_comps =
            MIN2(intrin->dest.ssa.num_components - dest_comp,
                 (4 - component) / 2);

         nir_ssa_def *data32 =
            emit_load(state, vertex_index, var, offset, component,
                      num_comps * 2, 32, nir_type_uint32);
         for (unsigned i = 0; i < num_comps; i++) {
            comp64[dest_comp + i] =
               nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2)));
         }

         /* Only the first load has a component offset */
         component = 0;
         dest_comp += num_comps;
         offset = nir_iadd_imm(b, offset, slot_size);
      }

      return nir_vec(b, comp64, intrin->dest.ssa.num_components);
   } else if (intrin->dest.ssa.bit_size == 1) {
      /* Booleans are 32-bit */
      assert(glsl_type_is_boolean(type));
      return nir_b2b1(&state->builder,
                      emit_load(state, vertex_index, var, offset, component,
                                intrin->dest.ssa.num_components, 32,
                                nir_type_bool32));
   } else {
      return emit_load(state, vertex_index, var, offset, component,
                       intrin->dest.ssa.num_components,
                       intrin->dest.ssa.bit_size,
                       nir_get_nir_type_for_glsl_type(type));
   }
}

static void
emit_store(struct lower_io_state *state, nir_ssa_def *data,
           nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
           unsigned component, unsigned num_components,
           nir_component_mask_t write_mask, nir_alu_type src_type)
{
   nir_builder *b = &state->builder;
   nir_variable_mode mode = var->data.mode;

   assert(mode == nir_var_shader_out);
   nir_intrinsic_op op;
   op = vertex_index ? nir_intrinsic_store_per_vertex_output :
                       nir_intrinsic_store_output;

   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(state->builder.shader, op);
   store->num_components = num_components;

   store->src[0] = nir_src_for_ssa(data);

   nir_intrinsic_set_base(store, var->data.driver_location);

   if (mode == nir_var_shader_out)
      nir_intrinsic_set_component(store, component);

   if (store->intrinsic == nir_intrinsic_store_output)
      nir_intrinsic_set_src_type(store, src_type);

   nir_intrinsic_set_write_mask(store, write_mask);

   if (vertex_index)
      store->src[1] = nir_src_for_ssa(vertex_index);

   store->src[vertex_index ? 2 : 1] = nir_src_for_ssa(offset);

   unsigned gs_streams = 0;
   if (state->builder.shader->info.stage == MESA_SHADER_GEOMETRY) {
      if (var->data.stream & NIR_STREAM_PACKED) {
         gs_streams = var->data.stream & ~NIR_STREAM_PACKED;
      } else {
         assert(var->data.stream < 4);
         gs_streams = 0;
         for (unsigned i = 0; i < num_components; ++i)
            gs_streams |= var->data.stream << (2 * i);
      }
   }

   nir_io_semantics semantics = {0};
   semantics.location = var->data.location;
   semantics.num_slots = get_number_of_slots(state, var);
   semantics.dual_source_blend_index = var->data.index;
   semantics.gs_streams = gs_streams;
   semantics.medium_precision =
      var->data.precision == GLSL_PRECISION_MEDIUM ||
      var->data.precision == GLSL_PRECISION_LOW;
   semantics.per_view = var->data.per_view;
   nir_intrinsic_set_io_semantics(store, semantics);

   nir_builder_instr_insert(b, &store->instr);
}

static void
lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
            nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
            unsigned component, const struct glsl_type *type)
{
   assert(intrin->src[1].is_ssa);
   if (intrin->src[1].ssa->bit_size == 64 &&
       (state->options & nir_lower_io_lower_64bit_to_32)) {
      nir_builder *b = &state->builder;

      const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);

      assert(component == 0 || component == 2);
      unsigned src_comp = 0;
      nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
      while (src_comp < intrin->num_components) {
         const unsigned num_comps =
            MIN2(intrin->num_components - src_comp,
                 (4 - component) / 2);

         if (write_mask & BITFIELD_MASK(num_comps)) {
            nir_ssa_def *data =
               nir_channels(b, intrin->src[1].ssa,
                            BITFIELD_RANGE(src_comp, num_comps));
            nir_ssa_def *data32 = nir_bitcast_vector(b, data, 32);

            nir_component_mask_t write_mask32 = 0;
            for (unsigned i = 0; i < num_comps; i++) {
               if (write_mask & BITFIELD_MASK(num_comps) & (1 << i))
                  write_mask32 |= 3 << (i * 2);
            }

            emit_store(state, data32, vertex_index, var, offset,
                       component, data32->num_components, write_mask32,
                       nir_type_uint32);
         }

         /* Only the first store has a component offset */
         component = 0;
         src_comp += num_comps;
         write_mask >>= num_comps;
         offset = nir_iadd_imm(b, offset, slot_size);
      }
   } else if (intrin->src[1].ssa->bit_size == 1) {
      /* Booleans are 32-bit */
      assert(glsl_type_is_boolean(type));
      nir_ssa_def *b32_val = nir_b2b32(&state->builder, intrin->src[1].ssa);
      emit_store(state, b32_val, vertex_index, var, offset,
                 component, intrin->num_components,
                 nir_intrinsic_write_mask(intrin),
                 nir_type_bool32);
   } else {
      emit_store(state, intrin->src[1].ssa, vertex_index, var, offset,
                 component, intrin->num_components,
                 nir_intrinsic_write_mask(intrin),
                 nir_get_nir_type_for_glsl_type(type));
   }
}

static nir_ssa_def *
lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
                     nir_variable *var, nir_ssa_def *offset, unsigned component,
                     const struct glsl_type *type)
{
   nir_builder *b = &state->builder;
   assert(var->data.mode == nir_var_shader_in);

   /* Ignore interpolateAt() for flat variables - flat is flat.  Lower
    * interpolateAtVertex() for explicit variables.
    */
   if (var->data.interpolation == INTERP_MODE_FLAT ||
       var->data.interpolation == INTERP_MODE_EXPLICIT) {
      nir_ssa_def *vertex_index = NULL;

      if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
         assert(intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex);
         vertex_index = intrin->src[1].ssa;
      }

      return lower_load(intrin, state, vertex_index, var, offset, component, type);
   }

   /* None of the supported APIs allow interpolation on 64-bit things */
   assert(intrin->dest.is_ssa && intrin->dest.ssa.bit_size <= 32);

   nir_intrinsic_op bary_op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_interp_deref_at_centroid:
      bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
                nir_intrinsic_load_barycentric_sample :
                nir_intrinsic_load_barycentric_centroid;
      break;
   case nir_intrinsic_interp_deref_at_sample:
      bary_op = nir_intrinsic_load_barycentric_at_sample;
      break;
   case nir_intrinsic_interp_deref_at_offset:
      bary_op = nir_intrinsic_load_barycentric_at_offset;
      break;
   default:
      unreachable("Bogus interpolateAt() intrinsic.");
   }

   nir_intrinsic_instr *bary_setup =
      nir_intrinsic_instr_create(state->builder.shader, bary_op);

   nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
   nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);

   if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
       intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
       intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex)
      nir_src_copy(&bary_setup->src[0], &intrin->src[1], bary_setup);

   nir_builder_instr_insert(b, &bary_setup->instr);

   nir_io_semantics semantics = {0};
   semantics.location = var->data.location;
   semantics.num_slots = get_number_of_slots(state, var);
   semantics.medium_precision =
      var->data.precision == GLSL_PRECISION_MEDIUM ||
      var->data.precision == GLSL_PRECISION_LOW;

   assert(intrin->dest.is_ssa);
   nir_ssa_def *load =
      nir_load_interpolated_input(&state->builder,
                                  intrin->dest.ssa.num_components,
                                  intrin->dest.ssa.bit_size,
                                  &bary_setup->dest.ssa,
                                  offset,
                                  .base = var->data.driver_location,
                                  .component = component,
                                  .io_semantics = semantics);

   return load;
}

static bool
nir_lower_io_block(nir_block *block,
                   struct lower_io_state *state)
{
   nir_builder *b = &state->builder;
   const nir_shader_compiler_options *options = b->shader->options;
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
      case nir_intrinsic_store_deref:
         /* We can lower the io for this nir intrinsic */
         break;
      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
      case nir_intrinsic_interp_deref_at_vertex:
         /* We can optionally lower these to load_interpolated_input */
         if (options->use_interpolated_input_intrinsics ||
             options->lower_interpolate_at)
            break;
         FALLTHROUGH;
      default:
         /* We can't lower the io for this nir intrinsic, so skip it */
         continue;
      }

      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
      if (!nir_deref_mode_is_one_of(deref, state->modes))
         continue;

      nir_variable *var = nir_deref_instr_get_variable(deref);

      b->cursor = nir_before_instr(instr);

      const bool per_vertex = nir_is_per_vertex_io(var, b->shader->info.stage);

      nir_ssa_def *offset;
      nir_ssa_def *vertex_index = NULL;
      unsigned component_offset = var->data.location_frac;
      bool bindless_type_size = var->data.mode == nir_var_shader_in ||
                                var->data.mode == nir_var_shader_out ||
                                var->data.bindless;

      if (nir_deref_instr_is_known_out_of_bounds(deref)) {
         /* Section 5.11 (Out-of-Bounds Accesses) of the GLSL 4.60 spec says:
          *
          *    In the subsections described above for array, vector, matrix and
          *    structure accesses, any out-of-bounds access produced undefined
          *    behavior....
          *    Out-of-bounds reads return undefined values, which
          *    include values from other variables of the active program or zero.
          *    Out-of-bounds writes may be discarded or overwrite
          *    other variables of the active program.
          *
          * GL_KHR_robustness and GL_ARB_robustness encourage us to return zero
          * for reads.
          *
          * Otherwise get_io_offset would return out-of-bound offset which may
          * result in out-of-bound loading/storing of inputs/outputs,
          * that could cause issues in drivers down the line.
          */
         if (intrin->intrinsic != nir_intrinsic_store_deref) {
            nir_ssa_def *zero =
               nir_imm_zero(b, intrin->dest.ssa.num_components,
                            intrin->dest.ssa.bit_size);
            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     nir_src_for_ssa(zero));
         }

         nir_instr_remove(&intrin->instr);
         progress = true;
         continue;
      }

      offset = get_io_offset(b, deref, per_vertex ? &vertex_index : NULL,
                             state->type_size, &component_offset,
                             bindless_type_size);

      nir_ssa_def *replacement = NULL;

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
         replacement = lower_load(intrin, state, vertex_index, var, offset,
                                  component_offset, deref->type);
         break;

      case nir_intrinsic_store_deref:
         lower_store(intrin, state, vertex_index, var, offset,
                     component_offset, deref->type);
         break;

      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
      case nir_intrinsic_interp_deref_at_vertex:
         assert(vertex_index == NULL);
         replacement = lower_interpolate_at(intrin, state, var, offset,
                                            component_offset, deref->type);
         break;

      default:
         continue;
      }

      if (replacement) {
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                  nir_src_for_ssa(replacement));
      }
      nir_instr_remove(&intrin->instr);
      progress = true;
   }

   return progress;
}

static bool
nir_lower_io_impl(nir_function_impl *impl,
                  nir_variable_mode modes,
                  int (*type_size)(const struct glsl_type *, bool),
                  nir_lower_io_options options)
{
   struct lower_io_state state;
   bool progress = false;

   nir_builder_init(&state.builder, impl);
   state.dead_ctx = ralloc_context(NULL);
   state.modes = modes;
   state.type_size = type_size;
   state.options = options;

   ASSERTED nir_variable_mode supported_modes =
      nir_var_shader_in | nir_var_shader_out | nir_var_uniform;
   assert(!(modes & ~supported_modes));

   nir_foreach_block(block, impl) {
      progress |= nir_lower_io_block(block, &state);
   }

   ralloc_free(state.dead_ctx);

   nir_metadata_preserve(impl, nir_metadata_none);

   return progress;
}

/** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics
 *
 * This pass is intended to be used for cross-stage shader I/O and driver-
 * managed uniforms to turn deref-based access into a simpler model using
 * locations or offsets.  For fragment shader inputs, it can optionally turn
 * load_deref into an explicit interpolation using barycentrics coming from
 * one of the load_barycentric_* intrinsics.  This pass requires that all
 * deref chains are complete and contain no casts.
 */
bool
nir_lower_io(nir_shader *shader, nir_variable_mode modes,
             int (*type_size)(const struct glsl_type *, bool),
             nir_lower_io_options options)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl) {
         progress |= nir_lower_io_impl(function->impl, modes,
                                       type_size, options);
      }
   }

   return progress;
}
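
/*
 * Typical driver use of nir_lower_io() above (a sketch, not taken from this
 * file): the type_size callback is driver-specific, and "count_vec4_slots"
 * below is a hypothetical helper built on glsl_count_attribute_slots().
 *
 *    static int
 *    count_vec4_slots(const struct glsl_type *type, bool bindless)
 *    {
 *       return glsl_count_attribute_slots(type, false);
 *    }
 *
 *    NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
 *               count_vec4_slots, (nir_lower_io_options)0);
 */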

static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) ||
          glsl_type_is_matrix(type));
   return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
}

static nir_ssa_def *
build_addr_iadd(nir_builder *b, nir_ssa_def *addr,
                nir_address_format addr_format,
                nir_variable_mode modes,
                nir_ssa_def *offset)
{
   assert(offset->num_components == 1);

   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
   case nir_address_format_32bit_offset:
      assert(addr->bit_size == offset->bit_size);
      assert(addr->num_components == 1);
      return nir_iadd(b, addr, offset);

   case nir_address_format_32bit_offset_as_64bit:
      assert(addr->num_components == 1);
      assert(offset->bit_size == 32);
      return nir_u2u64(b, nir_iadd(b, nir_u2u32(b, addr), offset));

   case nir_address_format_64bit_bounded_global:
      assert(addr->num_components == 4);
      assert(addr->bit_size == offset->bit_size);
      return nir_vec4(b, nir_channel(b, addr, 0),
                         nir_channel(b, addr, 1),
                         nir_channel(b, addr, 2),
                         nir_iadd(b, nir_channel(b, addr, 3), offset));

   case nir_address_format_32bit_index_offset:
      assert(addr->num_components == 2);
      assert(addr->bit_size == offset->bit_size);
      return nir_vec2(b, nir_channel(b, addr, 0),
                         nir_iadd(b, nir_channel(b, addr, 1), offset));

   case nir_address_format_32bit_index_offset_pack64:
      assert(addr->num_components == 1);
      assert(offset->bit_size == 32);
      return nir_pack_64_2x32_split(b,
         nir_iadd(b, nir_unpack_64_2x32_split_x(b, addr), offset),
         nir_unpack_64_2x32_split_y(b, addr));

   case nir_address_format_vec2_index_32bit_offset:
      assert(addr->num_components == 3);
      assert(offset->bit_size == 32);
      return nir_vec3(b, nir_channel(b, addr, 0), nir_channel(b, addr, 1),
                         nir_iadd(b, nir_channel(b, addr, 2), offset));

   case nir_address_format_62bit_generic:
      assert(addr->num_components == 1);
      assert(addr->bit_size == 64);
      assert(offset->bit_size == 64);
      if (!(modes & ~(nir_var_function_temp |
                      nir_var_shader_temp |
                      nir_var_mem_shared))) {
         /* If we're sure it's one of these modes, we can do an easy 32-bit
          * addition and don't need to bother with 64-bit math.
          */
         nir_ssa_def *addr32 = nir_unpack_64_2x32_split_x(b, addr);
         nir_ssa_def *type = nir_unpack_64_2x32_split_y(b, addr);
         addr32 = nir_iadd(b, addr32, nir_u2u32(b, offset));
         return nir_pack_64_2x32_split(b, addr32, type);
      } else {
         return nir_iadd(b, addr, offset);
      }

   case nir_address_format_logical:
      unreachable("Unsupported address format");
   }
   unreachable("Invalid address format");
}

static unsigned
addr_get_offset_bit_size(nir_ssa_def *addr, nir_address_format addr_format)
{
   if (addr_format == nir_address_format_32bit_offset_as_64bit ||
       addr_format == nir_address_format_32bit_index_offset_pack64)
      return 32;
   return addr->bit_size;
}

static nir_ssa_def *
build_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr,
                    nir_address_format addr_format,
                    nir_variable_mode modes,
                    int64_t offset)
{
   return build_addr_iadd(b, addr, addr_format, modes,
                          nir_imm_intN_t(b, offset,
                                         addr_get_offset_bit_size(addr, addr_format)));
}

static nir_ssa_def *
build_addr_for_var(nir_builder *b, nir_variable *var,
                   nir_address_format addr_format)
{
   assert(var->data.mode & (nir_var_uniform | nir_var_mem_shared |
                            nir_var_shader_temp | nir_var_function_temp |
                            nir_var_mem_push_const | nir_var_mem_constant));

   const unsigned num_comps = nir_address_format_num_components(addr_format);
   const unsigned bit_size = nir_address_format_bit_size(addr_format);

   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global: {
      nir_ssa_def *base_addr;
      switch (var->data.mode) {
      case nir_var_shader_temp:
         base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 0);
         break;

      case nir_var_function_temp:
         base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 1);
         break;

      case nir_var_mem_constant:
         base_addr = nir_load_constant_base_ptr(b, num_comps, bit_size);
         break;

      case nir_var_mem_shared:
         base_addr = nir_load_shared_base_ptr(b, num_comps, bit_size);
         break;

      default:
         unreachable("Unsupported variable mode");
      }

      return build_addr_iadd_imm(b, base_addr, addr_format, var->data.mode,
                                 var->data.driver_location);
   }

   case nir_address_format_32bit_offset:
      assert(var->data.driver_location <= UINT32_MAX);
      return nir_imm_int(b, var->data.driver_location);

   case nir_address_format_32bit_offset_as_64bit:
      assert(var->data.driver_location <= UINT32_MAX);
      return nir_imm_int64(b, var->data.driver_location);

   case nir_address_format_62bit_generic:
      switch (var->data.mode) {
      case nir_var_shader_temp:
      case nir_var_function_temp:
         assert(var->data.driver_location <= UINT32_MAX);
         return nir_imm_intN_t(b, var->data.driver_location | 2ull << 62, 64);

      case nir_var_mem_shared:
         assert(var->data.driver_location <= UINT32_MAX);
         return nir_imm_intN_t(b, var->data.driver_location | 1ull << 62, 64);

      default:
         unreachable("Unsupported variable mode");
      }

   default:
      unreachable("Unsupported address format");
   }
}
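
/*
 * For nir_address_format_62bit_generic, the top two bits of the 64-bit
 * address encode the variable mode (see build_addr_for_var above and the
 * run-time check below): 0x2 = shader/function temp, 0x1 = shared, and
 * 0x0 or 0x3 = global.
 */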

static nir_ssa_def *
build_runtime_addr_mode_check(nir_builder *b, nir_ssa_def *addr,
                              nir_address_format addr_format,
                              nir_variable_mode mode)
{
   /* The compile-time check failed; do a run-time check */
   switch (addr_format) {
   case nir_address_format_62bit_generic: {
      assert(addr->num_components == 1);
      assert(addr->bit_size == 64);
      nir_ssa_def *mode_enum = nir_ushr(b, addr, nir_imm_int(b, 62));
      switch (mode) {
      case nir_var_function_temp:
      case nir_var_shader_temp:
         return nir_ieq_imm(b, mode_enum, 0x2);

      case nir_var_mem_shared:
         return nir_ieq_imm(b, mode_enum, 0x1);

      case nir_var_mem_global:
         return nir_ior(b, nir_ieq_imm(b, mode_enum, 0x0),
                           nir_ieq_imm(b, mode_enum, 0x3));

      default:
         unreachable("Invalid mode check intrinsic");
      }
   }

   default:
      unreachable("Unsupported address mode");
   }
}

static nir_ssa_def *
addr_to_index(nir_builder *b, nir_ssa_def *addr,
              nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_index_offset:
      assert(addr->num_components == 2);
      return nir_channel(b, addr, 0);
   case nir_address_format_32bit_index_offset_pack64:
      return nir_unpack_64_2x32_split_y(b, addr);
   case nir_address_format_vec2_index_32bit_offset:
      assert(addr->num_components == 3);
      return nir_channels(b, addr, 0x3);
   default: unreachable("Invalid address format");
   }
}

static nir_ssa_def *
addr_to_offset(nir_builder *b, nir_ssa_def *addr,
               nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_index_offset:
      assert(addr->num_components == 2);
      return nir_channel(b, addr, 1);
   case nir_address_format_32bit_index_offset_pack64:
      return nir_unpack_64_2x32_split_x(b, addr);
   case nir_address_format_vec2_index_32bit_offset:
      assert(addr->num_components == 3);
      return nir_channel(b, addr, 2);
   case nir_address_format_32bit_offset:
      return addr;
   case nir_address_format_32bit_offset_as_64bit:
   case nir_address_format_62bit_generic:
      return nir_u2u32(b, addr);
   default:
      unreachable("Invalid address format");
   }
}

/** Returns true if the given address format resolves to a global address */
static bool
addr_format_is_global(nir_address_format addr_format,
                      nir_variable_mode mode)
{
   if (addr_format == nir_address_format_62bit_generic)
      return mode == nir_var_mem_global;

   return addr_format == nir_address_format_32bit_global ||
          addr_format == nir_address_format_64bit_global ||
          addr_format == nir_address_format_64bit_bounded_global;
}

static bool
addr_format_is_offset(nir_address_format addr_format,
                      nir_variable_mode mode)
{
   if (addr_format == nir_address_format_62bit_generic)
      return mode != nir_var_mem_global;

   return addr_format == nir_address_format_32bit_offset ||
          addr_format == nir_address_format_32bit_offset_as_64bit;
}

static nir_ssa_def *
addr_to_global(nir_builder *b, nir_ssa_def *addr,
               nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
   case nir_address_format_62bit_generic:
      assert(addr->num_components == 1);
      return addr;

   case nir_address_format_64bit_bounded_global:
      assert(addr->num_components == 4);
      return nir_iadd(b, nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)),
                         nir_u2u64(b, nir_channel(b, addr, 3)));

   case nir_address_format_32bit_index_offset:
   case nir_address_format_32bit_index_offset_pack64:
   case nir_address_format_vec2_index_32bit_offset:
   case nir_address_format_32bit_offset:
   case nir_address_format_32bit_offset_as_64bit:
   case nir_address_format_logical:
      unreachable("Cannot get a 64-bit address with this address format");
   }

   unreachable("Invalid address format");
}
|
|
|
|
|
|
2019-01-09 14:56:02 -06:00
|
|
|
static bool
|
|
|
|
|
addr_format_needs_bounds_check(nir_address_format addr_format)
|
|
|
|
|
{
|
|
|
|
|
return addr_format == nir_address_format_64bit_bounded_global;
|
|
|
|
|
}
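/* A nir_address_format_64bit_bounded_global address is a vec4: components 0
 * and 1 hold the 64-bit base address as two 32-bit words, component 2 holds
 * the size of the bound buffer, and component 3 holds the 32-bit byte offset
 * from that base.  addr_to_global() above and addr_is_in_bounds() below
 * depend on this layout.
 */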
static nir_ssa_def *
addr_is_in_bounds(nir_builder *b, nir_ssa_def *addr,
                  nir_address_format addr_format, unsigned size)
{
   assert(addr_format == nir_address_format_64bit_bounded_global);
   assert(addr->num_components == 4);
   return nir_ige(b, nir_channel(b, addr, 2),
                  nir_iadd_imm(b, nir_channel(b, addr, 3), size));
}
|
|
|
|
|
|
2020-09-04 13:25:05 -07:00
|
|
|
static void
|
|
|
|
|
nir_get_explicit_deref_range(nir_deref_instr *deref,
|
|
|
|
|
nir_address_format addr_format,
|
|
|
|
|
uint32_t *out_base,
|
|
|
|
|
uint32_t *out_range)
|
|
|
|
|
{
|
|
|
|
|
uint32_t base = 0;
|
|
|
|
|
uint32_t range = glsl_get_explicit_size(deref->type, false);
|
|
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
|
nir_deref_instr *parent = nir_deref_instr_parent(deref);
|
|
|
|
|
|
|
|
|
|
switch (deref->deref_type) {
|
|
|
|
|
case nir_deref_type_array:
|
|
|
|
|
case nir_deref_type_array_wildcard:
|
|
|
|
|
case nir_deref_type_ptr_as_array: {
|
|
|
|
|
const unsigned stride = nir_deref_instr_array_stride(deref);
|
|
|
|
|
if (stride == 0)
|
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
|
|
if (!parent)
|
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
|
|
if (deref->deref_type != nir_deref_type_array_wildcard &&
|
2020-09-15 23:53:14 +02:00
|
|
|
nir_src_is_const(deref->arr.index)) {
|
2020-09-04 13:25:05 -07:00
|
|
|
base += stride * nir_src_as_uint(deref->arr.index);
|
2020-09-15 23:53:14 +02:00
|
|
|
} else {
|
|
|
|
|
if (glsl_get_length(parent->type) == 0)
|
|
|
|
|
goto fail;
|
2020-09-04 13:25:05 -07:00
|
|
|
range += stride * (glsl_get_length(parent->type) - 1);
|
2020-09-15 23:53:14 +02:00
|
|
|
}
|
2020-09-04 13:25:05 -07:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case nir_deref_type_struct: {
|
|
|
|
|
if (!parent)
|
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
|
|
base += glsl_get_struct_field_offset(parent->type, deref->strct.index);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case nir_deref_type_cast: {
|
|
|
|
|
nir_instr *parent_instr = deref->parent.ssa->parent_instr;
|
|
|
|
|
|
|
|
|
|
switch (parent_instr->type) {
|
|
|
|
|
case nir_instr_type_load_const: {
|
|
|
|
|
nir_load_const_instr *load = nir_instr_as_load_const(parent_instr);
|
|
|
|
|
|
|
|
|
|
switch (addr_format) {
|
|
|
|
|
case nir_address_format_32bit_offset:
|
|
|
|
|
base += load->value[1].u32;
|
|
|
|
|
break;
|
|
|
|
|
case nir_address_format_32bit_index_offset:
|
|
|
|
|
base += load->value[1].u32;
|
|
|
|
|
break;
|
|
|
|
|
case nir_address_format_vec2_index_32bit_offset:
|
|
|
|
|
base += load->value[2].u32;
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
goto fail;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*out_base = base;
|
|
|
|
|
*out_range = range;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case nir_instr_type_intrinsic: {
|
|
|
|
|
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent_instr);
|
|
|
|
|
switch (intr->intrinsic) {
|
|
|
|
|
case nir_intrinsic_load_vulkan_descriptor:
|
|
|
|
|
/* Assume that a load_vulkan_descriptor won't contribute to an
|
|
|
|
|
* offset within the resource.
|
|
|
|
|
*/
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
goto fail;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*out_base = base;
|
|
|
|
|
*out_range = range;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
goto fail;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
goto fail;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
deref = parent;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fail:
|
|
|
|
|
*out_base = 0;
|
|
|
|
|
*out_range = ~0;
|
|
|
|
|
}
static nir_variable_mode
canonicalize_generic_modes(nir_variable_mode modes)
{
   assert(modes != 0);
   if (util_bitcount(modes) == 1)
      return modes;

   assert(!(modes & ~(nir_var_function_temp | nir_var_shader_temp |
                      nir_var_mem_shared | nir_var_mem_global)));

   /* Canonicalize by converting shader_temp to function_temp */
   if (modes & nir_var_shader_temp) {
      modes &= ~nir_var_shader_temp;
      modes |= nir_var_function_temp;
   }

   return modes;
}
|
|
|
|
|
|
2018-11-27 21:31:42 -06:00
|
|
|
static nir_ssa_def *
|
|
|
|
|
build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
|
|
|
|
|
nir_ssa_def *addr, nir_address_format addr_format,
|
2020-08-15 01:54:45 -05:00
|
|
|
nir_variable_mode modes,
|
2020-08-24 10:57:57 -05:00
|
|
|
uint32_t align_mul, uint32_t align_offset,
|
2018-11-27 21:31:42 -06:00
|
|
|
unsigned num_components)
|
|
|
|
|
{
|
2020-09-04 13:25:05 -07:00
|
|
|
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
2020-08-15 01:54:45 -05:00
|
|
|
modes = canonicalize_generic_modes(modes);
|
|
|
|
|
|
|
|
|
|
if (util_bitcount(modes) > 1) {
|
|
|
|
|
if (addr_format_is_global(addr_format, modes)) {
|
|
|
|
|
return build_explicit_io_load(b, intrin, addr, addr_format,
|
|
|
|
|
nir_var_mem_global,
|
|
|
|
|
align_mul, align_offset,
|
|
|
|
|
num_components);
|
|
|
|
|
} else if (modes & nir_var_function_temp) {
|
|
|
|
|
nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
|
|
|
|
|
nir_var_function_temp));
|
|
|
|
|
nir_ssa_def *res1 =
|
|
|
|
|
build_explicit_io_load(b, intrin, addr, addr_format,
|
|
|
|
|
nir_var_function_temp,
|
|
|
|
|
align_mul, align_offset,
|
|
|
|
|
num_components);
|
|
|
|
|
nir_push_else(b, NULL);
|
|
|
|
|
nir_ssa_def *res2 =
|
|
|
|
|
build_explicit_io_load(b, intrin, addr, addr_format,
|
|
|
|
|
modes & ~nir_var_function_temp,
|
|
|
|
|
align_mul, align_offset,
|
|
|
|
|
num_components);
|
|
|
|
|
nir_pop_if(b, NULL);
|
|
|
|
|
return nir_if_phi(b, res1, res2);
|
|
|
|
|
} else {
|
|
|
|
|
nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
|
|
|
|
|
nir_var_mem_shared));
|
|
|
|
|
assert(modes & nir_var_mem_shared);
|
|
|
|
|
nir_ssa_def *res1 =
|
|
|
|
|
build_explicit_io_load(b, intrin, addr, addr_format,
|
|
|
|
|
nir_var_mem_shared,
|
|
|
|
|
align_mul, align_offset,
|
|
|
|
|
num_components);
|
|
|
|
|
nir_push_else(b, NULL);
|
|
|
|
|
assert(modes & nir_var_mem_global);
|
|
|
|
|
nir_ssa_def *res2 =
|
|
|
|
|
build_explicit_io_load(b, intrin, addr, addr_format,
|
|
|
|
|
nir_var_mem_global,
|
|
|
|
|
align_mul, align_offset,
|
|
|
|
|
num_components);
|
|
|
|
|
nir_pop_if(b, NULL);
|
|
|
|
|
return nir_if_phi(b, res1, res2);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
assert(util_bitcount(modes) == 1);
|
|
|
|
|
const nir_variable_mode mode = modes;
|
2018-11-27 21:31:42 -06:00
|
|
|
|
|
|
|
|
nir_intrinsic_op op;
|
2020-10-05 14:46:36 -07:00
|
|
|
switch (intrin->intrinsic) {
|
|
|
|
|
case nir_intrinsic_load_deref:
|
|
|
|
|
switch (mode) {
|
|
|
|
|
case nir_var_mem_ubo:
|
|
|
|
|
op = nir_intrinsic_load_ubo;
|
|
|
|
|
break;
|
|
|
|
|
case nir_var_mem_ssbo:
|
|
|
|
|
if (addr_format_is_global(addr_format, mode))
|
|
|
|
|
op = nir_intrinsic_load_global;
|
|
|
|
|
else
|
|
|
|
|
op = nir_intrinsic_load_ssbo;
|
|
|
|
|
break;
|
|
|
|
|
case nir_var_mem_global:
|
2020-08-15 00:39:00 -05:00
|
|
|
assert(addr_format_is_global(addr_format, mode));
|
2020-07-14 13:32:19 -05:00
|
|
|
op = nir_intrinsic_load_global;
|
2020-10-05 14:46:36 -07:00
|
|
|
break;
|
|
|
|
|
case nir_var_uniform:
|
|
|
|
|
assert(addr_format_is_offset(addr_format, mode));
|
|
|
|
|
assert(b->shader->info.stage == MESA_SHADER_KERNEL);
|
|
|
|
|
op = nir_intrinsic_load_kernel_input;
|
|
|
|
|
break;
|
|
|
|
|
case nir_var_mem_shared:
|
|
|
|
|
assert(addr_format_is_offset(addr_format, mode));
|
|
|
|
|
op = nir_intrinsic_load_shared;
|
|
|
|
|
break;
|
|
|
|
|
case nir_var_shader_temp:
|
|
|
|
|
case nir_var_function_temp:
|
|
|
|
|
if (addr_format_is_offset(addr_format, mode)) {
|
|
|
|
|
op = nir_intrinsic_load_scratch;
|
|
|
|
|
} else {
|
|
|
|
|
assert(addr_format_is_global(addr_format, mode));
|
|
|
|
|
op = nir_intrinsic_load_global;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case nir_var_mem_push_const:
|
|
|
|
|
assert(addr_format == nir_address_format_32bit_offset);
|
|
|
|
|
op = nir_intrinsic_load_push_constant;
|
|
|
|
|
break;
|
|
|
|
|
case nir_var_mem_constant:
|
|
|
|
|
if (addr_format_is_offset(addr_format, mode)) {
|
|
|
|
|
op = nir_intrinsic_load_constant;
|
|
|
|
|
} else {
|
|
|
|
|
assert(addr_format_is_global(addr_format, mode));
|
|
|
|
|
op = nir_intrinsic_load_global_constant;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
unreachable("Unsupported explicit IO variable mode");
|
2020-07-14 13:32:19 -05:00
|
|
|
}
|
2020-05-26 12:21:33 -07:00
|
|
|
break;
|
2020-10-05 14:46:36 -07:00
|
|
|
|
|
|
|
|
case nir_intrinsic_load_deref_block_intel:
|
|
|
|
|
switch (mode) {
|
|
|
|
|
case nir_var_mem_ssbo:
|
|
|
|
|
if (addr_format_is_global(addr_format, mode))
|
|
|
|
|
op = nir_intrinsic_load_global_block_intel;
|
|
|
|
|
else
|
|
|
|
|
op = nir_intrinsic_load_ssbo_block_intel;
|
|
|
|
|
break;
|
|
|
|
|
case nir_var_mem_global:
|
|
|
|
|
op = nir_intrinsic_load_global_block_intel;
|
|
|
|
|
break;
|
|
|
|
|
case nir_var_mem_shared:
|
|
|
|
|
op = nir_intrinsic_load_shared_block_intel;
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
unreachable("Unsupported explicit IO variable mode");
|
2020-08-18 14:43:39 -05:00
|
|
|
}
|
|
|
|
|
break;
|
2020-10-05 14:46:36 -07:00
|
|
|
|
2018-11-27 21:31:42 -06:00
|
|
|
default:
|
2020-10-05 14:46:36 -07:00
|
|
|
unreachable("Invalid intrinsic");
|
2018-11-27 21:31:42 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
|
|
|
|
|
|
2020-08-15 00:39:00 -05:00
|
|
|
if (addr_format_is_global(addr_format, mode)) {
|
2019-01-07 17:17:46 -06:00
|
|
|
load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
|
2020-08-15 00:39:00 -05:00
|
|
|
} else if (addr_format_is_offset(addr_format, mode)) {
|
2019-04-09 17:28:13 +01:00
|
|
|
assert(addr->num_components == 1);
|
2020-05-25 08:57:14 -07:00
|
|
|
load->src[0] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
|
2019-01-07 17:17:46 -06:00
|
|
|
} else {
|
|
|
|
|
load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
|
|
|
|
|
load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
|
|
|
|
|
}
|
2018-11-27 21:31:42 -06:00
|
|
|
|
2020-08-20 14:01:23 +01:00
|
|
|
if (nir_intrinsic_has_access(load))
|
2018-11-27 21:31:42 -06:00
|
|
|
nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
|
|
|
|
|
|
2020-08-18 14:43:39 -05:00
|
|
|
if (op == nir_intrinsic_load_constant) {
|
|
|
|
|
nir_intrinsic_set_base(load, 0);
|
|
|
|
|
nir_intrinsic_set_range(load, b->shader->constant_data_size);
|
2020-05-27 17:08:28 -05:00
|
|
|
} else if (mode == nir_var_mem_push_const) {
|
|
|
|
|
/* Push constants are required to be able to be chased back to the
|
|
|
|
|
* variable so we can provide a base/range.
|
|
|
|
|
*/
|
|
|
|
|
nir_variable *var = nir_deref_instr_get_variable(deref);
|
|
|
|
|
nir_intrinsic_set_base(load, 0);
|
|
|
|
|
nir_intrinsic_set_range(load, glsl_get_explicit_size(var->type, false));
|
2020-08-18 14:43:39 -05:00
|
|
|
}
|
|
|
|
|
|
2019-08-27 18:32:07 -07:00
|
|
|
unsigned bit_size = intrin->dest.ssa.bit_size;
|
|
|
|
|
if (bit_size == 1) {
|
|
|
|
|
/* TODO: Make the native bool bit_size an option. */
|
|
|
|
|
bit_size = 32;
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-27 17:08:28 -05:00
|
|
|
if (nir_intrinsic_has_align(load))
|
|
|
|
|
nir_intrinsic_set_align(load, align_mul, align_offset);
|
2018-11-27 21:31:42 -06:00
|
|
|
|
2020-09-04 13:25:05 -07:00
|
|
|
if (nir_intrinsic_has_range_base(load)) {
|
|
|
|
|
unsigned base, range;
|
|
|
|
|
nir_get_explicit_deref_range(deref, addr_format, &base, &range);
|
|
|
|
|
nir_intrinsic_set_range_base(load, base);
|
|
|
|
|
nir_intrinsic_set_range(load, range);
|
|
|
|
}
|
|
|
|
|
|
2018-11-27 21:31:42 -06:00
|
|
|
assert(intrin->dest.is_ssa);
|
|
|
|
|
load->num_components = num_components;
|
|
|
|
|
nir_ssa_dest_init(&load->instr, &load->dest, num_components,
|
2019-08-27 18:32:07 -07:00
|
|
|
bit_size, intrin->dest.ssa.name);
|
2018-11-27 21:31:42 -06:00
|
|
|
|
2019-08-27 18:32:07 -07:00
|
|
|
assert(bit_size % 8 == 0);
|
2019-01-09 14:56:02 -06:00
|
|
|
|
2019-08-27 18:32:07 -07:00
|
|
|
nir_ssa_def *result;
|
2019-01-09 14:56:02 -06:00
|
|
|
if (addr_format_needs_bounds_check(addr_format)) {
|
|
|
|
|
/* The Vulkan spec for robustBufferAccess gives us quite a few options
|
|
|
|
|
* as to what we can do with an OOB read. Unfortunately, returning
|
|
|
|
|
* undefined values isn't one of them so we return an actual zero.
|
|
|
|
|
*/
|
2019-08-27 18:32:07 -07:00
|
|
|
nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size);
|
2019-01-09 14:56:02 -06:00
|
|
|
|
2020-10-05 14:46:36 -07:00
|
|
|
/* TODO: Better handle block_intel. */
|
2019-08-27 18:32:07 -07:00
|
|
|
const unsigned load_size = (bit_size / 8) * load->num_components;
|
2019-01-09 14:56:02 -06:00
|
|
|
nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
|
|
|
|
|
|
|
|
|
|
nir_builder_instr_insert(b, &load->instr);
|
|
|
|
|
|
|
|
|
|
nir_pop_if(b, NULL);
|
|
|
|
|
|
2019-08-27 18:32:07 -07:00
|
|
|
result = nir_if_phi(b, &load->dest.ssa, zero);
|
2019-01-09 14:56:02 -06:00
|
|
|
} else {
|
|
|
|
|
nir_builder_instr_insert(b, &load->instr);
|
2019-08-27 18:32:07 -07:00
|
|
|
result = &load->dest.ssa;
|
2019-01-09 14:56:02 -06:00
|
|
|
}
|
2019-08-27 18:32:07 -07:00
|
|
|
|
2020-03-27 00:29:14 -05:00
|
|
|
if (intrin->dest.ssa.bit_size == 1) {
|
|
|
|
|
/* For shared, we can go ahead and use NIR's and/or the back-end's
|
|
|
|
|
* standard encoding for booleans rather than forcing a 0/1 boolean.
|
|
|
|
|
* This should save an instruction or two.
|
|
|
|
|
*/
|
2020-07-15 14:09:06 -05:00
|
|
|
if (mode == nir_var_mem_shared ||
|
|
|
|
|
mode == nir_var_shader_temp ||
|
|
|
|
|
mode == nir_var_function_temp)
|
2020-03-27 00:29:14 -05:00
|
|
|
result = nir_b2b1(b, result);
|
|
|
|
|
else
|
|
|
|
|
result = nir_i2b(b, result);
|
|
|
|
|
}
|
2019-08-27 18:32:07 -07:00
|
|
|
|
|
|
|
|
return result;
|
2018-11-27 21:31:42 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
|
|
|
|
|
nir_ssa_def *addr, nir_address_format addr_format,
|
2020-08-15 01:54:45 -05:00
|
|
|
nir_variable_mode modes,
|
2020-08-24 10:57:57 -05:00
|
|
|
uint32_t align_mul, uint32_t align_offset,
|
2018-11-27 21:31:42 -06:00
|
|
|
nir_ssa_def *value, nir_component_mask_t write_mask)
|
|
|
|
|
{
|
2020-08-15 01:54:45 -05:00
|
|
|
modes = canonicalize_generic_modes(modes);
|
|
|
|
|
|
|
|
|
|
if (util_bitcount(modes) > 1) {
|
|
|
|
|
if (addr_format_is_global(addr_format, modes)) {
|
|
|
|
|
build_explicit_io_store(b, intrin, addr, addr_format,
|
|
|
|
|
nir_var_mem_global,
|
|
|
|
|
align_mul, align_offset,
|
|
|
|
|
value, write_mask);
|
|
|
|
|
} else if (modes & nir_var_function_temp) {
|
|
|
|
|
nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
|
|
|
|
|
nir_var_function_temp));
|
|
|
|
|
build_explicit_io_store(b, intrin, addr, addr_format,
|
|
|
|
|
nir_var_function_temp,
|
|
|
|
|
align_mul, align_offset,
|
|
|
|
|
value, write_mask);
|
|
|
|
|
nir_push_else(b, NULL);
|
|
|
|
|
build_explicit_io_store(b, intrin, addr, addr_format,
|
|
|
|
|
modes & ~nir_var_function_temp,
|
|
|
|
|
align_mul, align_offset,
|
|
|
|
|
value, write_mask);
|
|
|
|
|
nir_pop_if(b, NULL);
|
|
|
|
|
} else {
|
|
|
|
|
nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
|
|
|
|
|
nir_var_mem_shared));
|
|
|
|
|
assert(modes & nir_var_mem_shared);
|
|
|
|
|
build_explicit_io_store(b, intrin, addr, addr_format,
|
|
|
|
|
nir_var_mem_shared,
|
|
|
|
|
align_mul, align_offset,
|
|
|
|
|
value, write_mask);
|
|
|
|
|
nir_push_else(b, NULL);
|
|
|
|
|
assert(modes & nir_var_mem_global);
|
|
|
|
|
build_explicit_io_store(b, intrin, addr, addr_format,
|
|
|
|
|
nir_var_mem_global,
|
|
|
|
|
align_mul, align_offset,
|
|
|
|
|
value, write_mask);
|
|
|
|
|
nir_pop_if(b, NULL);
|
|
|
|
|
}
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
assert(util_bitcount(modes) == 1);
|
|
|
|
|
const nir_variable_mode mode = modes;
|
2018-11-27 21:31:42 -06:00
|
|
|
|
|
|
|
|
nir_intrinsic_op op;
|
2020-10-05 14:46:36 -07:00
|
|
|
switch (intrin->intrinsic) {
|
|
|
|
|
case nir_intrinsic_store_deref:
|
|
|
|
|
assert(write_mask != 0);
|
|
|
|
|
|
|
|
|
|
switch (mode) {
|
|
|
|
|
case nir_var_mem_ssbo:
|
|
|
|
|
if (addr_format_is_global(addr_format, mode))
|
|
|
|
|
op = nir_intrinsic_store_global;
|
|
|
|
|
else
|
|
|
|
|
op = nir_intrinsic_store_ssbo;
|
|
|
|
|
break;
|
|
|
|
|
case nir_var_mem_global:
|
2020-08-15 00:39:00 -05:00
|
|
|
assert(addr_format_is_global(addr_format, mode));
|
2020-07-14 13:32:19 -05:00
|
|
|
op = nir_intrinsic_store_global;
|
2020-10-05 14:46:36 -07:00
|
|
|
break;
|
|
|
|
|
case nir_var_mem_shared:
|
|
|
|
|
assert(addr_format_is_offset(addr_format, mode));
|
|
|
|
|
op = nir_intrinsic_store_shared;
|
|
|
|
|
break;
|
|
|
|
|
case nir_var_shader_temp:
|
|
|
|
|
case nir_var_function_temp:
|
|
|
|
|
if (addr_format_is_offset(addr_format, mode)) {
|
|
|
|
|
op = nir_intrinsic_store_scratch;
|
|
|
|
|
} else {
|
|
|
|
|
assert(addr_format_is_global(addr_format, mode));
|
|
|
|
|
op = nir_intrinsic_store_global;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
unreachable("Unsupported explicit IO variable mode");
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case nir_intrinsic_store_deref_block_intel:
|
|
|
|
|
assert(write_mask == 0);
|
|
|
|
|
|
|
|
|
|
switch (mode) {
|
|
|
|
|
case nir_var_mem_ssbo:
|
|
|
|
|
if (addr_format_is_global(addr_format, mode))
|
|
|
|
|
op = nir_intrinsic_store_global_block_intel;
|
|
|
|
|
else
|
|
|
|
|
op = nir_intrinsic_store_ssbo_block_intel;
|
|
|
|
|
break;
|
|
|
|
|
case nir_var_mem_global:
|
|
|
|
|
op = nir_intrinsic_store_global_block_intel;
|
|
|
|
|
break;
|
|
|
|
|
case nir_var_mem_shared:
|
|
|
|
|
op = nir_intrinsic_store_shared_block_intel;
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
unreachable("Unsupported explicit IO variable mode");
|
2020-07-14 13:32:19 -05:00
|
|
|
}
|
2020-05-26 12:21:33 -07:00
|
|
|
break;
|
2020-10-05 14:46:36 -07:00
|
|
|
|
2018-11-27 21:31:42 -06:00
|
|
|
default:
|
2020-10-05 14:46:36 -07:00
|
|
|
unreachable("Invalid intrinsic");
|
2018-11-27 21:31:42 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
|
|
|
|
|
|
2019-08-27 18:32:07 -07:00
|
|
|
if (value->bit_size == 1) {
|
2020-03-27 00:29:14 -05:00
|
|
|
/* For shared, we can go ahead and use NIR's and/or the back-end's
|
|
|
|
|
* standard encoding for booleans rather than forcing a 0/1 boolean.
|
|
|
|
|
* This should save an instruction or two.
|
|
|
|
|
*
|
|
|
|
|
* TODO: Make the native bool bit_size an option.
|
|
|
|
|
*/
|
2020-07-15 14:09:06 -05:00
|
|
|
if (mode == nir_var_mem_shared ||
|
|
|
|
|
mode == nir_var_shader_temp ||
|
|
|
|
|
mode == nir_var_function_temp)
|
2020-03-27 00:29:14 -05:00
|
|
|
value = nir_b2b32(b, value);
|
|
|
|
|
else
|
|
|
|
|
value = nir_b2i(b, value, 32);
|
2019-08-27 18:32:07 -07:00
|
|
|
}
|
|
|
|
|
|
2018-11-27 21:31:42 -06:00
|
|
|
store->src[0] = nir_src_for_ssa(value);
|
2020-08-15 00:39:00 -05:00
|
|
|
if (addr_format_is_global(addr_format, mode)) {
|
2019-01-07 17:17:46 -06:00
|
|
|
store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
|
2020-08-15 00:39:00 -05:00
|
|
|
} else if (addr_format_is_offset(addr_format, mode)) {
|
2019-04-09 17:28:13 +01:00
|
|
|
assert(addr->num_components == 1);
|
2020-05-25 08:57:14 -07:00
|
|
|
store->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
|
2019-01-07 17:17:46 -06:00
|
|
|
} else {
|
|
|
|
|
store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
|
|
|
|
|
store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
|
|
|
|
|
}
|
2018-11-27 21:31:42 -06:00
|
|
|
|
|
|
|
|
nir_intrinsic_set_write_mask(store, write_mask);
|
|
|
|
|
|
2020-08-20 14:01:23 +01:00
|
|
|
if (nir_intrinsic_has_access(store))
|
2019-04-09 17:28:13 +01:00
|
|
|
nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
|
2018-11-27 21:31:42 -06:00
|
|
|
|
2020-08-24 10:57:57 -05:00
|
|
|
nir_intrinsic_set_align(store, align_mul, align_offset);
|
2018-11-27 21:31:42 -06:00
|
|
|
|
|
|
|
|
assert(value->num_components == 1 ||
|
|
|
|
|
value->num_components == intrin->num_components);
|
|
|
|
|
store->num_components = value->num_components;
|
2019-01-09 14:56:02 -06:00
|
|
|
|
|
|
|
|
assert(value->bit_size % 8 == 0);
|
|
|
|
|
|
|
|
|
|
if (addr_format_needs_bounds_check(addr_format)) {
|
2020-10-05 14:46:36 -07:00
|
|
|
/* TODO: Better handle block_intel. */
|
2019-01-09 14:56:02 -06:00
|
|
|
const unsigned store_size = (value->bit_size / 8) * store->num_components;
|
|
|
|
|
nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
|
|
|
|
|
|
|
|
|
|
nir_builder_instr_insert(b, &store->instr);
|
|
|
|
|
|
|
|
|
|
nir_pop_if(b, NULL);
|
|
|
|
|
} else {
|
|
|
|
|
nir_builder_instr_insert(b, &store->instr);
|
|
|
|
|
}
|
2018-11-27 21:31:42 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static nir_ssa_def *
|
|
|
|
|
build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
|
2020-08-15 01:54:45 -05:00
|
|
|
nir_ssa_def *addr, nir_address_format addr_format,
|
|
|
|
|
nir_variable_mode modes)
|
2018-11-27 21:31:42 -06:00
|
|
|
{
|
2020-08-15 01:54:45 -05:00
|
|
|
modes = canonicalize_generic_modes(modes);
|
|
|
|
|
|
|
|
|
|
if (util_bitcount(modes) > 1) {
|
|
|
|
|
if (addr_format_is_global(addr_format, modes)) {
|
|
|
|
|
return build_explicit_io_atomic(b, intrin, addr, addr_format,
|
|
|
|
|
nir_var_mem_global);
|
|
|
|
|
} else if (modes & nir_var_function_temp) {
|
|
|
|
|
nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
|
|
|
|
|
nir_var_function_temp));
|
|
|
|
|
nir_ssa_def *res1 =
|
|
|
|
|
build_explicit_io_atomic(b, intrin, addr, addr_format,
|
|
|
|
|
nir_var_function_temp);
|
|
|
|
|
nir_push_else(b, NULL);
|
|
|
|
|
nir_ssa_def *res2 =
|
|
|
|
|
build_explicit_io_atomic(b, intrin, addr, addr_format,
|
|
|
|
|
modes & ~nir_var_function_temp);
|
|
|
|
|
nir_pop_if(b, NULL);
|
|
|
|
|
return nir_if_phi(b, res1, res2);
|
|
|
|
|
} else {
|
|
|
|
|
nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
|
|
|
|
|
nir_var_mem_shared));
|
|
|
|
|
assert(modes & nir_var_mem_shared);
|
|
|
|
|
nir_ssa_def *res1 =
|
|
|
|
|
build_explicit_io_atomic(b, intrin, addr, addr_format,
|
|
|
|
|
nir_var_mem_shared);
|
|
|
|
|
nir_push_else(b, NULL);
|
|
|
|
|
assert(modes & nir_var_mem_global);
|
|
|
|
|
nir_ssa_def *res2 =
|
|
|
|
|
build_explicit_io_atomic(b, intrin, addr, addr_format,
|
|
|
|
|
nir_var_mem_global);
|
|
|
|
|
nir_pop_if(b, NULL);
|
|
|
|
|
return nir_if_phi(b, res1, res2);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
assert(util_bitcount(modes) == 1);
|
|
|
|
|
const nir_variable_mode mode = modes;
|
|
|
|
|
|
2018-11-27 21:31:42 -06:00
|
|
|
const unsigned num_data_srcs =
|
|
|
|
|
nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;
|
|
|
|
|
|
|
|
|
|
nir_intrinsic_op op;
|
|
|
|
|
switch (mode) {
|
2019-01-16 00:11:23 +01:00
|
|
|
case nir_var_mem_ssbo:
|
2020-08-15 00:39:00 -05:00
|
|
|
if (addr_format_is_global(addr_format, mode))
|
2019-01-07 17:17:46 -06:00
|
|
|
op = global_atomic_for_deref(intrin->intrinsic);
|
|
|
|
|
else
|
|
|
|
|
op = ssbo_atomic_for_deref(intrin->intrinsic);
|
2018-11-27 21:31:42 -06:00
|
|
|
break;
|
2019-01-19 18:50:48 -06:00
|
|
|
case nir_var_mem_global:
|
2020-08-15 00:39:00 -05:00
|
|
|
assert(addr_format_is_global(addr_format, mode));
|
2019-01-19 18:50:48 -06:00
|
|
|
op = global_atomic_for_deref(intrin->intrinsic);
|
|
|
|
|
break;
|
2019-04-09 17:28:13 +01:00
|
|
|
case nir_var_mem_shared:
|
2020-08-15 00:39:00 -05:00
|
|
|
assert(addr_format_is_offset(addr_format, mode));
|
2019-04-09 17:28:13 +01:00
|
|
|
op = shared_atomic_for_deref(intrin->intrinsic);
|
|
|
|
|
break;
|
2018-11-27 21:31:42 -06:00
|
|
|
default:
|
|
|
|
|
unreachable("Unsupported explicit IO variable mode");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);
|
|
|
|
|
|
2019-01-07 17:17:46 -06:00
|
|
|
unsigned src = 0;
|
2020-08-15 00:39:00 -05:00
|
|
|
if (addr_format_is_global(addr_format, mode)) {
|
2019-01-07 17:17:46 -06:00
|
|
|
atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
|
2020-08-15 00:39:00 -05:00
|
|
|
} else if (addr_format_is_offset(addr_format, mode)) {
|
2019-04-09 17:28:13 +01:00
|
|
|
assert(addr->num_components == 1);
|
2020-05-25 08:57:14 -07:00
|
|
|
atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
|
2019-01-07 17:17:46 -06:00
|
|
|
} else {
|
|
|
|
|
atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
|
|
|
|
|
atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
|
|
|
|
|
}
|
2018-11-27 21:31:42 -06:00
|
|
|
for (unsigned i = 0; i < num_data_srcs; i++) {
|
2019-01-07 17:17:46 -06:00
|
|
|
atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
|
2018-11-27 21:31:42 -06:00
|
|
|
}
|
|
|
|
|
|
2019-03-04 13:04:45 -06:00
|
|
|
/* Global atomics don't have access flags because they assume that the
|
|
|
|
|
* address may be non-uniform.
|
|
|
|
|
*/
|
2020-08-20 14:01:23 +01:00
|
|
|
if (nir_intrinsic_has_access(atomic))
|
2019-03-04 13:04:45 -06:00
|
|
|
nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
|
|
|
|
|
|
2018-11-27 21:31:42 -06:00
|
|
|
assert(intrin->dest.ssa.num_components == 1);
|
|
|
|
|
nir_ssa_dest_init(&atomic->instr, &atomic->dest,
|
|
|
|
|
1, intrin->dest.ssa.bit_size, intrin->dest.ssa.name);
|
|
|
|
|
|
2019-01-09 14:56:02 -06:00
|
|
|
assert(atomic->dest.ssa.bit_size % 8 == 0);
|
|
|
|
|
|
|
|
|
|
if (addr_format_needs_bounds_check(addr_format)) {
|
|
|
|
|
const unsigned atomic_size = atomic->dest.ssa.bit_size / 8;
|
|
|
|
|
nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));
|
|
|
|
|
|
|
|
|
|
nir_builder_instr_insert(b, &atomic->instr);
|
|
|
|
|
|
|
|
|
|
nir_pop_if(b, NULL);
|
|
|
|
|
return nir_if_phi(b, &atomic->dest.ssa,
|
|
|
|
|
nir_ssa_undef(b, 1, atomic->dest.ssa.bit_size));
|
|
|
|
|
} else {
|
|
|
|
|
nir_builder_instr_insert(b, &atomic->instr);
|
|
|
|
|
return &atomic->dest.ssa;
|
|
|
|
|
}
|
2018-11-27 21:31:42 -06:00
|
|
|
}
|
|
|
|
|
|
2019-01-07 18:00:22 -06:00
|
|
|
nir_ssa_def *
|
|
|
|
|
nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
|
|
|
|
|
nir_ssa_def *base_addr,
|
|
|
|
|
nir_address_format addr_format)
|
2018-11-27 21:31:42 -06:00
|
|
|
{
|
|
|
|
|
assert(deref->dest.is_ssa);
|
|
|
|
|
switch (deref->deref_type) {
|
|
|
|
|
case nir_deref_type_var:
|
2020-08-15 00:53:45 -05:00
|
|
|
return build_addr_for_var(b, deref->var, addr_format);
|
2018-11-27 21:31:42 -06:00
|
|
|
|
|
|
|
|
case nir_deref_type_array: {
|
2020-08-27 11:59:54 -05:00
|
|
|
unsigned stride = nir_deref_instr_array_stride(deref);
|
2018-11-27 21:31:42 -06:00
|
|
|
assert(stride > 0);
|
|
|
|
|
|
|
|
|
|
nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
|
2020-05-25 08:57:14 -07:00
|
|
|
index = nir_i2i(b, index, addr_get_offset_bit_size(base_addr, addr_format));
|
2020-08-14 18:20:12 -05:00
|
|
|
return build_addr_iadd(b, base_addr, addr_format, deref->modes,
|
2019-09-26 10:32:00 -07:00
|
|
|
nir_amul_imm(b, index, stride));
|
2018-11-27 21:31:42 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case nir_deref_type_ptr_as_array: {
|
|
|
|
|
nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
|
2020-05-25 08:57:14 -07:00
|
|
|
index = nir_i2i(b, index, addr_get_offset_bit_size(base_addr, addr_format));
|
2020-08-27 11:59:54 -05:00
|
|
|
unsigned stride = nir_deref_instr_array_stride(deref);
|
2020-08-14 18:20:12 -05:00
|
|
|
return build_addr_iadd(b, base_addr, addr_format, deref->modes,
|
2019-09-26 10:32:00 -07:00
|
|
|
nir_amul_imm(b, index, stride));
|
2018-11-27 21:31:42 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case nir_deref_type_array_wildcard:
|
|
|
|
|
unreachable("Wildcards should be lowered by now");
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case nir_deref_type_struct: {
|
|
|
|
|
nir_deref_instr *parent = nir_deref_instr_parent(deref);
|
|
|
|
|
int offset = glsl_get_struct_field_offset(parent->type,
|
|
|
|
|
deref->strct.index);
|
|
|
|
|
assert(offset >= 0);
|
2020-08-14 18:20:12 -05:00
|
|
|
return build_addr_iadd_imm(b, base_addr, addr_format,
|
|
|
|
|
deref->modes, offset);
|
2018-11-27 21:31:42 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case nir_deref_type_cast:
|
|
|
|
|
/* Nothing to do here */
|
2019-01-07 18:00:22 -06:00
|
|
|
return base_addr;
|
2018-11-27 21:31:42 -06:00
|
|
|
}
|
|
|
|
|
|
2019-01-07 18:00:22 -06:00
|
|
|
unreachable("Invalid NIR deref type");
|
2018-11-27 21:31:42 -06:00
|
|
|
}
|
|
|
|
|
|
2019-01-07 18:00:22 -06:00
|
|
|
void
|
|
|
|
|
nir_lower_explicit_io_instr(nir_builder *b,
|
|
|
|
|
nir_intrinsic_instr *intrin,
|
|
|
|
|
nir_ssa_def *addr,
|
|
|
|
|
nir_address_format addr_format)
|
2018-11-27 21:31:42 -06:00
|
|
|
{
|
|
|
|
|
b->cursor = nir_after_instr(&intrin->instr);
|
|
|
|
|
|
|
|
|
|
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
|
|
|
|
unsigned vec_stride = glsl_get_explicit_stride(deref->type);
|
|
|
|
|
unsigned scalar_size = type_scalar_size_bytes(deref->type);
|
|
|
|
|
assert(vec_stride == 0 || glsl_type_is_vector(deref->type));
|
|
|
|
|
assert(vec_stride == 0 || vec_stride >= scalar_size);
|
|
|
|
|
|
2020-08-24 10:57:57 -05:00
|
|
|
uint32_t align_mul, align_offset;
|
|
|
|
|
if (!nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset)) {
|
|
|
|
|
/* If we don't have an alignment from the deref, assume scalar */
|
|
|
|
|
align_mul = scalar_size;
|
|
|
|
|
align_offset = 0;
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-05 14:25:10 -07:00
|
|
|
switch (intrin->intrinsic) {
|
|
|
|
|
case nir_intrinsic_load_deref: {
|
2018-11-27 21:31:42 -06:00
|
|
|
nir_ssa_def *value;
|
|
|
|
|
if (vec_stride > scalar_size) {
|
2020-09-08 17:56:54 -07:00
|
|
|
nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS] = { NULL, };
|
2018-11-27 21:31:42 -06:00
|
|
|
for (unsigned i = 0; i < intrin->num_components; i++) {
|
2020-08-24 10:57:57 -05:00
|
|
|
unsigned comp_offset = i * vec_stride;
|
2018-11-27 21:31:42 -06:00
|
|
|
nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
|
2020-08-14 18:20:12 -05:00
|
|
|
deref->modes,
|
2020-08-24 10:57:57 -05:00
|
|
|
comp_offset);
|
2018-11-27 21:31:42 -06:00
|
|
|
comps[i] = build_explicit_io_load(b, intrin, comp_addr,
|
2020-08-15 01:54:45 -05:00
|
|
|
addr_format, deref->modes,
|
|
|
|
|
align_mul,
|
2020-08-24 10:57:57 -05:00
|
|
|
(align_offset + comp_offset) %
|
|
|
|
|
align_mul,
|
|
|
|
|
1);
|
2018-11-27 21:31:42 -06:00
|
|
|
}
|
|
|
|
|
value = nir_vec(b, comps, intrin->num_components);
|
|
|
|
|
} else {
|
|
|
|
|
value = build_explicit_io_load(b, intrin, addr, addr_format,
|
2020-08-15 01:54:45 -05:00
|
|
|
deref->modes, align_mul, align_offset,
|
2018-11-27 21:31:42 -06:00
|
|
|
intrin->num_components);
|
|
|
|
|
}
|
|
|
|
|
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
|
2020-10-05 14:25:10 -07:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case nir_intrinsic_store_deref: {
|
2018-11-27 21:31:42 -06:00
|
|
|
assert(intrin->src[1].is_ssa);
|
|
|
|
|
nir_ssa_def *value = intrin->src[1].ssa;
|
|
|
|
|
nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
|
|
|
|
|
if (vec_stride > scalar_size) {
|
|
|
|
|
for (unsigned i = 0; i < intrin->num_components; i++) {
|
|
|
|
|
if (!(write_mask & (1 << i)))
|
|
|
|
|
continue;
|
|
|
|
|
|
2020-08-24 10:57:57 -05:00
|
|
|
unsigned comp_offset = i * vec_stride;
|
2018-11-27 21:31:42 -06:00
|
|
|
nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
|
2020-08-14 18:20:12 -05:00
|
|
|
deref->modes,
|
2020-08-24 10:57:57 -05:00
|
|
|
comp_offset);
|
2018-11-27 21:31:42 -06:00
|
|
|
build_explicit_io_store(b, intrin, comp_addr, addr_format,
|
2020-08-15 01:54:45 -05:00
|
|
|
deref->modes, align_mul,
|
2020-08-24 10:57:57 -05:00
|
|
|
(align_offset + comp_offset) % align_mul,
|
2018-11-27 21:31:42 -06:00
|
|
|
nir_channel(b, value, i), 1);
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
build_explicit_io_store(b, intrin, addr, addr_format,
|
2020-08-15 01:54:45 -05:00
|
|
|
deref->modes, align_mul, align_offset,
|
2018-11-27 21:31:42 -06:00
|
|
|
value, write_mask);
|
|
|
|
|
}
|
2020-10-05 14:25:10 -07:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-05 14:46:36 -07:00
|
|
|
case nir_intrinsic_load_deref_block_intel: {
|
|
|
|
|
nir_ssa_def *value = build_explicit_io_load(b, intrin, addr, addr_format,
|
|
|
|
|
deref->modes,
|
|
|
|
|
align_mul, align_offset,
|
|
|
|
|
intrin->num_components);
|
|
|
|
|
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case nir_intrinsic_store_deref_block_intel: {
|
|
|
|
|
assert(intrin->src[1].is_ssa);
|
|
|
|
|
nir_ssa_def *value = intrin->src[1].ssa;
|
|
|
|
|
const nir_component_mask_t write_mask = 0;
|
|
|
|
|
build_explicit_io_store(b, intrin, addr, addr_format,
|
|
|
|
|
deref->modes, align_mul, align_offset,
|
|
|
|
|
value, write_mask);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-05 14:25:10 -07:00
|
|
|
default: {
|
2018-11-27 21:31:42 -06:00
|
|
|
nir_ssa_def *value =
|
2020-08-15 01:54:45 -05:00
|
|
|
build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes);
|
2018-11-27 21:31:42 -06:00
|
|
|
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
|
2020-10-05 14:25:10 -07:00
|
|
|
break;
|
|
|
|
|
}
|
2018-11-27 21:31:42 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nir_instr_remove(&intrin->instr);
|
|
|
|
|
}
|
|
|
|
|
|
2020-08-24 10:48:51 -05:00
|
|
|
bool
|
|
|
|
|
nir_get_explicit_deref_align(nir_deref_instr *deref,
|
|
|
|
|
bool default_to_type_align,
|
|
|
|
|
uint32_t *align_mul,
|
|
|
|
|
uint32_t *align_offset)
|
|
|
|
|
{
|
|
|
|
|
if (deref->deref_type == nir_deref_type_var) {
|
|
|
|
|
/* If we see a variable, align_mul is effectively infinite because we
|
|
|
|
|
* know the offset exactly (up to the offset of the base pointer for the
|
|
|
|
|
* given variable mode). We have to pick something so we choose 256B
|
|
|
|
|
* as an arbitrary alignment which seems high enough for any reasonable
|
|
|
|
|
* wide-load use-case. Back-ends should clamp alignments down if 256B
|
|
|
|
|
* is too large for some reason.
|
|
|
|
|
*/
|
|
|
|
|
*align_mul = 256;
|
|
|
|
|
*align_offset = deref->var->data.driver_location % 256;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* If we're a cast deref that has an alignment, use that. */
|
|
|
|
|
if (deref->deref_type == nir_deref_type_cast && deref->cast.align_mul > 0) {
|
|
|
|
|
*align_mul = deref->cast.align_mul;
|
|
|
|
|
*align_offset = deref->cast.align_offset;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Otherwise, we need to compute the alignment based on the parent */
|
|
|
|
|
nir_deref_instr *parent = nir_deref_instr_parent(deref);
|
|
|
|
|
if (parent == NULL) {
|
|
|
|
|
assert(deref->deref_type == nir_deref_type_cast);
|
|
|
|
|
if (default_to_type_align) {
|
|
|
|
|
/* If we don't have a parent, assume the type's alignment, if any. */
|
|
|
|
|
unsigned type_align = glsl_get_explicit_alignment(deref->type);
|
|
|
|
|
if (type_align == 0)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
*align_mul = type_align;
|
|
|
|
|
*align_offset = 0;
|
|
|
|
|
return true;
|
|
|
|
|
} else {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint32_t parent_mul, parent_offset;
|
|
|
|
|
if (!nir_get_explicit_deref_align(parent, default_to_type_align,
|
|
|
|
|
&parent_mul, &parent_offset))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
switch (deref->deref_type) {
|
|
|
|
|
case nir_deref_type_var:
|
|
|
|
|
unreachable("Handled above");
|
|
|
|
|
|
|
|
|
|
case nir_deref_type_array:
|
|
|
|
|
case nir_deref_type_array_wildcard:
|
|
|
|
|
case nir_deref_type_ptr_as_array: {
|
|
|
|
|
const unsigned stride = nir_deref_instr_array_stride(deref);
|
|
|
|
|
if (stride == 0)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
if (deref->deref_type != nir_deref_type_array_wildcard &&
|
|
|
|
|
nir_src_is_const(deref->arr.index)) {
|
|
|
|
|
unsigned offset = nir_src_as_uint(deref->arr.index) * stride;
|
|
|
|
|
*align_mul = parent_mul;
|
|
|
|
|
*align_offset = (parent_offset + offset) % parent_mul;
|
|
|
|
|
} else {
|
|
|
|
|
/* If this is a wildcard or an indirect deref, we have to go with the
|
|
|
|
|
* power-of-two gcd.
|
|
|
|
|
*/
|
2020-09-06 20:09:01 -05:00
|
|
|
*align_mul = MIN2(parent_mul, 1 << (ffs(stride) - 1));
|
|
|
|
|
*align_offset = parent_offset % *align_mul;
|
2020-08-24 10:48:51 -05:00
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case nir_deref_type_struct: {
|
|
|
|
|
const int offset = glsl_get_struct_field_offset(parent->type,
|
|
|
|
|
deref->strct.index);
|
|
|
|
|
if (offset < 0)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
*align_mul = parent_mul;
|
|
|
|
|
*align_offset = (parent_offset + offset) % parent_mul;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case nir_deref_type_cast:
|
|
|
|
|
/* We handled the explicit alignment case above. */
|
|
|
|
|
assert(deref->cast.align_mul == 0);
|
|
|
|
|
*align_mul = parent_mul;
|
|
|
|
|
*align_offset = parent_offset;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
unreachable("Invalid deref_instr_type");
|
|
|
|
|
}
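/* Worked example for the indirect-array case above: with parent_mul = 16,
 * parent_offset = 4, and an array deref whose index is not constant and
 * whose stride is 12 bytes, the largest power of two dividing the stride is
 * 4, so align_mul becomes MIN2(16, 4) = 4 and align_offset becomes
 * 4 % 4 = 0.
 */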
|
|
|
|
|
|
2019-01-07 18:00:22 -06:00
|
|
|
static void
|
|
|
|
|
lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
|
|
|
|
|
nir_address_format addr_format)
|
|
|
|
|
{
|
|
|
|
|
/* Just delete the deref if it's not used. We can't use
|
|
|
|
|
* nir_deref_instr_remove_if_unused here because it may remove more than
|
|
|
|
|
* one deref which could break our list walking since we walk the list
|
|
|
|
|
* backwards.
|
|
|
|
|
*/
|
2019-10-28 21:27:52 +11:00
|
|
|
assert(list_is_empty(&deref->dest.ssa.if_uses));
|
|
|
|
|
if (list_is_empty(&deref->dest.ssa.uses)) {
|
2019-01-07 18:00:22 -06:00
|
|
|
nir_instr_remove(&deref->instr);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
b->cursor = nir_after_instr(&deref->instr);
|
|
|
|
|
|
|
|
|
|
nir_ssa_def *base_addr = NULL;
|
|
|
|
|
if (deref->deref_type != nir_deref_type_var) {
|
|
|
|
|
assert(deref->parent.is_ssa);
|
|
|
|
|
base_addr = deref->parent.ssa;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nir_ssa_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr,
|
|
|
|
|
addr_format);
|
2020-08-21 16:55:02 -05:00
|
|
|
assert(addr->bit_size == deref->dest.ssa.bit_size);
|
|
|
|
|
assert(addr->num_components == deref->dest.ssa.num_components);
|
2019-01-07 18:00:22 -06:00
|
|
|
|
|
|
|
|
nir_instr_remove(&deref->instr);
|
|
|
|
|
nir_ssa_def_rewrite_uses(&deref->dest.ssa, nir_src_for_ssa(addr));
|
|
|
|
|
}
static void
lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
                         nir_address_format addr_format)
{
   assert(intrin->src[0].is_ssa);
   nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format);
}
|
|
|
|
|
|
2019-03-10 08:35:00 -05:00
|
|
|
static void
|
|
|
|
|
lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin,
|
|
|
|
|
nir_address_format addr_format)
|
|
|
|
|
{
|
|
|
|
|
b->cursor = nir_after_instr(&intrin->instr);
|
|
|
|
|
|
|
|
|
|
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
|
|
|
|
|
|
|
|
|
assert(glsl_type_is_array(deref->type));
|
|
|
|
|
assert(glsl_get_length(deref->type) == 0);
|
2020-10-30 12:19:25 -05:00
|
|
|
assert(nir_deref_mode_is(deref, nir_var_mem_ssbo));
|
2019-03-10 08:35:00 -05:00
|
|
|
unsigned stride = glsl_get_explicit_stride(deref->type);
|
|
|
|
|
assert(stride > 0);
|
|
|
|
|
|
|
|
|
|
nir_ssa_def *addr = &deref->dest.ssa;
|
|
|
|
|
nir_ssa_def *index = addr_to_index(b, addr, addr_format);
|
|
|
|
|
nir_ssa_def *offset = addr_to_offset(b, addr, addr_format);
|
2020-10-12 15:03:28 +01:00
|
|
|
unsigned access = nir_intrinsic_access(intrin);
|
2019-03-10 08:35:00 -05:00
|
|
|
|
2020-10-12 15:03:28 +01:00
|
|
|
nir_ssa_def *arr_size = nir_get_ssbo_size(b, index, .access=access);
|
2020-12-07 14:44:15 +00:00
|
|
|
arr_size = nir_imax(b, nir_isub(b, arr_size, offset), nir_imm_int(b, 0u));
|
|
|
|
|
arr_size = nir_idiv(b, arr_size, nir_imm_int(b, stride));
|
2019-03-10 08:35:00 -05:00
|
|
|
|
|
|
|
|
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(arr_size));
|
|
|
|
|
nir_instr_remove(&intrin->instr);
|
|
|
|
|
}
|
|
|
|
|
|
2020-08-15 00:32:46 -05:00
|
|
|
static void
|
|
|
|
|
lower_explicit_io_mode_check(nir_builder *b, nir_intrinsic_instr *intrin,
|
|
|
|
|
nir_address_format addr_format)
|
|
|
|
|
{
|
|
|
|
|
if (addr_format_is_global(addr_format, 0)) {
|
|
|
|
|
/* If the address format is always global, then the driver can use
|
|
|
|
|
* global addresses regardless of the mode. In that case, don't create
|
|
|
|
|
* a check, just whack the intrinsic to addr_mode_is and delegate to the
|
|
|
|
|
* driver lowering that.
|
|
|
|
|
*/
|
|
|
|
|
intrin->intrinsic = nir_intrinsic_addr_mode_is;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
assert(intrin->src[0].is_ssa);
|
|
|
|
|
nir_ssa_def *addr = intrin->src[0].ssa;
|
|
|
|
|
|
|
|
|
|
b->cursor = nir_instr_remove(&intrin->instr);
|
|
|
|
|
|
|
|
|
|
nir_ssa_def *is_mode =
|
|
|
|
|
build_runtime_addr_mode_check(b, addr, addr_format,
|
|
|
|
|
nir_intrinsic_memory_modes(intrin));
|
|
|
|
|
|
|
|
|
|
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(is_mode));
|
|
|
|
|
}
|
|
|
|
|
|
2018-11-27 21:31:42 -06:00
|
|
|
static bool
|
|
|
|
|
nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
|
|
|
|
|
nir_address_format addr_format)
|
|
|
|
|
{
|
|
|
|
|
bool progress = false;
|
|
|
|
|
|
|
|
|
|
nir_builder b;
|
|
|
|
|
nir_builder_init(&b, impl);
|
|
|
|
|
|
|
|
|
|
/* Walk in reverse order so that we can see the full deref chain when we
|
|
|
|
|
* lower the access operations. We lower them assuming that the derefs
|
|
|
|
|
* will be turned into address calculations later.
|
|
|
|
|
*/
|
|
|
|
|
nir_foreach_block_reverse(block, impl) {
|
|
|
|
|
nir_foreach_instr_reverse_safe(instr, block) {
|
|
|
|
|
switch (instr->type) {
|
|
|
|
|
case nir_instr_type_deref: {
|
|
|
|
|
nir_deref_instr *deref = nir_instr_as_deref(instr);
|
2020-11-01 16:58:27 -06:00
|
|
|
if (nir_deref_mode_is_in_set(deref, modes)) {
|
2018-11-27 21:31:42 -06:00
|
|
|
lower_explicit_io_deref(&b, deref, addr_format);
|
|
|
|
|
progress = true;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case nir_instr_type_intrinsic: {
|
|
|
|
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
|
|
|
|
switch (intrin->intrinsic) {
|
|
|
|
|
case nir_intrinsic_load_deref:
|
|
|
|
|
case nir_intrinsic_store_deref:
|
2020-10-05 14:46:36 -07:00
|
|
|
case nir_intrinsic_load_deref_block_intel:
|
|
|
|
|
case nir_intrinsic_store_deref_block_intel:
|
2018-11-27 21:31:42 -06:00
|
|
|
case nir_intrinsic_deref_atomic_add:
|
|
|
|
|
case nir_intrinsic_deref_atomic_imin:
|
|
|
|
|
case nir_intrinsic_deref_atomic_umin:
|
|
|
|
|
case nir_intrinsic_deref_atomic_imax:
|
|
|
|
|
case nir_intrinsic_deref_atomic_umax:
|
|
|
|
|
case nir_intrinsic_deref_atomic_and:
|
|
|
|
|
case nir_intrinsic_deref_atomic_or:
|
|
|
|
|
case nir_intrinsic_deref_atomic_xor:
|
|
|
|
|
case nir_intrinsic_deref_atomic_exchange:
|
|
|
|
|
case nir_intrinsic_deref_atomic_comp_swap:
|
|
|
|
|
case nir_intrinsic_deref_atomic_fadd:
|
|
|
|
|
case nir_intrinsic_deref_atomic_fmin:
|
|
|
|
|
case nir_intrinsic_deref_atomic_fmax:
|
|
|
|
|
case nir_intrinsic_deref_atomic_fcomp_swap: {
|
|
|
|
|
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
2020-11-01 16:58:27 -06:00
|
|
|
if (nir_deref_mode_is_in_set(deref, modes)) {
|
2018-11-27 21:31:42 -06:00
|
|
|
lower_explicit_io_access(&b, intrin, addr_format);
|
|
|
|
|
progress = true;
|
2019-03-10 08:35:00 -05:00
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case nir_intrinsic_deref_buffer_array_length: {
|
|
|
|
|
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
2020-11-01 16:58:27 -06:00
|
|
|
if (nir_deref_mode_is_in_set(deref, modes)) {
|
2019-03-10 08:35:00 -05:00
|
|
|
lower_explicit_io_array_length(&b, intrin, addr_format);
|
|
|
|
|
progress = true;
|
2018-11-27 21:31:42 -06:00
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2020-08-15 00:32:46 -05:00
|
|
|
case nir_intrinsic_deref_mode_is: {
|
|
|
|
|
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
|
|
|
|
if (nir_deref_mode_is_in_set(deref, modes)) {
|
|
|
|
|
lower_explicit_io_mode_check(&b, intrin, addr_format);
|
|
|
|
|
progress = true;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2018-11-27 21:31:42 -06:00
|
|
|
default:
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
/* Nothing to do */
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (progress) {
|
|
|
|
|
nir_metadata_preserve(impl, nir_metadata_block_index |
|
|
|
|
|
nir_metadata_dominance);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return progress;
|
|
|
|
|
}
|
|
|
|
|
|
/** Lower explicitly laid out I/O access to byte offset/address intrinsics
 *
 * This pass is intended to be used for any I/O which touches memory external
 * to the shader or which is directly visible to the client.  It requires that
 * all data types in the given modes have explicit stride/offset decorations
 * to tell it exactly how to calculate the offset/address for the given load,
 * store, or atomic operation.  If the offset/stride information does not come
 * from the client explicitly (as with shared variables in GL or Vulkan),
 * nir_lower_vars_to_explicit_types() can be used to add them.
 *
 * Unlike nir_lower_io, this pass is fully capable of handling incomplete
 * pointer chains which may contain cast derefs.  It does so by walking the
 * deref chain backwards and simply replacing each deref, one at a time, with
 * the appropriate address calculation.  The pass takes a nir_address_format
 * parameter which describes how the offset or address is to be represented
 * during calculations.  By ensuring that the address is always in a
 * consistent format, pointers can safely be conjured from thin air by the
 * driver, stored to variables, passed through phis, etc.
 *
 * The one exception to the simple algorithm described above is the handling
 * of row-major matrices, in which case we may look down one additional level
 * of the deref chain.
 *
 * This pass is also capable of handling OpenCL generic pointers.  If the
 * address format is global, it will lower any ambiguous (more than one mode)
 * access to global and pass the deref_mode_is run-time checks through as
 * addr_mode_is.  This assumes the driver has somehow mapped shared and
 * scratch memory to the global address space.  For other formats such as
 * 62bit_generic, there is an enum embedded in the address and we lower
 * ambiguous access to an if-ladder and deref_mode_is to a check against the
 * embedded enum.  If nir_lower_explicit_io is called on any shader that
 * contains generic pointers, it must either be used on all of the generic
 * modes or none.
 */
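/* A typical driver invocation might look like the following sketch (the
 * exact modes and address formats vary by driver and by memory mode):
 *
 *    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
 *               nir_address_format_64bit_bounded_global);
 *
 * For shared or scratch memory, nir_lower_vars_to_explicit_types() is
 * normally run first so the variables get explicit offsets, and this pass is
 * then run with an offset-based format such as
 * nir_address_format_32bit_offset.
 */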
bool
nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
                      nir_address_format addr_format)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl &&
          nir_lower_explicit_io_impl(function->impl, modes, addr_format))
         progress = true;
   }

   return progress;
}
|
|
|
|
|
|
2019-04-09 20:18:11 +01:00
|
|
|
static bool
|
|
|
|
|
nir_lower_vars_to_explicit_types_impl(nir_function_impl *impl,
|
|
|
|
|
nir_variable_mode modes,
|
|
|
|
|
glsl_type_size_align_func type_info)
|
|
|
|
|
{
|
|
|
|
|
bool progress = false;
|
|
|
|
|
|
|
|
|
|
nir_foreach_block(block, impl) {
|
|
|
|
|
nir_foreach_instr(instr, block) {
|
|
|
|
|
if (instr->type != nir_instr_type_deref)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
nir_deref_instr *deref = nir_instr_as_deref(instr);
|
2020-11-01 16:58:27 -06:00
|
|
|
if (!nir_deref_mode_is_in_set(deref, modes))
|
2019-04-09 20:18:11 +01:00
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
unsigned size, alignment;
|
|
|
|
|
const struct glsl_type *new_type =
|
|
|
|
|
glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment);
|
|
|
|
|
if (new_type != deref->type) {
|
|
|
|
|
progress = true;
|
|
|
|
|
deref->type = new_type;
|
|
|
|
|
}
|
|
|
|
|
if (deref->deref_type == nir_deref_type_cast) {
|
|
|
|
|
/* See also glsl_type::get_explicit_type_for_size_align() */
|
|
|
|
|
unsigned new_stride = align(size, alignment);
|
|
|
|
|
if (new_stride != deref->cast.ptr_stride) {
|
|
|
|
|
deref->cast.ptr_stride = new_stride;
|
|
|
|
|
progress = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (progress) {
|
|
|
|
|
nir_metadata_preserve(impl, nir_metadata_block_index |
|
|
|
|
|
nir_metadata_dominance |
|
|
|
|
|
nir_metadata_live_ssa_defs |
|
|
|
|
|
nir_metadata_loop_analysis);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return progress;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
lower_vars_to_explicit(nir_shader *shader,
|
|
|
|
|
struct exec_list *vars, nir_variable_mode mode,
|
|
|
|
|
glsl_type_size_align_func type_info)
|
|
|
|
|
{
|
|
|
|
|
bool progress = false;
|
2020-05-26 12:20:20 -07:00
|
|
|
unsigned offset;
|
|
|
|
|
switch (mode) {
|
2020-08-27 16:00:00 -05:00
|
|
|
case nir_var_uniform:
|
|
|
|
|
assert(shader->info.stage == MESA_SHADER_KERNEL);
|
|
|
|
|
offset = 0;
|
|
|
|
|
break;
|
2020-05-26 12:20:20 -07:00
|
|
|
case nir_var_function_temp:
|
|
|
|
|
case nir_var_shader_temp:
|
|
|
|
|
offset = shader->scratch_size;
|
|
|
|
|
break;
|
|
|
|
|
case nir_var_mem_shared:
|
|
|
|
|
offset = 0;
|
|
|
|
|
break;
|
2020-08-18 14:43:39 -05:00
|
|
|
case nir_var_mem_constant:
|
|
|
|
|
offset = shader->constant_data_size;
|
|
|
|
|
break;
|
2020-08-04 19:24:24 -05:00
|
|
|
case nir_var_shader_call_data:
|
2020-07-28 18:01:41 -05:00
|
|
|
case nir_var_ray_hit_attrib:
|
|
|
|
|
offset = 0;
|
|
|
|
|
break;
|
2020-05-26 12:20:20 -07:00
|
|
|
default:
|
|
|
|
|
unreachable("Unsupported mode");
|
|
|
|
|
}
|
2020-07-20 16:30:37 -05:00
|
|
|
nir_foreach_variable_in_list(var, vars) {
|
|
|
|
|
if (var->data.mode != mode)
|
|
|
|
|
continue;
|
|
|
|
|
|
2019-04-09 20:18:11 +01:00
|
|
|
unsigned size, align;
|
|
|
|
|
const struct glsl_type *explicit_type =
|
|
|
|
|
glsl_get_explicit_type_for_size_align(var->type, type_info, &size, &align);
|
|
|
|
|
|
2020-08-18 14:43:39 -05:00
|
|
|
if (explicit_type != var->type)
|
2019-04-09 20:18:11 +01:00
|
|
|
var->type = explicit_type;
|
|
|
|
|
|
2020-10-08 02:27:24 -05:00
|
|
|
assert(util_is_power_of_two_nonzero(align));
|
2019-04-09 20:18:11 +01:00
|
|
|
var->data.driver_location = ALIGN_POT(offset, align);
|
|
|
|
|
offset = var->data.driver_location + size;
|
2020-08-18 14:43:39 -05:00
|
|
|
progress = true;
|
2019-04-09 20:18:11 +01:00
|
|
|
}
|
|
|
|
|
|
2020-05-26 12:20:20 -07:00
|
|
|
switch (mode) {
|
2020-08-27 16:00:00 -05:00
|
|
|
case nir_var_uniform:
|
|
|
|
|
assert(shader->info.stage == MESA_SHADER_KERNEL);
|
|
|
|
|
shader->num_uniforms = offset;
|
|
|
|
|
break;
|
2020-05-26 12:20:20 -07:00
|
|
|
case nir_var_shader_temp:
|
|
|
|
|
case nir_var_function_temp:
|
|
|
|
|
shader->scratch_size = offset;
|
|
|
|
|
break;
|
|
|
|
|
case nir_var_mem_shared:
|
2019-04-09 20:18:11 +01:00
|
|
|
shader->info.cs.shared_size = offset;
|
2020-08-31 13:04:50 -05:00
|
|
|
shader->shared_size = offset;
|
2020-05-26 12:20:20 -07:00
|
|
|
break;
|
2020-08-18 14:43:39 -05:00
|
|
|
case nir_var_mem_constant:
|
|
|
|
|
shader->constant_data_size = offset;
|
|
|
|
|
break;
|
2020-08-04 19:24:24 -05:00
|
|
|
case nir_var_shader_call_data:
|
2020-07-28 18:01:41 -05:00
|
|
|
case nir_var_ray_hit_attrib:
|
|
|
|
|
break;
|
2020-05-26 12:20:20 -07:00
|
|
|
default:
|
|
|
|
|
unreachable("Unsupported mode");
|
2019-04-09 20:18:11 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return progress;
|
|
|
|
|
}
|
|
|
|
|
|
/* If nir_lower_vars_to_explicit_types is called on any shader that contains
 * generic pointers, it must either be used on all of the generic modes or
 * none.
 */
bool
nir_lower_vars_to_explicit_types(nir_shader *shader,
                                 nir_variable_mode modes,
                                 glsl_type_size_align_func type_info)
{
   /* TODO: Situations which need to be handled to support more modes:
    * - row-major matrices
    * - compact shader inputs/outputs
    * - interface types
    */
   ASSERTED nir_variable_mode supported =
      nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant |
      nir_var_shader_temp | nir_var_function_temp | nir_var_uniform |
      nir_var_shader_call_data | nir_var_ray_hit_attrib;
   assert(!(modes & ~supported) && "unsupported");

   bool progress = false;

   if (modes & nir_var_uniform)
      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_uniform, type_info);
   if (modes & nir_var_mem_shared)
      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info);
   if (modes & nir_var_shader_temp)
      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_temp, type_info);
   if (modes & nir_var_mem_constant)
      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_constant, type_info);
   if (modes & nir_var_shader_call_data)
      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_call_data, type_info);
   if (modes & nir_var_ray_hit_attrib)
      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_ray_hit_attrib, type_info);

   nir_foreach_function(function, shader) {
      if (function->impl) {
         if (modes & nir_var_function_temp)
            progress |= lower_vars_to_explicit(shader, &function->impl->locals, nir_var_function_temp, type_info);

         progress |= nir_lower_vars_to_explicit_types_impl(function->impl, modes, type_info);
      }
   }

   return progress;
}
|
|
|
|
|
|
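/* Usage sketch (illustrative, not part of this file): a compute-style driver
 * would typically run this pass over its scratch and shared variables before
 * lowering derefs to explicit offsets.  The size/align callback is assumed
 * here to be the "natural" packing helper from glsl_types; a backend may
 * substitute its own layout rules.
 *
 *    NIR_PASS(progress, shader, nir_lower_vars_to_explicit_types,
 *             nir_var_function_temp | nir_var_mem_shared,
 *             glsl_get_natural_size_align_bytes);
 */
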
static void
write_constant(void *dst, size_t dst_size,
               const nir_constant *c, const struct glsl_type *type)
{
   if (glsl_type_is_vector_or_scalar(type)) {
      const unsigned num_components = glsl_get_vector_elements(type);
      const unsigned bit_size = glsl_get_bit_size(type);
      if (bit_size == 1) {
         /* Booleans are special-cased to be 32-bit
          *
          * TODO: Make the native bool bit_size an option.
          */
         assert(num_components * 4 <= dst_size);
         for (unsigned i = 0; i < num_components; i++) {
            int32_t b32 = -(int)c->values[i].b;
            memcpy((char *)dst + i * 4, &b32, 4);
         }
      } else {
         assert(bit_size >= 8 && bit_size % 8 == 0);
         const unsigned byte_size = bit_size / 8;
         assert(num_components * byte_size <= dst_size);
         for (unsigned i = 0; i < num_components; i++) {
            /* Annoyingly, thanks to packed structs, we can't make any
             * assumptions about the alignment of dst.  To avoid any strange
             * issues with unaligned writes, we always use memcpy.
             */
            memcpy((char *)dst + i * byte_size, &c->values[i], byte_size);
         }
      }
   } else if (glsl_type_is_array_or_matrix(type)) {
      const unsigned array_len = glsl_get_length(type);
      const unsigned stride = glsl_get_explicit_stride(type);
      assert(stride > 0);
      const struct glsl_type *elem_type = glsl_get_array_element(type);
      for (unsigned i = 0; i < array_len; i++) {
         unsigned elem_offset = i * stride;
         assert(elem_offset < dst_size);
         write_constant((char *)dst + elem_offset, dst_size - elem_offset,
                        c->elements[i], elem_type);
      }
   } else {
      assert(glsl_type_is_struct_or_ifc(type));
      const unsigned num_fields = glsl_get_length(type);
      for (unsigned i = 0; i < num_fields; i++) {
         const int field_offset = glsl_get_struct_field_offset(type, i);
         assert(field_offset >= 0 && field_offset < dst_size);
         const struct glsl_type *field_type = glsl_get_struct_field(type, i);
         write_constant((char *)dst + field_offset, dst_size - field_offset,
                        c->elements[i], field_type);
      }
   }
}

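/* Worked example (informal): for a bvec2 constant { true, false }, the
 * bit_size == 1 path in write_constant() writes the 32-bit words 0xffffffff
 * followed by 0x00000000, i.e. each 1-bit NIR boolean is expanded to a
 * 32-bit -1/0 value in the constant buffer.
 */
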
void
nir_gather_explicit_io_initializers(nir_shader *shader,
                                    void *dst, size_t dst_size,
                                    nir_variable_mode mode)
{
   /* It doesn't really make sense to gather initializers for more than one
    * mode at a time.  If this ever becomes well-defined, we can drop the
    * assert then.
    */
   assert(util_bitcount(mode) == 1);

   nir_foreach_variable_with_modes(var, shader, mode) {
      assert(var->data.driver_location < dst_size);
      write_constant((char *)dst + var->data.driver_location,
                     dst_size - var->data.driver_location,
                     var->constant_initializer, var->type);
   }
}

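/* Usage sketch (illustrative): once nir_lower_vars_to_explicit_types() has
 * assigned driver_locations to nir_var_mem_constant variables, a driver can
 * flatten every initializer into a single buffer sized by
 * shader->constant_data_size.  The allocation and mem_ctx below are only an
 * example.
 *
 *    void *data = rzalloc_size(mem_ctx, shader->constant_data_size);
 *    nir_gather_explicit_io_initializers(shader, data,
 *                                        shader->constant_data_size,
 *                                        nir_var_mem_constant);
 */
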
/**
 * Return the offset source for a load/store intrinsic.
 */
nir_src *
nir_get_io_offset_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_kernel_input:
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_constant:
   case nir_intrinsic_load_scratch:
   case nir_intrinsic_load_fs_input_interp_deltas:
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_comp_swap:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_fadd:
   case nir_intrinsic_shared_atomic_fcomp_swap:
   case nir_intrinsic_shared_atomic_fmax:
   case nir_intrinsic_shared_atomic_fmin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_comp_swap:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_fadd:
   case nir_intrinsic_global_atomic_fcomp_swap:
   case nir_intrinsic_global_atomic_fmax:
   case nir_intrinsic_global_atomic_fmin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_xor:
      return &instr->src[0];
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_input_vertex:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_shared:
   case nir_intrinsic_store_global:
   case nir_intrinsic_store_scratch:
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
      return &instr->src[1];
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[2];
   default:
      return NULL;
   }
}

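/* Example (sketch): a backend walking I/O intrinsics can use this helper to
 * treat direct and indirect addressing uniformly.  handle_direct() and
 * handle_indirect() are hypothetical driver callbacks.
 *
 *    nir_src *offset_src = nir_get_io_offset_src(intrin);
 *    if (offset_src != NULL && nir_src_is_const(*offset_src))
 *       handle_direct(nir_intrinsic_base(intrin) +
 *                     nir_src_as_uint(*offset_src));
 *    else
 *       handle_indirect(intrin);
 */
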
/**
 * Return the vertex index source for a load/store per_vertex intrinsic.
 */
nir_src *
nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
      return &instr->src[0];
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[1];
   default:
      return NULL;
   }
}

/**
 * Return the numeric constant that identifies a NULL pointer for each
 * address format.
 */
const nir_const_value *
nir_address_format_null_value(nir_address_format addr_format)
{
   const static nir_const_value null_values[][NIR_MAX_VEC_COMPONENTS] = {
      [nir_address_format_32bit_global] = {{0}},
      [nir_address_format_64bit_global] = {{0}},
      [nir_address_format_64bit_bounded_global] = {{0}},
      [nir_address_format_32bit_index_offset] = {{.u32 = ~0}, {.u32 = ~0}},
      [nir_address_format_32bit_index_offset_pack64] = {{.u64 = ~0ull}},
      [nir_address_format_vec2_index_32bit_offset] = {{.u32 = ~0}, {.u32 = ~0}, {.u32 = ~0}},
      [nir_address_format_32bit_offset] = {{.u32 = ~0}},
      [nir_address_format_32bit_offset_as_64bit] = {{.u64 = ~0ull}},
      [nir_address_format_62bit_generic] = {{.u64 = 0}},
      [nir_address_format_logical] = {{.u32 = ~0}},
   };

   assert(addr_format < ARRAY_SIZE(null_values));
   return null_values[addr_format];
}

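/* Example (sketch): the table above can be turned into an SSA constant when
 * a NULL pointer needs to be materialized, e.g. while lowering explicit I/O.
 * The helper names follow the rest of this file's nir_address_format_* API;
 * double-check the exact signatures in nir.h.
 *
 *    nir_ssa_def *null_ptr =
 *       nir_build_imm(b, nir_address_format_num_components(addr_format),
 *                     nir_address_format_bit_size(addr_format),
 *                     nir_address_format_null_value(addr_format));
 */
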
nir_ssa_def *
nir_build_addr_ieq(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
                   nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
   case nir_address_format_64bit_bounded_global:
   case nir_address_format_32bit_index_offset:
   case nir_address_format_vec2_index_32bit_offset:
   case nir_address_format_32bit_offset:
   case nir_address_format_62bit_generic:
      return nir_ball_iequal(b, addr0, addr1);

   case nir_address_format_32bit_offset_as_64bit:
      assert(addr0->num_components == 1 && addr1->num_components == 1);
      return nir_ieq(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1));

   case nir_address_format_32bit_index_offset_pack64:
      assert(addr0->num_components == 1 && addr1->num_components == 1);
      return nir_ball_iequal(b, nir_unpack_64_2x32(b, addr0), nir_unpack_64_2x32(b, addr1));

   case nir_address_format_logical:
      unreachable("Unsupported address format");
   }

   unreachable("Invalid address format");
}

nir_ssa_def *
nir_build_addr_isub(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
                    nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
   case nir_address_format_32bit_offset:
   case nir_address_format_32bit_index_offset_pack64:
   case nir_address_format_62bit_generic:
      assert(addr0->num_components == 1);
      assert(addr1->num_components == 1);
      return nir_isub(b, addr0, addr1);

   case nir_address_format_32bit_offset_as_64bit:
      assert(addr0->num_components == 1);
      assert(addr1->num_components == 1);
      return nir_u2u64(b, nir_isub(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1)));

   case nir_address_format_64bit_bounded_global:
      return nir_isub(b, addr_to_global(b, addr0, addr_format),
                      addr_to_global(b, addr1, addr_format));

   case nir_address_format_32bit_index_offset:
      assert(addr0->num_components == 2);
      assert(addr1->num_components == 2);
      /* Assume the same buffer index. */
      return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1));

   case nir_address_format_vec2_index_32bit_offset:
      assert(addr0->num_components == 3);
      assert(addr1->num_components == 3);
      /* Assume the same buffer index. */
      return nir_isub(b, nir_channel(b, addr0, 2), nir_channel(b, addr1, 2));

   case nir_address_format_logical:
      unreachable("Unsupported address format");
   }

   unreachable("Invalid address format");
}

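/* Example (sketch): the byte difference between two pointers that the caller
 * knows live in the same buffer, e.g. when lowering a pointer-subtraction
 * operation.  Dividing by the element stride to get an element count is left
 * to the caller.
 *
 *    nir_ssa_def *diff_bytes =
 *       nir_build_addr_isub(b, addr0, addr1, addr_format);
 */
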
static bool
is_input(nir_intrinsic_instr *intrin)
{
   return intrin->intrinsic == nir_intrinsic_load_input ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
          intrin->intrinsic == nir_intrinsic_load_interpolated_input ||
          intrin->intrinsic == nir_intrinsic_load_fs_input_interp_deltas;
}

static bool
is_output(nir_intrinsic_instr *intrin)
{
   return intrin->intrinsic == nir_intrinsic_load_output ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
          intrin->intrinsic == nir_intrinsic_store_output ||
          intrin->intrinsic == nir_intrinsic_store_per_vertex_output;
}

static bool
is_dual_slot(nir_intrinsic_instr *intrin)
{
   if (intrin->intrinsic == nir_intrinsic_store_output ||
       intrin->intrinsic == nir_intrinsic_store_per_vertex_output) {
      return nir_src_bit_size(intrin->src[0]) == 64 &&
             nir_src_num_components(intrin->src[0]) >= 3;
   }

   return nir_dest_bit_size(intrin->dest) == 64 &&
          nir_dest_num_components(intrin->dest) >= 3;
}

/**
 * This pass adds constant offsets to instr->const_index[0] for input/output
 * intrinsics, and resets the offset source to 0.  Non-constant offsets remain
 * unchanged - since we don't know what part of a compound variable is
 * accessed, we allocate storage for the entire thing.  For drivers that use
 * nir_lower_io_to_temporaries() before nir_lower_io(), this guarantees that
 * the offset source will be 0, so that they don't have to add it in manually.
 */
static bool
add_const_offset_to_base_block(nir_block *block, nir_builder *b,
                               nir_variable_mode modes)
{
   bool progress = false;
   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if (((modes & nir_var_shader_in) && is_input(intrin)) ||
          ((modes & nir_var_shader_out) && is_output(intrin))) {
         nir_src *offset = nir_get_io_offset_src(intrin);

         /* TODO: Better handling of per-view variables here */
         if (nir_src_is_const(*offset) &&
             !nir_intrinsic_io_semantics(intrin).per_view) {
            unsigned off = nir_src_as_uint(*offset);

            nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) + off);

            nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
            sem.location += off;
            /* non-indirect indexing should reduce num_slots */
            sem.num_slots = is_dual_slot(intrin) ? 2 : 1;
            nir_intrinsic_set_io_semantics(intrin, sem);

            b->cursor = nir_before_instr(&intrin->instr);
            nir_instr_rewrite_src(&intrin->instr, offset,
                                  nir_src_for_ssa(nir_imm_int(b, 0)));
            progress = true;
         }
      }
   }

   return progress;
}

bool
nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes)
{
   bool progress = false;

   nir_foreach_function(f, nir) {
      if (f->impl) {
         nir_builder b;
         nir_builder_init(&b, f->impl);
         nir_foreach_block(block, f->impl) {
            progress |= add_const_offset_to_base_block(block, &b, modes);
         }
      }
   }

   return progress;
}
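
/* Usage sketch (illustrative): a driver might run this pass right after
 * nir_lower_io() so that direct accesses land entirely in the intrinsic's
 * base index and io_semantics.location.  type_size_vec4 stands in for a
 * driver-specific type-size callback.
 *
 *    NIR_PASS(progress, nir, nir_lower_io,
 *             nir_var_shader_in | nir_var_shader_out, type_size_vec4,
 *             (nir_lower_io_options)0);
 *    NIR_PASS(progress, nir, nir_io_add_const_offset_to_base,
 *             nir_var_shader_in | nir_var_shader_out);
 */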