/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/*
 * This lowering pass converts references to input/output variables with
 * loads/stores to actual input/output intrinsics.
 */
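
/*
 * For example (illustrative NIR; exact SSA numbering and syntax vary), a
 * fragment shader read such as
 *
 *    ssa_2 = deref_var &color (shader_in vec4)
 *    ssa_3 = intrinsic load_deref (ssa_2)
 *
 * becomes an offset-based intrinsic along the lines of
 *
 *    ssa_3 = intrinsic load_interpolated_input (ssa_bary, ssa_offset)
 *            (base=0, component=0, ...)
 *
 * or a plain load_input when no interpolation is involved.
 */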

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
#include "nir_xfb_info.h"

#include "util/u_math.h"

struct lower_io_state {
   void *dead_ctx;
   nir_builder builder;
   int (*type_size)(const struct glsl_type *type, bool);
   nir_variable_mode modes;
   nir_lower_io_options options;
};

static nir_intrinsic_op
ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
   case nir_intrinsic_deref_atomic:
      return nir_intrinsic_ssbo_atomic;
   case nir_intrinsic_deref_atomic_swap:
      return nir_intrinsic_ssbo_atomic_swap;
   default:
      unreachable("Invalid SSBO atomic");
   }
}

static nir_intrinsic_op
global_atomic_for_deref(nir_address_format addr_format,
                        nir_intrinsic_op deref_op)
{
   switch (deref_op) {
   case nir_intrinsic_deref_atomic:
      if (addr_format != nir_address_format_2x32bit_global)
         return nir_intrinsic_global_atomic;
      else
         return nir_intrinsic_global_atomic_2x32;

   case nir_intrinsic_deref_atomic_swap:
      if (addr_format != nir_address_format_2x32bit_global)
         return nir_intrinsic_global_atomic_swap;
      else
         return nir_intrinsic_global_atomic_swap_2x32;

   default:
      unreachable("Invalid SSBO atomic");
   }
}

static nir_intrinsic_op
shared_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
   case nir_intrinsic_deref_atomic:
      return nir_intrinsic_shared_atomic;
   case nir_intrinsic_deref_atomic_swap:
      return nir_intrinsic_shared_atomic_swap;
   default:
      unreachable("Invalid shared atomic");
   }
}

static nir_intrinsic_op
task_payload_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
   case nir_intrinsic_deref_atomic:
      return nir_intrinsic_task_payload_atomic;
   case nir_intrinsic_deref_atomic_swap:
      return nir_intrinsic_task_payload_atomic_swap;
   default:
      unreachable("Invalid task payload atomic");
   }
}

void
nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
                         unsigned *size,
                         int (*type_size)(const struct glsl_type *, bool))
{
   unsigned location = 0;

   nir_foreach_variable_with_modes(var, shader, mode) {
      var->data.driver_location = location;
      bool bindless_type_size = var->data.mode == nir_var_shader_in ||
                                var->data.mode == nir_var_shader_out ||
                                var->data.bindless;
      location += type_size(var->type, bindless_type_size);
   }

   *size = location;
}

/**
 * Some inputs and outputs are arrayed, meaning that there is an extra level
 * of array indexing to handle mismatches between the shader interface and the
 * dispatch pattern of the shader. For instance, geometry shaders are
 * executed per-primitive while their inputs and outputs are specified
 * per-vertex so all inputs and outputs have to be additionally indexed with
 * the vertex index within the primitive.
 */
bool
nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage)
{
   if (var->data.patch || !glsl_type_is_array(var->type))
      return false;

   if (stage == MESA_SHADER_MESH) {
      /* NV_mesh_shader: this is a flat array for the whole workgroup. */
      if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES)
         return var->data.per_primitive;
   }

   if (var->data.mode == nir_var_shader_in) {
      if (var->data.per_vertex) {
         assert(stage == MESA_SHADER_FRAGMENT);
         return true;
      }

      return stage == MESA_SHADER_GEOMETRY ||
             stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_TESS_EVAL;
   }

   if (var->data.mode == nir_var_shader_out)
      return stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_MESH;

   return false;
}
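
/*
 * Example: a tessellation-control output declared as "out vec4 foo[]" is
 * arrayed (indexed by gl_InvocationID), whereas "patch out vec4 bar" is not.
 * (Illustrative GLSL; the checks above are authoritative.)
 */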

static bool
uses_high_dvec2_semantic(struct lower_io_state *state,
                         const nir_variable *var)
{
   return state->builder.shader->info.stage == MESA_SHADER_VERTEX &&
          state->options & nir_lower_io_lower_64bit_to_32_new &&
          var->data.mode == nir_var_shader_in &&
          glsl_type_is_dual_slot(glsl_without_array(var->type));
}

static unsigned
get_number_of_slots(struct lower_io_state *state,
                    const nir_variable *var)
{
   const struct glsl_type *type = var->type;

   if (nir_is_arrayed_io(var, state->builder.shader->info.stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   /* NV_mesh_shader:
    * PRIMITIVE_INDICES is a flat array, not a proper arrayed output,
    * as opposed to D3D-style mesh shaders where it's addressed by
    * the primitive index.
    * Prevent assigning several slots to primitive indices,
    * to avoid some issues.
    */
   if (state->builder.shader->info.stage == MESA_SHADER_MESH &&
       var->data.location == VARYING_SLOT_PRIMITIVE_INDICES &&
       !nir_is_arrayed_io(var, state->builder.shader->info.stage))
      return 1;

   return state->type_size(type, var->data.bindless) /
          (uses_high_dvec2_semantic(state, var) ? 2 : 1);
}

static nir_def *
get_io_offset(nir_builder *b, nir_deref_instr *deref,
              nir_def **array_index,
              int (*type_size)(const struct glsl_type *, bool),
              unsigned *component, bool bts)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   assert(path.path[0]->deref_type == nir_deref_type_var);
   nir_deref_instr **p = &path.path[1];

   /* For arrayed I/O (e.g., per-vertex input arrays in geometry shader
    * inputs), skip the outermost array index.  Process the rest normally.
    */
   if (array_index != NULL) {
      assert((*p)->deref_type == nir_deref_type_array);
      *array_index = (*p)->arr.index.ssa;
      p++;
   }

   if (path.path[0]->var->data.compact && nir_src_is_const((*p)->arr.index)) {
      assert((*p)->deref_type == nir_deref_type_array);
      assert(glsl_type_is_scalar((*p)->type));

      /* We always lower indirect dereferences for "compact" array vars. */
      const unsigned index = nir_src_as_uint((*p)->arr.index);
      const unsigned total_offset = *component + index;
      const unsigned slot_offset = total_offset / 4;
      *component = total_offset % 4;
      return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
   }

   /* Just emit code and let constant-folding go to town */
   nir_def *offset = nir_imm_int(b, 0);

   for (; *p; p++) {
      if ((*p)->deref_type == nir_deref_type_array) {
         unsigned size = type_size((*p)->type, bts);

         nir_def *mul =
            nir_amul_imm(b, (*p)->arr.index.ssa, size);

         offset = nir_iadd(b, offset, mul);
      } else if ((*p)->deref_type == nir_deref_type_struct) {
         /* p starts at path[1], so this is safe */
         nir_deref_instr *parent = *(p - 1);

         unsigned field_offset = 0;
         for (unsigned i = 0; i < (*p)->strct.index; i++) {
            field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
         }
         offset = nir_iadd_imm(b, offset, field_offset);
      } else {
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   return offset;
}
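
/*
 * Worked example (assuming a vec4-slot-counting type_size callback): for a
 * deref chain var.member[i], where the fields before "member" occupy two
 * slots and "member" is an array of vec4, the emitted offset is roughly
 * iadd(2, amul(i, 1)), which constant folding collapses when i is constant.
 */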

static nir_def *
emit_load(struct lower_io_state *state,
          nir_def *array_index, nir_variable *var, nir_def *offset,
          unsigned component, unsigned num_components, unsigned bit_size,
          nir_alu_type dest_type, bool high_dvec2)
{
   nir_builder *b = &state->builder;
   const nir_shader *nir = b->shader;
   nir_variable_mode mode = var->data.mode;
   nir_def *barycentric = NULL;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_shader_in:
      if (nir->info.stage == MESA_SHADER_FRAGMENT &&
          nir->options->use_interpolated_input_intrinsics &&
          var->data.interpolation != INTERP_MODE_FLAT &&
          !var->data.per_primitive) {
         if (var->data.interpolation == INTERP_MODE_EXPLICIT ||
             var->data.per_vertex) {
            assert(array_index != NULL);
            op = nir_intrinsic_load_input_vertex;
         } else {
            assert(array_index == NULL);

            nir_intrinsic_op bary_op;
            if (var->data.sample)
               bary_op = nir_intrinsic_load_barycentric_sample;
            else if (var->data.centroid)
               bary_op = nir_intrinsic_load_barycentric_centroid;
            else
               bary_op = nir_intrinsic_load_barycentric_pixel;

            barycentric = nir_load_barycentric(&state->builder, bary_op,
                                               var->data.interpolation);
            op = nir_intrinsic_load_interpolated_input;
         }
      } else {
         op = array_index ? nir_intrinsic_load_per_vertex_input : nir_intrinsic_load_input;
      }
      break;
   case nir_var_shader_out:
      op = !array_index ? nir_intrinsic_load_output : var->data.per_primitive ? nir_intrinsic_load_per_primitive_output
                                                                              : nir_intrinsic_load_per_vertex_output;
      break;
   case nir_var_uniform:
      op = nir_intrinsic_load_uniform;
      break;
   default:
      unreachable("Unknown variable mode");
   }

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader, op);
   load->num_components = num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   if (nir_intrinsic_has_range(load)) {
      const struct glsl_type *type = var->type;
      if (array_index)
         type = glsl_get_array_element(type);
      unsigned var_size = state->type_size(type, var->data.bindless);
      nir_intrinsic_set_range(load, var_size);
   }

   if (mode == nir_var_shader_in || mode == nir_var_shader_out)
      nir_intrinsic_set_component(load, component);

   if (nir_intrinsic_has_access(load))
      nir_intrinsic_set_access(load, var->data.access);

   nir_intrinsic_set_dest_type(load, dest_type);

   if (load->intrinsic != nir_intrinsic_load_uniform) {
      nir_io_semantics semantics = { 0 };
      semantics.location = var->data.location;
      semantics.num_slots = get_number_of_slots(state, var);
      semantics.fb_fetch_output = var->data.fb_fetch_output;
      semantics.medium_precision =
         var->data.precision == GLSL_PRECISION_MEDIUM ||
         var->data.precision == GLSL_PRECISION_LOW;
      semantics.high_dvec2 = high_dvec2;
      nir_intrinsic_set_io_semantics(load, semantics);
   }

   if (array_index) {
      load->src[0] = nir_src_for_ssa(array_index);
      load->src[1] = nir_src_for_ssa(offset);
   } else if (barycentric) {
      load->src[0] = nir_src_for_ssa(barycentric);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }

   nir_def_init(&load->instr, &load->def, num_components, bit_size);
   nir_builder_instr_insert(b, &load->instr);

   return &load->def;
}

static nir_def *
lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
           nir_def *array_index, nir_variable *var, nir_def *offset,
           unsigned component, const struct glsl_type *type)
{
   const bool lower_double = !glsl_type_is_integer(type) && state->options & nir_lower_io_lower_64bit_float_to_32;
   if (intrin->def.bit_size == 64 &&
       (lower_double || (state->options & (nir_lower_io_lower_64bit_to_32_new |
                                           nir_lower_io_lower_64bit_to_32)))) {
      nir_builder *b = &state->builder;
      bool use_high_dvec2_semantic = uses_high_dvec2_semantic(state, var);

      /* Each slot is a dual slot, so divide the offset within the variable
       * by 2.
       */
      if (use_high_dvec2_semantic)
         offset = nir_ushr_imm(b, offset, 1);

      const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);

      nir_def *comp64[4];
      assert(component == 0 || component == 2);
      unsigned dest_comp = 0;
      bool high_dvec2 = false;
      while (dest_comp < intrin->def.num_components) {
         const unsigned num_comps =
            MIN2(intrin->def.num_components - dest_comp,
                 (4 - component) / 2);

         nir_def *data32 =
            emit_load(state, array_index, var, offset, component,
                      num_comps * 2, 32, nir_type_uint32, high_dvec2);
         for (unsigned i = 0; i < num_comps; i++) {
            comp64[dest_comp + i] =
               nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2)));
         }

         /* Only the first load has a component offset */
         component = 0;
         dest_comp += num_comps;

         if (use_high_dvec2_semantic) {
            /* Increment the offset when we wrap around the dual slot. */
            if (high_dvec2)
               offset = nir_iadd_imm(b, offset, slot_size);
            high_dvec2 = !high_dvec2;
         } else {
            offset = nir_iadd_imm(b, offset, slot_size);
         }
      }

      return nir_vec(b, comp64, intrin->def.num_components);
   } else if (intrin->def.bit_size == 1) {
      /* Booleans are 32-bit */
      assert(glsl_type_is_boolean(type));
      return nir_b2b1(&state->builder,
                      emit_load(state, array_index, var, offset, component,
                                intrin->def.num_components, 32,
                                nir_type_bool32, false));
   } else {
      return emit_load(state, array_index, var, offset, component,
                       intrin->def.num_components,
                       intrin->def.bit_size,
                       nir_get_nir_type_for_glsl_type(type), false);
   }
}
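
/*
 * For instance, a dvec3 load starting at component 0 is split here into a
 * 4-component 32-bit load (holding the first two doubles) followed by a
 * 2-component 32-bit load, and each pair of 32-bit channels is re-packed
 * with pack_64_2x32. (Illustrative; the exact split depends on the starting
 * component and the lowering options.)
 */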

static void
emit_store(struct lower_io_state *state, nir_def *data,
           nir_def *array_index, nir_variable *var, nir_def *offset,
           unsigned component, unsigned num_components,
           nir_component_mask_t write_mask, nir_alu_type src_type)
{
   nir_builder *b = &state->builder;

   assert(var->data.mode == nir_var_shader_out);
   nir_intrinsic_op op =
      !array_index ? nir_intrinsic_store_output : var->data.per_primitive ? nir_intrinsic_store_per_primitive_output
                                                                          : nir_intrinsic_store_per_vertex_output;

   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(state->builder.shader, op);
   store->num_components = num_components;

   store->src[0] = nir_src_for_ssa(data);

   const struct glsl_type *type = var->type;
   if (array_index)
      type = glsl_get_array_element(type);
   unsigned var_size = state->type_size(type, var->data.bindless);
   nir_intrinsic_set_base(store, var->data.driver_location);
   nir_intrinsic_set_range(store, var_size);
   nir_intrinsic_set_component(store, component);
   nir_intrinsic_set_src_type(store, src_type);

   nir_intrinsic_set_write_mask(store, write_mask);

   if (nir_intrinsic_has_access(store))
      nir_intrinsic_set_access(store, var->data.access);

   if (array_index)
      store->src[1] = nir_src_for_ssa(array_index);

   store->src[array_index ? 2 : 1] = nir_src_for_ssa(offset);

   unsigned gs_streams = 0;
   if (state->builder.shader->info.stage == MESA_SHADER_GEOMETRY) {
      if (var->data.stream & NIR_STREAM_PACKED) {
         gs_streams = var->data.stream & ~NIR_STREAM_PACKED;
      } else {
         assert(var->data.stream < 4);
         gs_streams = 0;
         for (unsigned i = 0; i < num_components; ++i)
            gs_streams |= var->data.stream << (2 * i);
      }
   }

   nir_io_semantics semantics = { 0 };
   semantics.location = var->data.location;
   semantics.num_slots = get_number_of_slots(state, var);
   semantics.dual_source_blend_index = var->data.index;
   semantics.gs_streams = gs_streams;
   semantics.medium_precision =
      var->data.precision == GLSL_PRECISION_MEDIUM ||
      var->data.precision == GLSL_PRECISION_LOW;
   semantics.per_view = var->data.per_view;
   semantics.invariant = var->data.invariant;

   nir_intrinsic_set_io_semantics(store, semantics);

   nir_builder_instr_insert(b, &store->instr);
}

static void
lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
            nir_def *array_index, nir_variable *var, nir_def *offset,
            unsigned component, const struct glsl_type *type)
{
   const bool lower_double = !glsl_type_is_integer(type) && state->options & nir_lower_io_lower_64bit_float_to_32;
   if (intrin->src[1].ssa->bit_size == 64 &&
       (lower_double || (state->options & (nir_lower_io_lower_64bit_to_32 |
                                           nir_lower_io_lower_64bit_to_32_new)))) {
      nir_builder *b = &state->builder;

      const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);

      assert(component == 0 || component == 2);
      unsigned src_comp = 0;
      nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
      while (src_comp < intrin->num_components) {
         const unsigned num_comps =
            MIN2(intrin->num_components - src_comp,
                 (4 - component) / 2);

         if (write_mask & BITFIELD_MASK(num_comps)) {
            nir_def *data =
               nir_channels(b, intrin->src[1].ssa,
                            BITFIELD_RANGE(src_comp, num_comps));
            nir_def *data32 = nir_bitcast_vector(b, data, 32);

            uint32_t write_mask32 = 0;
            for (unsigned i = 0; i < num_comps; i++) {
               if (write_mask & BITFIELD_MASK(num_comps) & (1 << i))
                  write_mask32 |= 3 << (i * 2);
            }

            emit_store(state, data32, array_index, var, offset,
                       component, data32->num_components, write_mask32,
                       nir_type_uint32);
         }

         /* Only the first store has a component offset */
         component = 0;
         src_comp += num_comps;
         write_mask >>= num_comps;
         offset = nir_iadd_imm(b, offset, slot_size);
      }
   } else if (intrin->src[1].ssa->bit_size == 1) {
      /* Booleans are 32-bit */
      assert(glsl_type_is_boolean(type));
      nir_def *b32_val = nir_b2b32(&state->builder, intrin->src[1].ssa);
      emit_store(state, b32_val, array_index, var, offset,
                 component, intrin->num_components,
                 nir_intrinsic_write_mask(intrin),
                 nir_type_bool32);
   } else {
      emit_store(state, intrin->src[1].ssa, array_index, var, offset,
                 component, intrin->num_components,
                 nir_intrinsic_write_mask(intrin),
                 nir_get_nir_type_for_glsl_type(type));
   }
}

static nir_def *
lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
                     nir_variable *var, nir_def *offset, unsigned component,
                     const struct glsl_type *type)
{
   nir_builder *b = &state->builder;
   assert(var->data.mode == nir_var_shader_in);

   /* Ignore interpolateAt() for flat variables - flat is flat.  Lower
    * interpolateAtVertex() for explicit variables.
    */
   if (var->data.interpolation == INTERP_MODE_FLAT ||
       var->data.interpolation == INTERP_MODE_EXPLICIT) {
      nir_def *vertex_index = NULL;

      if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
         assert(intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex);
         vertex_index = intrin->src[1].ssa;
      }

      return lower_load(intrin, state, vertex_index, var, offset, component, type);
   }

   /* None of the supported APIs allow interpolation on 64-bit things */
   assert(intrin->def.bit_size <= 32);

   nir_intrinsic_op bary_op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_interp_deref_at_centroid:
      bary_op = nir_intrinsic_load_barycentric_centroid;
      break;
   case nir_intrinsic_interp_deref_at_sample:
      bary_op = nir_intrinsic_load_barycentric_at_sample;
      break;
   case nir_intrinsic_interp_deref_at_offset:
      bary_op = nir_intrinsic_load_barycentric_at_offset;
      break;
   default:
      unreachable("Bogus interpolateAt() intrinsic.");
   }

   nir_intrinsic_instr *bary_setup =
      nir_intrinsic_instr_create(state->builder.shader, bary_op);

   nir_def_init(&bary_setup->instr, &bary_setup->def, 2, 32);
   nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);

   if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
       intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
       intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex)
      bary_setup->src[0] = nir_src_for_ssa(intrin->src[1].ssa);

   nir_builder_instr_insert(b, &bary_setup->instr);

   nir_io_semantics semantics = { 0 };
   semantics.location = var->data.location;
   semantics.num_slots = get_number_of_slots(state, var);
   semantics.medium_precision =
      var->data.precision == GLSL_PRECISION_MEDIUM ||
      var->data.precision == GLSL_PRECISION_LOW;

   nir_def *load =
      nir_load_interpolated_input(&state->builder,
                                  intrin->def.num_components,
                                  intrin->def.bit_size,
                                  &bary_setup->def,
                                  offset,
                                  .base = var->data.driver_location,
                                  .component = component,
                                  .io_semantics = semantics,
                                  .dest_type = nir_type_float | intrin->def.bit_size);

   return load;
}

static bool
nir_lower_io_block(nir_block *block,
                   struct lower_io_state *state)
{
   nir_builder *b = &state->builder;
   const nir_shader_compiler_options *options = b->shader->options;
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
      case nir_intrinsic_store_deref:
         /* We can lower the io for this nir intrinsic */
         break;
      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
      case nir_intrinsic_interp_deref_at_vertex:
         /* We can optionally lower these to load_interpolated_input */
         if (options->use_interpolated_input_intrinsics ||
             options->lower_interpolate_at)
            break;
         FALLTHROUGH;
      default:
         /* We can't lower the io for this nir intrinsic, so skip it */
         continue;
      }

      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
      if (!nir_deref_mode_is_one_of(deref, state->modes))
         continue;

      nir_variable *var = nir_deref_instr_get_variable(deref);

      b->cursor = nir_before_instr(instr);

      const bool is_arrayed = nir_is_arrayed_io(var, b->shader->info.stage);

      nir_def *offset;
      nir_def *array_index = NULL;
      unsigned component_offset = var->data.location_frac;
      bool bindless_type_size = var->data.mode == nir_var_shader_in ||
                                var->data.mode == nir_var_shader_out ||
                                var->data.bindless;

      if (nir_deref_instr_is_known_out_of_bounds(deref)) {
         /* Section 5.11 (Out-of-Bounds Accesses) of the GLSL 4.60 spec says:
          *
          *    In the subsections described above for array, vector, matrix and
          *    structure accesses, any out-of-bounds access produced undefined
          *    behavior....
          *    Out-of-bounds reads return undefined values, which
          *    include values from other variables of the active program or zero.
          *    Out-of-bounds writes may be discarded or overwrite
          *    other variables of the active program.
          *
          * GL_KHR_robustness and GL_ARB_robustness encourage us to return zero
          * for reads.
          *
          * Otherwise get_io_offset would return out-of-bound offset which may
          * result in out-of-bound loading/storing of inputs/outputs,
          * that could cause issues in drivers down the line.
          */
         if (intrin->intrinsic != nir_intrinsic_store_deref) {
            nir_def *zero =
               nir_imm_zero(b, intrin->def.num_components,
                            intrin->def.bit_size);
            nir_def_rewrite_uses(&intrin->def,
                                 zero);
         }

         nir_instr_remove(&intrin->instr);
         progress = true;
         continue;
      }

      offset = get_io_offset(b, deref, is_arrayed ? &array_index : NULL,
                             state->type_size, &component_offset,
                             bindless_type_size);

      nir_def *replacement = NULL;

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
         replacement = lower_load(intrin, state, array_index, var, offset,
                                  component_offset, deref->type);
         break;

      case nir_intrinsic_store_deref:
         lower_store(intrin, state, array_index, var, offset,
                     component_offset, deref->type);
         break;

      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
      case nir_intrinsic_interp_deref_at_vertex:
         assert(array_index == NULL);
         replacement = lower_interpolate_at(intrin, state, var, offset,
                                            component_offset, deref->type);
         break;

      default:
         continue;
      }

      if (replacement) {
         nir_def_rewrite_uses(&intrin->def,
                              replacement);
      }
      nir_instr_remove(&intrin->instr);
      progress = true;
   }

   return progress;
}

static bool
nir_lower_io_impl(nir_function_impl *impl,
                  nir_variable_mode modes,
                  int (*type_size)(const struct glsl_type *, bool),
                  nir_lower_io_options options)
{
   struct lower_io_state state;
   bool progress = false;

   state.builder = nir_builder_create(impl);
   state.dead_ctx = ralloc_context(NULL);
   state.modes = modes;
   state.type_size = type_size;
   state.options = options;

   ASSERTED nir_variable_mode supported_modes =
      nir_var_shader_in | nir_var_shader_out | nir_var_uniform;
   assert(!(modes & ~supported_modes));

   nir_foreach_block(block, impl) {
      progress |= nir_lower_io_block(block, &state);
   }

   ralloc_free(state.dead_ctx);

   nir_metadata_preserve(impl, nir_metadata_none);

   return progress;
}

/** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics
 *
 * This pass is intended to be used for cross-stage shader I/O and driver-
 * managed uniforms to turn deref-based access into a simpler model using
 * locations or offsets.  For fragment shader inputs, it can optionally turn
 * load_deref into an explicit interpolation using barycentrics coming from
 * one of the load_barycentric_* intrinsics.  This pass requires that all
 * deref chains are complete and contain no casts.
 */
bool
nir_lower_io(nir_shader *shader, nir_variable_mode modes,
             int (*type_size)(const struct glsl_type *, bool),
             nir_lower_io_options options)
{
   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      progress |= nir_lower_io_impl(impl, modes, type_size, options);
   }

   return progress;
}
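
/*
 * Typical usage (illustrative; "driver_type_size_vec4" is a hypothetical,
 * driver-provided callback that returns how many slots a glsl_type
 * occupies):
 *
 *    NIR_PASS(_, nir, nir_lower_io,
 *             nir_var_shader_in | nir_var_shader_out,
 *             driver_type_size_vec4, (nir_lower_io_options)0);
 */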

static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) ||
          glsl_type_is_matrix(type));
   return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
}

nir_def *
nir_build_addr_iadd(nir_builder *b, nir_def *addr,
                    nir_address_format addr_format,
                    nir_variable_mode modes,
                    nir_def *offset)
{
   assert(offset->num_components == 1);

   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
   case nir_address_format_32bit_offset:
      assert(addr->bit_size == offset->bit_size);
      assert(addr->num_components == 1);
      return nir_iadd(b, addr, offset);

   case nir_address_format_2x32bit_global: {
      assert(addr->num_components == 2);
      nir_def *lo = nir_channel(b, addr, 0);
      nir_def *hi = nir_channel(b, addr, 1);
      nir_def *res_lo = nir_iadd(b, lo, offset);
      nir_def *carry = nir_b2i32(b, nir_ult(b, res_lo, lo));
      nir_def *res_hi = nir_iadd(b, hi, carry);
      return nir_vec2(b, res_lo, res_hi);
   }

   case nir_address_format_32bit_offset_as_64bit:
      assert(addr->num_components == 1);
      assert(offset->bit_size == 32);
      return nir_u2u64(b, nir_iadd(b, nir_u2u32(b, addr), offset));

   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global:
      assert(addr->num_components == 4);
      assert(addr->bit_size == offset->bit_size);
      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 3), offset), 3);

   case nir_address_format_32bit_index_offset:
      assert(addr->num_components == 2);
      assert(addr->bit_size == offset->bit_size);
      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 1), offset), 1);

   case nir_address_format_32bit_index_offset_pack64:
      assert(addr->num_components == 1);
      assert(offset->bit_size == 32);
      return nir_pack_64_2x32_split(b,
                                    nir_iadd(b, nir_unpack_64_2x32_split_x(b, addr), offset),
                                    nir_unpack_64_2x32_split_y(b, addr));

   case nir_address_format_vec2_index_32bit_offset:
      assert(addr->num_components == 3);
      assert(offset->bit_size == 32);
      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 2), offset), 2);

   case nir_address_format_62bit_generic:
      assert(addr->num_components == 1);
      assert(addr->bit_size == 64);
      assert(offset->bit_size == 64);
      if (!(modes & ~(nir_var_function_temp |
                      nir_var_shader_temp |
                      nir_var_mem_shared))) {
         /* If we're sure it's one of these modes, we can do an easy 32-bit
          * addition and don't need to bother with 64-bit math.
          */
         nir_def *addr32 = nir_unpack_64_2x32_split_x(b, addr);
         nir_def *type = nir_unpack_64_2x32_split_y(b, addr);
         addr32 = nir_iadd(b, addr32, nir_u2u32(b, offset));
         return nir_pack_64_2x32_split(b, addr32, type);
      } else {
         return nir_iadd(b, addr, offset);
      }

   case nir_address_format_logical:
      unreachable("Unsupported address format");
   }
   unreachable("Invalid address format");
}

static unsigned
addr_get_offset_bit_size(nir_def *addr, nir_address_format addr_format)
{
   if (addr_format == nir_address_format_32bit_offset_as_64bit ||
       addr_format == nir_address_format_32bit_index_offset_pack64)
      return 32;

   return addr->bit_size;
}

nir_def *
nir_build_addr_iadd_imm(nir_builder *b, nir_def *addr,
                        nir_address_format addr_format,
                        nir_variable_mode modes,
                        int64_t offset)
{
   if (!offset)
      return addr;

   return nir_build_addr_iadd(
      b, addr, addr_format, modes,
      nir_imm_intN_t(b, offset,
                     addr_get_offset_bit_size(addr, addr_format)));
}

static nir_def *
build_addr_for_var(nir_builder *b, nir_variable *var,
                   nir_address_format addr_format)
{
   assert(var->data.mode & (nir_var_uniform | nir_var_mem_shared |
                            nir_var_mem_task_payload |
                            nir_var_mem_global |
                            nir_var_shader_temp | nir_var_function_temp |
                            nir_var_mem_push_const | nir_var_mem_constant));

   const unsigned num_comps = nir_address_format_num_components(addr_format);
   const unsigned bit_size = nir_address_format_bit_size(addr_format);

   switch (addr_format) {
   case nir_address_format_2x32bit_global:
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global: {
      nir_def *base_addr;
      switch (var->data.mode) {
      case nir_var_shader_temp:
         base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 0);
         break;

      case nir_var_function_temp:
         base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 1);
         break;

      case nir_var_mem_constant:
         base_addr = nir_load_constant_base_ptr(b, num_comps, bit_size);
         break;

      case nir_var_mem_shared:
         base_addr = nir_load_shared_base_ptr(b, num_comps, bit_size);
         break;

      case nir_var_mem_global:
         base_addr = nir_load_global_base_ptr(b, num_comps, bit_size);
         break;

      default:
         unreachable("Unsupported variable mode");
      }

      return nir_build_addr_iadd_imm(b, base_addr, addr_format, var->data.mode,
                                     var->data.driver_location);
   }

   case nir_address_format_32bit_offset:
      assert(var->data.driver_location <= UINT32_MAX);
      return nir_imm_int(b, var->data.driver_location);

   case nir_address_format_32bit_offset_as_64bit:
      assert(var->data.driver_location <= UINT32_MAX);
      return nir_imm_int64(b, var->data.driver_location);

   case nir_address_format_62bit_generic:
      switch (var->data.mode) {
      case nir_var_shader_temp:
      case nir_var_function_temp:
         assert(var->data.driver_location <= UINT32_MAX);
         return nir_imm_intN_t(b, var->data.driver_location | 2ull << 62, 64);

      case nir_var_mem_shared:
         assert(var->data.driver_location <= UINT32_MAX);
         return nir_imm_intN_t(b, var->data.driver_location | 1ull << 62, 64);

      case nir_var_mem_global:
         return nir_iadd_imm(b, nir_load_global_base_ptr(b, num_comps, bit_size),
                             var->data.driver_location);

      default:
         unreachable("Unsupported variable mode");
      }

   default:
      unreachable("Unsupported address format");
   }
}

static nir_def *
build_runtime_addr_mode_check(nir_builder *b, nir_def *addr,
                              nir_address_format addr_format,
                              nir_variable_mode mode)
{
   /* The compile-time check failed; do a run-time check */
   switch (addr_format) {
   case nir_address_format_62bit_generic: {
      assert(addr->num_components == 1);
      assert(addr->bit_size == 64);
      nir_def *mode_enum = nir_ushr_imm(b, addr, 62);
      switch (mode) {
      case nir_var_function_temp:
      case nir_var_shader_temp:
         return nir_ieq_imm(b, mode_enum, 0x2);

      case nir_var_mem_shared:
         return nir_ieq_imm(b, mode_enum, 0x1);

      case nir_var_mem_global:
         return nir_ior(b, nir_ieq_imm(b, mode_enum, 0x0),
                        nir_ieq_imm(b, mode_enum, 0x3));

      default:
         unreachable("Invalid mode check intrinsic");
      }
   }

   default:
      unreachable("Unsupported address mode");
   }
}
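
/*
 * Note on nir_address_format_62bit_generic (as used above and in
 * build_addr_for_var): the top two bits of the 64-bit pointer encode the
 * mode (0x2 for shader/function temporaries, 0x1 for shared memory,
 * 0x0 or 0x3 for global memory), leaving 62 bits of address.
 */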

unsigned
nir_address_format_bit_size(nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:
      return 32;
   case nir_address_format_2x32bit_global:
      return 32;
   case nir_address_format_64bit_global:
      return 64;
   case nir_address_format_64bit_global_32bit_offset:
      return 32;
   case nir_address_format_64bit_bounded_global:
      return 32;
   case nir_address_format_32bit_index_offset:
      return 32;
   case nir_address_format_32bit_index_offset_pack64:
      return 64;
   case nir_address_format_vec2_index_32bit_offset:
      return 32;
   case nir_address_format_62bit_generic:
      return 64;
   case nir_address_format_32bit_offset:
      return 32;
   case nir_address_format_32bit_offset_as_64bit:
      return 64;
   case nir_address_format_logical:
      return 32;
   }
   unreachable("Invalid address format");
}

unsigned
nir_address_format_num_components(nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:
      return 1;
   case nir_address_format_2x32bit_global:
      return 2;
   case nir_address_format_64bit_global:
      return 1;
   case nir_address_format_64bit_global_32bit_offset:
      return 4;
   case nir_address_format_64bit_bounded_global:
      return 4;
   case nir_address_format_32bit_index_offset:
      return 2;
   case nir_address_format_32bit_index_offset_pack64:
      return 1;
   case nir_address_format_vec2_index_32bit_offset:
      return 3;
   case nir_address_format_62bit_generic:
      return 1;
   case nir_address_format_32bit_offset:
      return 1;
   case nir_address_format_32bit_offset_as_64bit:
      return 1;
   case nir_address_format_logical:
      return 1;
   }
   unreachable("Invalid address format");
}

static nir_def *
addr_to_index(nir_builder *b, nir_def *addr,
              nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_index_offset:
      assert(addr->num_components == 2);
      return nir_channel(b, addr, 0);
   case nir_address_format_32bit_index_offset_pack64:
      return nir_unpack_64_2x32_split_y(b, addr);
   case nir_address_format_vec2_index_32bit_offset:
      assert(addr->num_components == 3);
      return nir_trim_vector(b, addr, 2);
   default:
      unreachable("Invalid address format");
   }
}
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
static nir_def *
|
|
|
|
|
addr_to_offset(nir_builder *b, nir_def *addr,
|
2018-11-27 21:31:42 -06:00
|
|
|
nir_address_format addr_format)
|
|
|
|
|
{
|
2020-05-25 08:57:14 -07:00
|
|
|
switch (addr_format) {
|
|
|
|
|
case nir_address_format_32bit_index_offset:
|
nir: add vec2_index_32bit_offset address format
For turnip, we use the "bindless" model on a6xx. Loads and stores with
the bindless model require a bindless base, which is an immediate field
in the instruction that selects between 5 different 64-bit "bindless
base registers", a 32-bit descriptor index that's added to the base, and
the usual 32-bit offset. The bindless base usually, but not always,
corresponds to the Vulkan descriptor set. We can handle the case where
the base is non-constant by using a bunch of if-statements, to make it a
little easier in core NIR, and this seems to be what Qualcomm's driver
does too. Therefore, the pointer format we need to use in NIR has a vec2
index, for the bindless base and descriptor index. Plumb this format
through core NIR.
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5683>
2020-06-29 19:47:57 +02:00
|
|
|
assert(addr->num_components == 2);
|
|
|
|
|
return nir_channel(b, addr, 1);
|
2020-05-25 08:53:00 -07:00
|
|
|
case nir_address_format_32bit_index_offset_pack64:
|
|
|
|
|
return nir_unpack_64_2x32_split_x(b, addr);
|
2020-05-25 08:57:14 -07:00
|
|
|
case nir_address_format_vec2_index_32bit_offset:
|
nir: add vec2_index_32bit_offset address format
For turnip, we use the "bindless" model on a6xx. Loads and stores with
the bindless model require a bindless base, which is an immediate field
in the instruction that selects between 5 different 64-bit "bindless
base registers", a 32-bit descriptor index that's added to the base, and
the usual 32-bit offset. The bindless base usually, but not always,
corresponds to the Vulkan descriptor set. We can handle the case where
the base is non-constant by using a bunch of if-statements, to make it a
little easier in core NIR, and this seems to be what Qualcomm's driver
does too. Therefore, the pointer format we need to use in NIR has a vec2
index, for the bindless base and descriptor index. Plumb this format
through core NIR.
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5683>
2020-06-29 19:47:57 +02:00
|
|
|
assert(addr->num_components == 3);
|
|
|
|
|
return nir_channel(b, addr, 2);
|
2020-05-25 08:57:14 -07:00
|
|
|
case nir_address_format_32bit_offset:
|
|
|
|
|
return addr;
|
|
|
|
|
case nir_address_format_32bit_offset_as_64bit:
|
2020-08-15 00:57:14 -05:00
|
|
|
case nir_address_format_62bit_generic:
|
2020-05-25 08:57:14 -07:00
|
|
|
return nir_u2u32(b, addr);
|
|
|
|
|
default:
|
|
|
|
|
unreachable("Invalid address format");
|
nir: add vec2_index_32bit_offset address format
For turnip, we use the "bindless" model on a6xx. Loads and stores with
the bindless model require a bindless base, which is an immediate field
in the instruction that selects between 5 different 64-bit "bindless
base registers", a 32-bit descriptor index that's added to the base, and
the usual 32-bit offset. The bindless base usually, but not always,
corresponds to the Vulkan descriptor set. We can handle the case where
the base is non-constant by using a bunch of if-statements, to make it a
little easier in core NIR, and this seems to be what Qualcomm's driver
does too. Therefore, the pointer format we need to use in NIR has a vec2
index, for the bindless base and descriptor index. Plumb this format
through core NIR.
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5683>
2020-06-29 19:47:57 +02:00
|
|
|
}
|
2018-11-27 21:31:42 -06:00
|
|
|
}

/** Returns true if the given address format resolves to a global address */
static bool
addr_format_is_global(nir_address_format addr_format,
                      nir_variable_mode mode)
{
   if (addr_format == nir_address_format_62bit_generic)
      return mode == nir_var_mem_global;

   return addr_format == nir_address_format_32bit_global ||
          addr_format == nir_address_format_2x32bit_global ||
          addr_format == nir_address_format_64bit_global ||
          addr_format == nir_address_format_64bit_global_32bit_offset ||
          addr_format == nir_address_format_64bit_bounded_global;
}

static bool
addr_format_is_offset(nir_address_format addr_format,
                      nir_variable_mode mode)
{
   if (addr_format == nir_address_format_62bit_generic)
      return mode != nir_var_mem_global;

   return addr_format == nir_address_format_32bit_offset ||
          addr_format == nir_address_format_32bit_offset_as_64bit;
}

static nir_def *
addr_to_global(nir_builder *b, nir_def *addr,
               nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
   case nir_address_format_62bit_generic:
      assert(addr->num_components == 1);
      return addr;

   case nir_address_format_2x32bit_global:
      assert(addr->num_components == 2);
      return addr;

   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global:
      assert(addr->num_components == 4);
      return nir_iadd(b, nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)),
                      nir_u2u64(b, nir_channel(b, addr, 3)));

   case nir_address_format_32bit_index_offset:
   case nir_address_format_32bit_index_offset_pack64:
   case nir_address_format_vec2_index_32bit_offset:
   case nir_address_format_32bit_offset:
   case nir_address_format_32bit_offset_as_64bit:
   case nir_address_format_logical:
      unreachable("Cannot get a 64-bit address with this address format");
   }

   unreachable("Invalid address format");
}

static bool
addr_format_needs_bounds_check(nir_address_format addr_format)
{
   return addr_format == nir_address_format_64bit_bounded_global;
}

static nir_def *
addr_is_in_bounds(nir_builder *b, nir_def *addr,
                  nir_address_format addr_format, unsigned size)
{
   assert(addr_format == nir_address_format_64bit_bounded_global);
   assert(addr->num_components == 4);
   assert(size > 0);
   return nir_ult(b, nir_iadd_imm(b, nir_channel(b, addr, 3), size - 1),
                  nir_channel(b, addr, 2));
}
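
/* Worked example (illustrative, not in the original source): for a 4-byte
 * access with addr = vec4(base_lo, base_hi, bound, offset),
 * addr_is_in_bounds() emits the unsigned comparison
 *
 *    offset + 3 < bound
 *
 * so the access passes only if its last byte still lies inside the bound;
 * a bound of zero rejects every access.
 */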

static void
nir_get_explicit_deref_range(nir_deref_instr *deref,
                             nir_address_format addr_format,
                             uint32_t *out_base,
                             uint32_t *out_range)
{
   uint32_t base = 0;
   uint32_t range = glsl_get_explicit_size(deref->type, false);

   while (true) {
      nir_deref_instr *parent = nir_deref_instr_parent(deref);

      switch (deref->deref_type) {
      case nir_deref_type_array:
      case nir_deref_type_array_wildcard:
      case nir_deref_type_ptr_as_array: {
         const unsigned stride = nir_deref_instr_array_stride(deref);
         if (stride == 0)
            goto fail;

         if (!parent)
            goto fail;

         if (deref->deref_type != nir_deref_type_array_wildcard &&
             nir_src_is_const(deref->arr.index)) {
            base += stride * nir_src_as_uint(deref->arr.index);
         } else {
            if (glsl_get_length(parent->type) == 0)
               goto fail;
            range += stride * (glsl_get_length(parent->type) - 1);
         }
         break;
      }

      case nir_deref_type_struct: {
         if (!parent)
            goto fail;

         base += glsl_get_struct_field_offset(parent->type, deref->strct.index);
         break;
      }

      case nir_deref_type_cast: {
         nir_instr *parent_instr = deref->parent.ssa->parent_instr;

         switch (parent_instr->type) {
         case nir_instr_type_load_const: {
            nir_load_const_instr *load = nir_instr_as_load_const(parent_instr);

            switch (addr_format) {
            case nir_address_format_32bit_offset:
               base += load->value[1].u32;
               break;
            case nir_address_format_32bit_index_offset:
               base += load->value[1].u32;
               break;
            case nir_address_format_vec2_index_32bit_offset:
               base += load->value[2].u32;
               break;
            default:
               goto fail;
            }

            *out_base = base;
            *out_range = range;
            return;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent_instr);
            switch (intr->intrinsic) {
            case nir_intrinsic_load_vulkan_descriptor:
               /* Assume that a load_vulkan_descriptor won't contribute to an
                * offset within the resource.
                */
               break;
            default:
               goto fail;
            }

            *out_base = base;
            *out_range = range;
            return;
         }

         default:
            goto fail;
         }
      }

      default:
         goto fail;
      }

      deref = parent;
   }

fail:
   *out_base = 0;
   *out_range = ~0;
}
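
/* Worked example (illustrative, hypothetical deref chain): for buf[i].field
 * with an array stride of 16, a field offset of 4 and an array length of 8,
 * a non-constant index i gives
 *
 *    base  = 4
 *    range = sizeof(field) + 16 * (8 - 1)
 *
 * whereas a constant index such as i == 2 adds 32 to base instead and leaves
 * range at sizeof(field).
 */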

static nir_variable_mode
canonicalize_generic_modes(nir_variable_mode modes)
{
   assert(modes != 0);
   if (util_bitcount(modes) == 1)
      return modes;

   assert(!(modes & ~(nir_var_function_temp | nir_var_shader_temp |
                      nir_var_mem_shared | nir_var_mem_global)));

   /* Canonicalize by converting shader_temp to function_temp */
   if (modes & nir_var_shader_temp) {
      modes &= ~nir_var_shader_temp;
      modes |= nir_var_function_temp;
   }

   return modes;
}

static nir_intrinsic_op
get_store_global_op_from_addr_format(nir_address_format addr_format)
{
   if (addr_format != nir_address_format_2x32bit_global)
      return nir_intrinsic_store_global;
   else
      return nir_intrinsic_store_global_2x32;
}

static nir_intrinsic_op
get_load_global_op_from_addr_format(nir_address_format addr_format)
{
   if (addr_format != nir_address_format_2x32bit_global)
      return nir_intrinsic_load_global;
   else
      return nir_intrinsic_load_global_2x32;
}

static nir_intrinsic_op
get_load_global_constant_op_from_addr_format(nir_address_format addr_format)
{
   if (addr_format != nir_address_format_2x32bit_global)
      return nir_intrinsic_load_global_constant;
   else
      return nir_intrinsic_load_global_2x32; /* no dedicated op, fallback */
}
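
/* Note (illustrative summary): when more than one variable mode is still
 * possible (the 62bit_generic "generic pointer" case), the
 * build_explicit_io_* helpers below emit a ladder of
 * build_runtime_addr_mode_check() tests, recurse with exactly one mode on
 * each branch, and join the results with nir_if_phi().
 */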

static nir_def *
build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
                       nir_def *addr, nir_address_format addr_format,
                       nir_variable_mode modes,
                       uint32_t align_mul, uint32_t align_offset,
                       unsigned num_components)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   modes = canonicalize_generic_modes(modes);

   if (util_bitcount(modes) > 1) {
      if (addr_format_is_global(addr_format, modes)) {
         return build_explicit_io_load(b, intrin, addr, addr_format,
                                       nir_var_mem_global,
                                       align_mul, align_offset,
                                       num_components);
      } else if (modes & nir_var_function_temp) {
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_function_temp));
         nir_def *res1 =
            build_explicit_io_load(b, intrin, addr, addr_format,
                                   nir_var_function_temp,
                                   align_mul, align_offset,
                                   num_components);
         nir_push_else(b, NULL);
         nir_def *res2 =
            build_explicit_io_load(b, intrin, addr, addr_format,
                                   modes & ~nir_var_function_temp,
                                   align_mul, align_offset,
                                   num_components);
         nir_pop_if(b, NULL);
         return nir_if_phi(b, res1, res2);
      } else {
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_mem_shared));
         assert(modes & nir_var_mem_shared);
         nir_def *res1 =
            build_explicit_io_load(b, intrin, addr, addr_format,
                                   nir_var_mem_shared,
                                   align_mul, align_offset,
                                   num_components);
         nir_push_else(b, NULL);
         assert(modes & nir_var_mem_global);
         nir_def *res2 =
            build_explicit_io_load(b, intrin, addr, addr_format,
                                   nir_var_mem_global,
                                   align_mul, align_offset,
                                   num_components);
         nir_pop_if(b, NULL);
         return nir_if_phi(b, res1, res2);
      }
   }

   assert(util_bitcount(modes) == 1);
   const nir_variable_mode mode = modes;

   nir_intrinsic_op op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_deref:
      switch (mode) {
      case nir_var_mem_ubo:
         if (addr_format == nir_address_format_64bit_global_32bit_offset)
            op = nir_intrinsic_load_global_constant_offset;
         else if (addr_format == nir_address_format_64bit_bounded_global)
            op = nir_intrinsic_load_global_constant_bounded;
         else if (addr_format_is_global(addr_format, mode))
            op = nir_intrinsic_load_global_constant;
         else
            op = nir_intrinsic_load_ubo;
         break;
      case nir_var_mem_ssbo:
         if (addr_format_is_global(addr_format, mode))
            op = nir_intrinsic_load_global;
         else
            op = nir_intrinsic_load_ssbo;
         break;
      case nir_var_mem_global:
         assert(addr_format_is_global(addr_format, mode));
         op = get_load_global_op_from_addr_format(addr_format);
         break;
      case nir_var_uniform:
         assert(addr_format_is_offset(addr_format, mode));
         assert(b->shader->info.stage == MESA_SHADER_KERNEL);
         op = nir_intrinsic_load_kernel_input;
         break;
      case nir_var_mem_shared:
         assert(addr_format_is_offset(addr_format, mode));
         op = nir_intrinsic_load_shared;
         break;
      case nir_var_mem_task_payload:
         assert(addr_format_is_offset(addr_format, mode));
         op = nir_intrinsic_load_task_payload;
         break;
      case nir_var_shader_temp:
      case nir_var_function_temp:
         if (addr_format_is_offset(addr_format, mode)) {
            op = nir_intrinsic_load_scratch;
         } else {
            assert(addr_format_is_global(addr_format, mode));
            op = get_load_global_op_from_addr_format(addr_format);
         }
         break;
      case nir_var_mem_push_const:
         assert(addr_format == nir_address_format_32bit_offset);
         op = nir_intrinsic_load_push_constant;
         break;
      case nir_var_mem_constant:
         if (addr_format_is_offset(addr_format, mode)) {
            op = nir_intrinsic_load_constant;
         } else {
            assert(addr_format_is_global(addr_format, mode));
            op = get_load_global_constant_op_from_addr_format(addr_format);
         }
         break;
      default:
         unreachable("Unsupported explicit IO variable mode");
      }
      break;

   case nir_intrinsic_load_deref_block_intel:
      switch (mode) {
      case nir_var_mem_ssbo:
         if (addr_format_is_global(addr_format, mode))
            op = nir_intrinsic_load_global_block_intel;
         else
            op = nir_intrinsic_load_ssbo_block_intel;
         break;
      case nir_var_mem_global:
         op = nir_intrinsic_load_global_block_intel;
         break;
      case nir_var_mem_shared:
         op = nir_intrinsic_load_shared_block_intel;
         break;
      default:
         unreachable("Unsupported explicit IO variable mode");
      }
      break;

   default:
      unreachable("Invalid intrinsic");
   }

   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);

   if (op == nir_intrinsic_load_global_constant_offset) {
      assert(addr_format == nir_address_format_64bit_global_32bit_offset);
      load->src[0] = nir_src_for_ssa(
         nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)));
      load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
   } else if (op == nir_intrinsic_load_global_constant_bounded) {
      assert(addr_format == nir_address_format_64bit_bounded_global);
      load->src[0] = nir_src_for_ssa(
         nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)));
      load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
      load->src[2] = nir_src_for_ssa(nir_channel(b, addr, 2));
   } else if (addr_format_is_global(addr_format, mode)) {
      load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
   } else if (addr_format_is_offset(addr_format, mode)) {
      assert(addr->num_components == 1);
      load->src[0] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   } else {
      load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
      load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   }

   if (nir_intrinsic_has_access(load))
      nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));

   if (op == nir_intrinsic_load_constant) {
      nir_intrinsic_set_base(load, 0);
      nir_intrinsic_set_range(load, b->shader->constant_data_size);
   } else if (op == nir_intrinsic_load_kernel_input) {
      nir_intrinsic_set_base(load, 0);
      nir_intrinsic_set_range(load, b->shader->num_uniforms);
   } else if (mode == nir_var_mem_push_const) {
      /* Push constants are required to be able to be chased back to the
       * variable so we can provide a base/range.
       */
      nir_variable *var = nir_deref_instr_get_variable(deref);
      nir_intrinsic_set_base(load, 0);
      nir_intrinsic_set_range(load, glsl_get_explicit_size(var->type, false));
   }

   unsigned bit_size = intrin->def.bit_size;
   if (bit_size == 1) {
      /* TODO: Make the native bool bit_size an option. */
      bit_size = 32;
   }

   if (nir_intrinsic_has_align(load))
      nir_intrinsic_set_align(load, align_mul, align_offset);

   if (nir_intrinsic_has_range_base(load)) {
      unsigned base, range;
      nir_get_explicit_deref_range(deref, addr_format, &base, &range);
      nir_intrinsic_set_range_base(load, base);
      nir_intrinsic_set_range(load, range);
   }

   load->num_components = num_components;
   nir_def_init(&load->instr, &load->def, num_components, bit_size);

   assert(bit_size % 8 == 0);

   nir_def *result;
   if (addr_format_needs_bounds_check(addr_format) &&
       op != nir_intrinsic_load_global_constant_bounded) {
      /* We don't need to bounds-check global_constant_bounded because bounds
       * checking is handled by the intrinsic itself.
       *
       * The Vulkan spec for robustBufferAccess gives us quite a few options
       * as to what we can do with an OOB read.  Unfortunately, returning
       * undefined values isn't one of them so we return an actual zero.
       */
      nir_def *zero = nir_imm_zero(b, load->num_components, bit_size);

      /* TODO: Better handle block_intel. */
      assert(load->num_components == 1);
      const unsigned load_size = bit_size / 8;
      nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));

      nir_builder_instr_insert(b, &load->instr);

      nir_pop_if(b, NULL);

      result = nir_if_phi(b, &load->def, zero);
   } else {
      nir_builder_instr_insert(b, &load->instr);
      result = &load->def;
   }

   if (intrin->def.bit_size == 1) {
      /* For shared, we can go ahead and use NIR's and/or the back-end's
       * standard encoding for booleans rather than forcing a 0/1 boolean.
       * This should save an instruction or two.
       */
      if (mode == nir_var_mem_shared ||
          mode == nir_var_shader_temp ||
          mode == nir_var_function_temp)
         result = nir_b2b1(b, result);
      else
         result = nir_i2b(b, result);
   }

   return result;
}
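
/* Example of the resulting NIR (illustrative, textual form approximated): a
 * load_deref of an SSBO value lowered with
 * addr_format == nir_address_format_32bit_index_offset becomes roughly
 *
 *    %val = @load_ssbo(%index, %offset) (access=.., align_mul=.., align_offset=..)
 *
 * where %index and %offset are produced by addr_to_index() and
 * addr_to_offset() above.
 */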

static void
build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
                        nir_def *addr, nir_address_format addr_format,
                        nir_variable_mode modes,
                        uint32_t align_mul, uint32_t align_offset,
                        nir_def *value, nir_component_mask_t write_mask)
{
   modes = canonicalize_generic_modes(modes);

   if (util_bitcount(modes) > 1) {
      if (addr_format_is_global(addr_format, modes)) {
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 nir_var_mem_global,
                                 align_mul, align_offset,
                                 value, write_mask);
      } else if (modes & nir_var_function_temp) {
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_function_temp));
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 nir_var_function_temp,
                                 align_mul, align_offset,
                                 value, write_mask);
         nir_push_else(b, NULL);
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 modes & ~nir_var_function_temp,
                                 align_mul, align_offset,
                                 value, write_mask);
         nir_pop_if(b, NULL);
      } else {
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_mem_shared));
         assert(modes & nir_var_mem_shared);
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 nir_var_mem_shared,
                                 align_mul, align_offset,
                                 value, write_mask);
         nir_push_else(b, NULL);
         assert(modes & nir_var_mem_global);
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 nir_var_mem_global,
                                 align_mul, align_offset,
                                 value, write_mask);
         nir_pop_if(b, NULL);
      }
      return;
   }

   assert(util_bitcount(modes) == 1);
   const nir_variable_mode mode = modes;

   nir_intrinsic_op op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_store_deref:
      assert(write_mask != 0);

      switch (mode) {
      case nir_var_mem_ssbo:
         if (addr_format_is_global(addr_format, mode))
            op = get_store_global_op_from_addr_format(addr_format);
         else
            op = nir_intrinsic_store_ssbo;
         break;
      case nir_var_mem_global:
         assert(addr_format_is_global(addr_format, mode));
         op = get_store_global_op_from_addr_format(addr_format);
         break;
      case nir_var_mem_shared:
         assert(addr_format_is_offset(addr_format, mode));
         op = nir_intrinsic_store_shared;
         break;
      case nir_var_mem_task_payload:
         assert(addr_format_is_offset(addr_format, mode));
         op = nir_intrinsic_store_task_payload;
         break;
      case nir_var_shader_temp:
      case nir_var_function_temp:
         if (addr_format_is_offset(addr_format, mode)) {
            op = nir_intrinsic_store_scratch;
         } else {
            assert(addr_format_is_global(addr_format, mode));
            op = get_store_global_op_from_addr_format(addr_format);
         }
         break;
      default:
         unreachable("Unsupported explicit IO variable mode");
      }
      break;

   case nir_intrinsic_store_deref_block_intel:
      assert(write_mask == 0);

      switch (mode) {
      case nir_var_mem_ssbo:
         if (addr_format_is_global(addr_format, mode))
            op = nir_intrinsic_store_global_block_intel;
         else
            op = nir_intrinsic_store_ssbo_block_intel;
         break;
      case nir_var_mem_global:
         op = nir_intrinsic_store_global_block_intel;
         break;
      case nir_var_mem_shared:
         op = nir_intrinsic_store_shared_block_intel;
         break;
      default:
         unreachable("Unsupported explicit IO variable mode");
      }
      break;

   default:
      unreachable("Invalid intrinsic");
   }

   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);

   if (value->bit_size == 1) {
      /* For shared, we can go ahead and use NIR's and/or the back-end's
       * standard encoding for booleans rather than forcing a 0/1 boolean.
       * This should save an instruction or two.
       *
       * TODO: Make the native bool bit_size an option.
       */
      if (mode == nir_var_mem_shared ||
          mode == nir_var_shader_temp ||
          mode == nir_var_function_temp)
         value = nir_b2b32(b, value);
      else
         value = nir_b2iN(b, value, 32);
   }

   store->src[0] = nir_src_for_ssa(value);
   if (addr_format_is_global(addr_format, mode)) {
      store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
   } else if (addr_format_is_offset(addr_format, mode)) {
      assert(addr->num_components == 1);
      store->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   } else {
      store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
      store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   }

   nir_intrinsic_set_write_mask(store, write_mask);

   if (nir_intrinsic_has_access(store))
      nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));

   nir_intrinsic_set_align(store, align_mul, align_offset);

   assert(value->num_components == 1 ||
          value->num_components == intrin->num_components);
   store->num_components = value->num_components;

   assert(value->bit_size % 8 == 0);

   if (addr_format_needs_bounds_check(addr_format)) {
      /* TODO: Better handle block_intel. */
      assert(store->num_components == 1);
      const unsigned store_size = value->bit_size / 8;
      nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));

      nir_builder_instr_insert(b, &store->instr);

      nir_pop_if(b, NULL);
   } else {
      nir_builder_instr_insert(b, &store->instr);
   }
}
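
/* Note (illustrative): 1-bit booleans are widened before being stored, using
 * nir_b2b32 for shared/temp memory and nir_b2iN(..., 32) otherwise;
 * build_explicit_io_load() undoes this with nir_b2b1/nir_i2b on the way back.
 */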

static nir_def *
build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
                         nir_def *addr, nir_address_format addr_format,
                         nir_variable_mode modes)
{
   modes = canonicalize_generic_modes(modes);

   if (util_bitcount(modes) > 1) {
      if (addr_format_is_global(addr_format, modes)) {
         return build_explicit_io_atomic(b, intrin, addr, addr_format,
                                         nir_var_mem_global);
      } else if (modes & nir_var_function_temp) {
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_function_temp));
         nir_def *res1 =
            build_explicit_io_atomic(b, intrin, addr, addr_format,
                                     nir_var_function_temp);
         nir_push_else(b, NULL);
         nir_def *res2 =
            build_explicit_io_atomic(b, intrin, addr, addr_format,
                                     modes & ~nir_var_function_temp);
         nir_pop_if(b, NULL);
         return nir_if_phi(b, res1, res2);
      } else {
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_mem_shared));
         assert(modes & nir_var_mem_shared);
         nir_def *res1 =
            build_explicit_io_atomic(b, intrin, addr, addr_format,
                                     nir_var_mem_shared);
         nir_push_else(b, NULL);
         assert(modes & nir_var_mem_global);
         nir_def *res2 =
            build_explicit_io_atomic(b, intrin, addr, addr_format,
                                     nir_var_mem_global);
         nir_pop_if(b, NULL);
         return nir_if_phi(b, res1, res2);
      }
   }

   assert(util_bitcount(modes) == 1);
   const nir_variable_mode mode = modes;

   const unsigned num_data_srcs =
      nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_mem_ssbo:
      if (addr_format_is_global(addr_format, mode))
         op = global_atomic_for_deref(addr_format, intrin->intrinsic);
      else
         op = ssbo_atomic_for_deref(intrin->intrinsic);
      break;
   case nir_var_mem_global:
      assert(addr_format_is_global(addr_format, mode));
      op = global_atomic_for_deref(addr_format, intrin->intrinsic);
      break;
   case nir_var_mem_shared:
      assert(addr_format_is_offset(addr_format, mode));
      op = shared_atomic_for_deref(intrin->intrinsic);
      break;
   case nir_var_mem_task_payload:
      assert(addr_format_is_offset(addr_format, mode));
      op = task_payload_atomic_for_deref(intrin->intrinsic);
      break;
   default:
      unreachable("Unsupported explicit IO variable mode");
   }

   nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);
   nir_intrinsic_set_atomic_op(atomic, nir_intrinsic_atomic_op(intrin));

   unsigned src = 0;
   if (addr_format_is_global(addr_format, mode)) {
      atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
   } else if (addr_format_is_offset(addr_format, mode)) {
      assert(addr->num_components == 1);
      atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   } else {
      atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
      atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   }
   for (unsigned i = 0; i < num_data_srcs; i++) {
      atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
   }

   /* Global atomics don't have access flags because they assume that the
    * address may be non-uniform.
    */
   if (nir_intrinsic_has_access(atomic))
      nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));

   assert(intrin->def.num_components == 1);
   nir_def_init(&atomic->instr, &atomic->def, 1,
                intrin->def.bit_size);

   assert(atomic->def.bit_size % 8 == 0);

   if (addr_format_needs_bounds_check(addr_format)) {
      const unsigned atomic_size = atomic->def.bit_size / 8;
      nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));

      nir_builder_instr_insert(b, &atomic->instr);

      nir_pop_if(b, NULL);
      return nir_if_phi(b, &atomic->def,
                        nir_undef(b, 1, atomic->def.bit_size));
   } else {
      nir_builder_instr_insert(b, &atomic->instr);
      return &atomic->def;
   }
}
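
/* Note (illustrative): with a bounds-checked address format, an out-of-bounds
 * atomic is simply skipped and its result becomes an undef value via the
 * nir_if_phi() with nir_undef() above, unlike loads, which return zero.
 */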

nir_def *
nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
                                   nir_def *base_addr,
                                   nir_address_format addr_format)
{
   switch (deref->deref_type) {
   case nir_deref_type_var:
      return build_addr_for_var(b, deref->var, addr_format);

   case nir_deref_type_ptr_as_array:
   case nir_deref_type_array: {
      unsigned stride = nir_deref_instr_array_stride(deref);
      assert(stride > 0);

      unsigned offset_bit_size = addr_get_offset_bit_size(base_addr, addr_format);
      nir_def *index = deref->arr.index.ssa;
      nir_def *offset;

      /* If the access chain has been declared in-bounds, then we know it doesn't
       * overflow the type.  For nir_deref_type_array, this implies it cannot be
       * negative.  Also, since types in NIR have a maximum 32-bit size, we know the
       * final result will fit in a 32-bit value so we can convert the index to
       * 32-bit before multiplying and save ourselves from a 64-bit multiply.
       */
      if (deref->arr.in_bounds && deref->deref_type == nir_deref_type_array) {
         index = nir_u2u32(b, index);
         offset = nir_u2uN(b, nir_amul_imm(b, index, stride), offset_bit_size);
      } else {
         index = nir_i2iN(b, index, offset_bit_size);
         offset = nir_amul_imm(b, index, stride);
      }

      return nir_build_addr_iadd(b, base_addr, addr_format,
                                 deref->modes, offset);
   }

   case nir_deref_type_array_wildcard:
      unreachable("Wildcards should be lowered by now");
      break;

   case nir_deref_type_struct: {
      nir_deref_instr *parent = nir_deref_instr_parent(deref);
      int offset = glsl_get_struct_field_offset(parent->type,
                                                deref->strct.index);
      assert(offset >= 0);
      return nir_build_addr_iadd_imm(b, base_addr, addr_format,
                                     deref->modes, offset);
   }

   case nir_deref_type_cast:
      /* Nothing to do here */
      return base_addr;
   }

   unreachable("Invalid NIR deref type");
}
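
/* Worked example (illustrative, hypothetical values): for an array deref
 * arr[i] with a 16-byte stride, the code above emits offset = i * 16 (via
 * nir_amul_imm) and adds it to the base address with nir_build_addr_iadd();
 * an in-bounds nir_deref_type_array additionally narrows i to 32 bits first
 * to avoid a 64-bit multiply.
 */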

void
nir_lower_explicit_io_instr(nir_builder *b,
                            nir_intrinsic_instr *intrin,
                            nir_def *addr,
                            nir_address_format addr_format)
{
   b->cursor = nir_after_instr(&intrin->instr);

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   unsigned vec_stride = glsl_get_explicit_stride(deref->type);
   unsigned scalar_size = type_scalar_size_bytes(deref->type);
   if (vec_stride == 0) {
      vec_stride = scalar_size;
   } else {
      assert(glsl_type_is_vector(deref->type));
      assert(vec_stride >= scalar_size);
   }

   uint32_t align_mul, align_offset;
   if (!nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset)) {
      /* If we don't have an alignment from the deref, assume scalar */
      align_mul = scalar_size;
      align_offset = 0;
   }

   /* In order for bounds checking to be correct as per the Vulkan spec,
    * we need to check at the individual component granularity.  Prior to
    * robustness2, we're technically allowed to be sloppy by 16B.  Even with
    * robustness2, UBO loads are allowed to have a granularity as high as 256B
    * depending on hardware limits.  However, we have none of that information
    * here.  Short of adding new address formats, the easiest way to do that
    * is to just split any loads and stores into individual components here.
    *
    * TODO: At some point in the future we may want to add more ops similar to
    * nir_intrinsic_load_global_constant_bounded and make bounds checking the
    * back-end's problem.  Another option would be to somehow plumb more of
    * that information through to nir_lower_explicit_io.  For now, however,
    * scalarizing is at least correct.
    */
   bool scalarize = vec_stride > scalar_size ||
                    addr_format_needs_bounds_check(addr_format);

   switch (intrin->intrinsic) {
   case nir_intrinsic_load_deref: {
      nir_def *value;
      if (scalarize) {
         nir_def *comps[NIR_MAX_VEC_COMPONENTS] = {
            NULL,
         };
         for (unsigned i = 0; i < intrin->num_components; i++) {
            unsigned comp_offset = i * vec_stride;
            nir_def *comp_addr = nir_build_addr_iadd_imm(b, addr, addr_format,
                                                         deref->modes,
                                                         comp_offset);
            comps[i] = build_explicit_io_load(b, intrin, comp_addr,
                                              addr_format, deref->modes,
                                              align_mul,
                                              (align_offset + comp_offset) %
                                                 align_mul,
                                              1);
         }
         value = nir_vec(b, comps, intrin->num_components);
      } else {
         value = build_explicit_io_load(b, intrin, addr, addr_format,
                                        deref->modes, align_mul, align_offset,
                                        intrin->num_components);
      }
      nir_def_rewrite_uses(&intrin->def, value);
      break;
   }

   case nir_intrinsic_store_deref: {
      nir_def *value = intrin->src[1].ssa;
      nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
      if (scalarize) {
         for (unsigned i = 0; i < intrin->num_components; i++) {
            if (!(write_mask & (1 << i)))
               continue;

            unsigned comp_offset = i * vec_stride;
            nir_def *comp_addr = nir_build_addr_iadd_imm(b, addr, addr_format,
                                                         deref->modes,
                                                         comp_offset);
            build_explicit_io_store(b, intrin, comp_addr, addr_format,
                                    deref->modes, align_mul,
                                    (align_offset + comp_offset) % align_mul,
                                    nir_channel(b, value, i), 1);
         }
      } else {
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 deref->modes, align_mul, align_offset,
                                 value, write_mask);
      }
      break;
   }

   case nir_intrinsic_load_deref_block_intel: {
      nir_def *value = build_explicit_io_load(b, intrin, addr, addr_format,
                                              deref->modes,
                                              align_mul, align_offset,
                                              intrin->num_components);
      nir_def_rewrite_uses(&intrin->def, value);
      break;
   }

   case nir_intrinsic_store_deref_block_intel: {
      nir_def *value = intrin->src[1].ssa;
      const nir_component_mask_t write_mask = 0;
      build_explicit_io_store(b, intrin, addr, addr_format,
                              deref->modes, align_mul, align_offset,
                              value, write_mask);
      break;
   }

   default: {
      nir_def *value =
         build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes);
      nir_def_rewrite_uses(&intrin->def, value);
      break;
   }
   }

   nir_instr_remove(&intrin->instr);
}
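
/* Worked example (illustrative): a 3-component load_deref of a vector type
 * with vec_stride == 8 and scalar_size == 4 is scalarized into three
 * single-component loads at byte offsets 0, 8 and 16 and recombined with
 * nir_vec(); a tightly packed vector (vec_stride == scalar_size) without
 * bounds checking is loaded in one go.
 */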
|
|
|
|
|
|
2020-08-24 10:48:51 -05:00
|
|
|
bool
|
|
|
|
|
nir_get_explicit_deref_align(nir_deref_instr *deref,
|
|
|
|
|
bool default_to_type_align,
|
|
|
|
|
uint32_t *align_mul,
|
|
|
|
|
uint32_t *align_offset)
|
|
|
|
|
{
|
|
|
|
|
if (deref->deref_type == nir_deref_type_var) {
|
|
|
|
|
/* If we see a variable, align_mul is effectively infinite because we
|
|
|
|
|
* know the offset exactly (up to the offset of the base pointer for the
|
|
|
|
|
* given variable mode). We have to pick something so we choose 256B
|
|
|
|
|
* as an arbitrary alignment which seems high enough for any reasonable
|
|
|
|
|
* wide-load use-case. Back-ends should clamp alignments down if 256B
|
|
|
|
|
* is too large for some reason.
|
|
|
|
|
*/
|
|
|
|
|
*align_mul = 256;
|
|
|
|
|
*align_offset = deref->var->data.driver_location % 256;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* If we're a cast deref that has an alignment, use that. */
|
|
|
|
|
if (deref->deref_type == nir_deref_type_cast && deref->cast.align_mul > 0) {
|
|
|
|
|
*align_mul = deref->cast.align_mul;
|
|
|
|
|
*align_offset = deref->cast.align_offset;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Otherwise, we need to compute the alignment based on the parent */
|
|
|
|
|
nir_deref_instr *parent = nir_deref_instr_parent(deref);
|
|
|
|
|
if (parent == NULL) {
|
|
|
|
|
assert(deref->deref_type == nir_deref_type_cast);
|
|
|
|
|
if (default_to_type_align) {
|
|
|
|
|
/* If we don't have a parent, assume the type's alignment, if any. */
|
|
|
|
|
unsigned type_align = glsl_get_explicit_alignment(deref->type);
|
|
|
|
|
if (type_align == 0)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
*align_mul = type_align;
|
|
|
|
|
*align_offset = 0;
|
|
|
|
|
return true;
|
|
|
|
|
} else {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint32_t parent_mul, parent_offset;
|
|
|
|
|
if (!nir_get_explicit_deref_align(parent, default_to_type_align,
|
|
|
|
|
&parent_mul, &parent_offset))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
switch (deref->deref_type) {
|
|
|
|
|
case nir_deref_type_var:
|
|
|
|
|
unreachable("Handled above");
|
|
|
|
|
|
|
|
|
|
case nir_deref_type_array:
|
|
|
|
|
case nir_deref_type_array_wildcard:
|
|
|
|
|
case nir_deref_type_ptr_as_array: {
|
|
|
|
|
const unsigned stride = nir_deref_instr_array_stride(deref);
|
|
|
|
|
if (stride == 0)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
if (deref->deref_type != nir_deref_type_array_wildcard &&
|
|
|
|
|
nir_src_is_const(deref->arr.index)) {
|
|
|
|
|
unsigned offset = nir_src_as_uint(deref->arr.index) * stride;
|
|
|
|
|
*align_mul = parent_mul;
|
|
|
|
|
*align_offset = (parent_offset + offset) % parent_mul;
|
|
|
|
|
} else {
|
|
|
|
|
/* If this is a wildcard or an indirect deref, we have to go with the
|
|
|
|
|
* power-of-two gcd.
|
|
|
|
|
*/
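/* For example, a stride of 12 bytes contributes 1 << (ffs(12) - 1) == 4. */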
|
2020-09-06 20:09:01 -05:00
|
|
|
*align_mul = MIN2(parent_mul, 1 << (ffs(stride) - 1));
|
|
|
|
|
*align_offset = parent_offset % *align_mul;
|
2020-08-24 10:48:51 -05:00
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case nir_deref_type_struct: {
|
|
|
|
|
const int offset = glsl_get_struct_field_offset(parent->type,
|
|
|
|
|
deref->strct.index);
|
|
|
|
|
if (offset < 0)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
*align_mul = parent_mul;
|
|
|
|
|
*align_offset = (parent_offset + offset) % parent_mul;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case nir_deref_type_cast:
|
|
|
|
|
/* We handled the explicit alignment case above. */
|
|
|
|
|
assert(deref->cast.align_mul == 0);
|
|
|
|
|
*align_mul = parent_mul;
|
|
|
|
|
*align_offset = parent_offset;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
unreachable("Invalid deref_instr_type");
|
|
|
|
|
}
|
|
|
|
|
|
2019-01-07 18:00:22 -06:00
|
|
|
static void
|
|
|
|
|
lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
|
|
|
|
|
nir_address_format addr_format)
|
|
|
|
|
{
|
2023-02-15 22:34:46 +01:00
|
|
|
/* Ignore samplers/textures, because they are handled by other passes like `nir_lower_samplers`.
|
|
|
|
|
* Only skip them when they are uniforms, though; otherwise this would break GL bindless texture handles
|
|
|
|
|
* stored in UBOs.
|
|
|
|
|
*/
|
|
|
|
|
if (nir_deref_mode_is_in_set(deref, nir_var_uniform) &&
|
|
|
|
|
(glsl_type_is_sampler(deref->type) ||
|
|
|
|
|
glsl_type_is_texture(deref->type)))
|
|
|
|
|
return;
|
|
|
|
|
|
2019-01-07 18:00:22 -06:00
|
|
|
/* Just delete the deref if it's not used. We can't use
|
|
|
|
|
* nir_deref_instr_remove_if_unused here because it may remove more than
|
|
|
|
|
* one deref which could break our list walking since we walk the list
|
|
|
|
|
* backwards.
|
|
|
|
|
*/
|
2023-08-14 11:56:00 -05:00
|
|
|
if (nir_def_is_unused(&deref->def)) {
|
2019-01-07 18:00:22 -06:00
|
|
|
nir_instr_remove(&deref->instr);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
b->cursor = nir_after_instr(&deref->instr);
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *base_addr = NULL;
|
2019-01-07 18:00:22 -06:00
|
|
|
if (deref->deref_type != nir_deref_type_var) {
|
|
|
|
|
base_addr = deref->parent.ssa;
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr,
|
|
|
|
|
addr_format);
|
2023-08-14 11:56:00 -05:00
|
|
|
assert(addr->bit_size == deref->def.bit_size);
|
|
|
|
|
assert(addr->num_components == deref->def.num_components);
|
2019-01-07 18:00:22 -06:00
|
|
|
|
|
|
|
|
nir_instr_remove(&deref->instr);
|
2023-08-14 11:56:00 -05:00
|
|
|
nir_def_rewrite_uses(&deref->def, addr);
|
2019-01-07 18:00:22 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
|
|
|
|
|
nir_address_format addr_format)
|
|
|
|
|
{
|
|
|
|
|
nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format);
|
|
|
|
|
}
|
|
|
|
|
|
2019-03-10 08:35:00 -05:00
|
|
|
static void
|
|
|
|
|
lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin,
|
|
|
|
|
nir_address_format addr_format)
|
|
|
|
|
{
|
|
|
|
|
b->cursor = nir_after_instr(&intrin->instr);
|
|
|
|
|
|
|
|
|
|
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
|
|
|
|
|
|
|
|
|
assert(glsl_type_is_array(deref->type));
|
|
|
|
|
assert(glsl_get_length(deref->type) == 0);
|
2020-10-30 12:19:25 -05:00
|
|
|
assert(nir_deref_mode_is(deref, nir_var_mem_ssbo));
|
2019-03-10 08:35:00 -05:00
|
|
|
unsigned stride = glsl_get_explicit_stride(deref->type);
|
|
|
|
|
assert(stride > 0);
|
|
|
|
|
|
2023-08-14 11:56:00 -05:00
|
|
|
nir_def *addr = &deref->def;
|
2019-03-10 08:35:00 -05:00
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *offset, *size;
|
2023-02-21 07:37:28 -06:00
|
|
|
switch (addr_format) {
|
|
|
|
|
case nir_address_format_64bit_global_32bit_offset:
|
|
|
|
|
case nir_address_format_64bit_bounded_global:
|
|
|
|
|
offset = nir_channel(b, addr, 3);
|
|
|
|
|
size = nir_channel(b, addr, 2);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case nir_address_format_32bit_index_offset:
|
|
|
|
|
case nir_address_format_32bit_index_offset_pack64:
|
|
|
|
|
case nir_address_format_vec2_index_32bit_offset: {
|
|
|
|
|
offset = addr_to_offset(b, addr, addr_format);
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *index = addr_to_index(b, addr, addr_format);
|
2023-02-21 07:37:28 -06:00
|
|
|
unsigned access = nir_intrinsic_access(intrin);
|
2023-08-08 12:00:35 -05:00
|
|
|
size = nir_get_ssbo_size(b, index, .access = access);
|
2023-02-21 07:37:28 -06:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
unreachable("Cannot determine SSBO size");
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *remaining = nir_usub_sat(b, size, offset);
|
|
|
|
|
nir_def *arr_size = nir_udiv_imm(b, remaining, stride);
|
2019-03-10 08:35:00 -05:00
|
|
|
|
2023-08-14 11:56:00 -05:00
|
|
|
nir_def_rewrite_uses(&intrin->def, arr_size);
|
2019-03-10 08:35:00 -05:00
|
|
|
nir_instr_remove(&intrin->instr);
|
|
|
|
|
}
|
|
|
|
|
|
2020-08-15 00:32:46 -05:00
|
|
|
static void
|
|
|
|
|
lower_explicit_io_mode_check(nir_builder *b, nir_intrinsic_instr *intrin,
|
|
|
|
|
nir_address_format addr_format)
|
|
|
|
|
{
|
|
|
|
|
if (addr_format_is_global(addr_format, 0)) {
|
|
|
|
|
/* If the address format is always global, then the driver can use
|
|
|
|
|
* global addresses regardless of the mode. In that case, don't create
|
|
|
|
|
* a check, just whack the intrinsic to addr_mode_is and delegate to the
|
2021-02-03 13:49:39 -05:00
|
|
|
* driver lowering.
|
2020-08-15 00:32:46 -05:00
|
|
|
*/
|
|
|
|
|
intrin->intrinsic = nir_intrinsic_addr_mode_is;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *addr = intrin->src[0].ssa;
|
2020-08-15 00:32:46 -05:00
|
|
|
|
|
|
|
|
b->cursor = nir_instr_remove(&intrin->instr);
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *is_mode =
|
2020-08-15 00:32:46 -05:00
|
|
|
build_runtime_addr_mode_check(b, addr, addr_format,
|
|
|
|
|
nir_intrinsic_memory_modes(intrin));
|
|
|
|
|
|
2023-08-14 11:56:00 -05:00
|
|
|
nir_def_rewrite_uses(&intrin->def, is_mode);
|
2020-08-15 00:32:46 -05:00
|
|
|
}
|
|
|
|
|
|
2018-11-27 21:31:42 -06:00
|
|
|
static bool
|
|
|
|
|
nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
|
|
|
|
|
nir_address_format addr_format)
|
|
|
|
|
{
|
|
|
|
|
bool progress = false;
|
|
|
|
|
|
2023-06-26 10:42:29 -04:00
|
|
|
nir_builder b = nir_builder_create(impl);
|
2018-11-27 21:31:42 -06:00
|
|
|
|
|
|
|
|
/* Walk in reverse order so that we can see the full deref chain when we
|
|
|
|
|
* lower the access operations. We lower them assuming that the derefs
|
|
|
|
|
* will be turned into address calculations later.
|
|
|
|
|
*/
|
|
|
|
|
nir_foreach_block_reverse(block, impl) {
|
|
|
|
|
nir_foreach_instr_reverse_safe(instr, block) {
|
|
|
|
|
switch (instr->type) {
|
|
|
|
|
case nir_instr_type_deref: {
|
|
|
|
|
nir_deref_instr *deref = nir_instr_as_deref(instr);
|
2020-11-01 16:58:27 -06:00
|
|
|
if (nir_deref_mode_is_in_set(deref, modes)) {
|
2018-11-27 21:31:42 -06:00
|
|
|
lower_explicit_io_deref(&b, deref, addr_format);
|
|
|
|
|
progress = true;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case nir_instr_type_intrinsic: {
|
|
|
|
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
|
|
|
|
switch (intrin->intrinsic) {
|
|
|
|
|
case nir_intrinsic_load_deref:
|
|
|
|
|
case nir_intrinsic_store_deref:
|
2020-10-05 14:46:36 -07:00
|
|
|
case nir_intrinsic_load_deref_block_intel:
|
|
|
|
|
case nir_intrinsic_store_deref_block_intel:
|
2023-05-12 10:53:41 -04:00
|
|
|
case nir_intrinsic_deref_atomic:
|
2023-05-15 10:31:11 -04:00
|
|
|
case nir_intrinsic_deref_atomic_swap: {
|
2018-11-27 21:31:42 -06:00
|
|
|
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
2020-11-01 16:58:27 -06:00
|
|
|
if (nir_deref_mode_is_in_set(deref, modes)) {
|
2018-11-27 21:31:42 -06:00
|
|
|
lower_explicit_io_access(&b, intrin, addr_format);
|
|
|
|
|
progress = true;
|
2019-03-10 08:35:00 -05:00
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case nir_intrinsic_deref_buffer_array_length: {
|
|
|
|
|
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
2020-11-01 16:58:27 -06:00
|
|
|
if (nir_deref_mode_is_in_set(deref, modes)) {
|
2019-03-10 08:35:00 -05:00
|
|
|
lower_explicit_io_array_length(&b, intrin, addr_format);
|
|
|
|
|
progress = true;
|
2018-11-27 21:31:42 -06:00
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2020-08-15 00:32:46 -05:00
|
|
|
case nir_intrinsic_deref_mode_is: {
|
|
|
|
|
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
|
|
|
|
if (nir_deref_mode_is_in_set(deref, modes)) {
|
|
|
|
|
lower_explicit_io_mode_check(&b, intrin, addr_format);
|
|
|
|
|
progress = true;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2022-02-19 12:37:06 +01:00
|
|
|
case nir_intrinsic_launch_mesh_workgroups_with_payload_deref: {
|
|
|
|
|
if (modes & nir_var_mem_task_payload) {
|
|
|
|
|
/* Get address and size of the payload variable. */
|
|
|
|
|
nir_deref_instr *deref = nir_src_as_deref(intrin->src[1]);
|
|
|
|
|
assert(deref->deref_type == nir_deref_type_var);
|
|
|
|
|
unsigned base = deref->var->data.explicit_location;
|
|
|
|
|
unsigned size = glsl_get_explicit_size(deref->var->type, false);
|
|
|
|
|
|
|
|
|
|
/* Replace the current instruction with the explicit intrinsic. */
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *dispatch_3d = intrin->src[0].ssa;
|
2022-02-19 12:37:06 +01:00
|
|
|
b.cursor = nir_instr_remove(instr);
|
|
|
|
|
nir_launch_mesh_workgroups(&b, dispatch_3d, .base = base, .range = size);
|
|
|
|
|
progress = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2018-11-27 21:31:42 -06:00
|
|
|
default:
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
/* Nothing to do */
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (progress) {
|
2022-11-16 15:53:26 +02:00
|
|
|
nir_metadata_preserve(impl, nir_metadata_none);
|
2021-10-04 21:24:00 +03:00
|
|
|
} else {
|
|
|
|
|
nir_metadata_preserve(impl, nir_metadata_all);
|
2018-11-27 21:31:42 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return progress;
|
|
|
|
|
}
|
|
|
|
|
|
2020-06-11 16:08:06 -05:00
|
|
|
/** Lower explicitly laid out I/O access to byte offset/address intrinsics
|
|
|
|
|
*
|
|
|
|
|
* This pass is intended to be used for any I/O which touches memory external
|
|
|
|
|
* to the shader or which is directly visible to the client. It requires that
|
|
|
|
|
* all data types in the given modes have explicit stride/offset decorations
|
|
|
|
|
* to tell it exactly how to calculate the offset/address for the given load,
|
|
|
|
|
* store, or atomic operation. If the offset/stride information does not come
|
|
|
|
|
* from the client explicitly (as with shared variables in GL or Vulkan),
|
|
|
|
|
* nir_lower_vars_to_explicit_types() can be used to add them.
|
|
|
|
|
*
|
|
|
|
|
* Unlike nir_lower_io, this pass is fully capable of handling incomplete
|
|
|
|
|
* pointer chains which may contain cast derefs. It does so by walking the
|
|
|
|
|
* deref chain backwards and simply replacing each deref, one at a time, with
|
|
|
|
|
* the appropriate address calculation. The pass takes a nir_address_format
|
|
|
|
|
* parameter which describes how the offset or address is to be represented
|
|
|
|
|
* during calculations. By ensuring that the address is always in a
|
|
|
|
|
* consistent format, pointers can safely be conjured from thin air by the
|
|
|
|
|
* driver, stored to variables, passed through phis, etc.
|
|
|
|
|
*
|
|
|
|
|
* The one exception to the simple algorithm described above is for handling
|
|
|
|
|
* row-major matrices in which case we may look down one additional level of
|
|
|
|
|
* the deref chain.
|
2020-11-01 16:58:27 -06:00
|
|
|
*
|
2020-08-15 00:57:14 -05:00
|
|
|
* This pass is also capable of handling OpenCL generic pointers. If the
|
2021-02-03 13:49:39 -05:00
|
|
|
* address format is global, it will lower any ambiguous (more than one mode)
|
|
|
|
|
* access to global and pass through the deref_mode_is run-time checks as
|
2020-08-15 00:57:14 -05:00
|
|
|
* addr_mode_is. This assumes the driver has somehow mapped shared and
|
|
|
|
|
* scratch memory to the global address space. For other modes such as
|
|
|
|
|
* 62bit_generic, there is an enum embedded in the address and we lower
|
|
|
|
|
* ambiguous access to an if-ladder and deref_mode_is to a check against the
|
|
|
|
|
* embedded enum. If nir_lower_explicit_io is called on any shader that
|
|
|
|
|
* contains generic pointers, it must either be used on all of the generic
|
|
|
|
|
* modes or none.
|
2020-06-11 16:08:06 -05:00
|
|
|
*/
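/* Illustrative usage sketch (the modes and address format here are driver
 * choices, not mandated by this pass):
 *
 *    nir_lower_explicit_io(nir, nir_var_mem_ubo | nir_var_mem_ssbo,
 *                          nir_address_format_64bit_bounded_global);
 */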
|
2018-11-27 21:31:42 -06:00
|
|
|
bool
|
|
|
|
|
nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
|
|
|
|
|
nir_address_format addr_format)
|
|
|
|
|
{
|
|
|
|
|
bool progress = false;
|
|
|
|
|
|
2023-06-22 13:27:59 -04:00
|
|
|
nir_foreach_function_impl(impl, shader) {
|
|
|
|
|
if (impl && nir_lower_explicit_io_impl(impl, modes, addr_format))
|
2018-11-27 21:31:42 -06:00
|
|
|
progress = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return progress;
|
|
|
|
|
}
|
|
|
|
|
|
2019-04-09 20:18:11 +01:00
|
|
|
static bool
|
|
|
|
|
nir_lower_vars_to_explicit_types_impl(nir_function_impl *impl,
|
|
|
|
|
nir_variable_mode modes,
|
|
|
|
|
glsl_type_size_align_func type_info)
|
|
|
|
|
{
|
|
|
|
|
bool progress = false;
|
|
|
|
|
|
|
|
|
|
nir_foreach_block(block, impl) {
|
|
|
|
|
nir_foreach_instr(instr, block) {
|
|
|
|
|
if (instr->type != nir_instr_type_deref)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
nir_deref_instr *deref = nir_instr_as_deref(instr);
|
2020-11-01 16:58:27 -06:00
|
|
|
if (!nir_deref_mode_is_in_set(deref, modes))
|
2019-04-09 20:18:11 +01:00
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
unsigned size, alignment;
|
|
|
|
|
const struct glsl_type *new_type =
|
|
|
|
|
glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment);
|
|
|
|
|
if (new_type != deref->type) {
|
|
|
|
|
progress = true;
|
|
|
|
|
deref->type = new_type;
|
|
|
|
|
}
|
|
|
|
|
if (deref->deref_type == nir_deref_type_cast) {
|
|
|
|
|
/* See also glsl_type::get_explicit_type_for_size_align() */
|
|
|
|
|
unsigned new_stride = align(size, alignment);
|
|
|
|
|
if (new_stride != deref->cast.ptr_stride) {
|
|
|
|
|
deref->cast.ptr_stride = new_stride;
|
|
|
|
|
progress = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (progress) {
|
|
|
|
|
nir_metadata_preserve(impl, nir_metadata_block_index |
|
2023-08-08 12:00:35 -05:00
|
|
|
nir_metadata_dominance |
|
2023-08-15 10:11:43 -05:00
|
|
|
nir_metadata_live_defs |
|
2023-08-08 12:00:35 -05:00
|
|
|
nir_metadata_loop_analysis);
|
2021-10-04 21:24:00 +03:00
|
|
|
} else {
|
|
|
|
|
nir_metadata_preserve(impl, nir_metadata_all);
|
2019-04-09 20:18:11 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return progress;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
lower_vars_to_explicit(nir_shader *shader,
|
|
|
|
|
struct exec_list *vars, nir_variable_mode mode,
|
|
|
|
|
glsl_type_size_align_func type_info)
|
|
|
|
|
{
|
|
|
|
|
bool progress = false;
|
2020-05-26 12:20:20 -07:00
|
|
|
unsigned offset;
|
|
|
|
|
switch (mode) {
|
2020-08-27 16:00:00 -05:00
|
|
|
case nir_var_uniform:
|
|
|
|
|
assert(shader->info.stage == MESA_SHADER_KERNEL);
|
|
|
|
|
offset = 0;
|
|
|
|
|
break;
|
2020-05-26 12:20:20 -07:00
|
|
|
case nir_var_function_temp:
|
|
|
|
|
case nir_var_shader_temp:
|
|
|
|
|
offset = shader->scratch_size;
|
|
|
|
|
break;
|
|
|
|
|
case nir_var_mem_shared:
|
2021-04-08 00:53:52 +02:00
|
|
|
offset = shader->info.shared_size;
|
2020-05-26 12:20:20 -07:00
|
|
|
break;
|
2022-02-08 02:55:18 +01:00
|
|
|
case nir_var_mem_task_payload:
|
|
|
|
|
offset = shader->info.task_payload_size;
|
|
|
|
|
break;
|
2023-08-01 14:35:21 +02:00
|
|
|
case nir_var_mem_node_payload:
|
|
|
|
|
assert(!shader->info.cs.node_payloads_size);
|
|
|
|
|
offset = 0;
|
|
|
|
|
break;
|
2020-11-02 17:58:42 -06:00
|
|
|
case nir_var_mem_global:
|
|
|
|
|
offset = shader->global_mem_size;
|
|
|
|
|
break;
|
2020-08-18 14:43:39 -05:00
|
|
|
case nir_var_mem_constant:
|
|
|
|
|
offset = shader->constant_data_size;
|
|
|
|
|
break;
|
2020-08-04 19:24:24 -05:00
|
|
|
case nir_var_shader_call_data:
|
2020-07-28 18:01:41 -05:00
|
|
|
case nir_var_ray_hit_attrib:
|
2023-08-01 14:35:21 +02:00
|
|
|
case nir_var_mem_node_payload_in:
|
2020-07-28 18:01:41 -05:00
|
|
|
offset = 0;
|
|
|
|
|
break;
|
2020-05-26 12:20:20 -07:00
|
|
|
default:
|
|
|
|
|
unreachable("Unsupported mode");
|
|
|
|
|
}
|
2020-07-20 16:30:37 -05:00
|
|
|
nir_foreach_variable_in_list(var, vars) {
|
|
|
|
|
if (var->data.mode != mode)
|
|
|
|
|
continue;
|
|
|
|
|
|
2023-12-05 10:28:22 -06:00
|
|
|
unsigned size, alignment;
|
2019-04-09 20:18:11 +01:00
|
|
|
const struct glsl_type *explicit_type =
|
2023-12-05 10:28:22 -06:00
|
|
|
glsl_get_explicit_type_for_size_align(var->type, type_info,
|
|
|
|
|
&size, &alignment);
|
2019-04-09 20:18:11 +01:00
|
|
|
|
2020-08-18 14:43:39 -05:00
|
|
|
if (explicit_type != var->type)
|
2019-04-09 20:18:11 +01:00
|
|
|
var->type = explicit_type;
|
|
|
|
|
|
2021-05-07 10:33:07 +03:00
|
|
|
UNUSED bool is_empty_struct =
|
|
|
|
|
glsl_type_is_struct_or_ifc(explicit_type) &&
|
|
|
|
|
glsl_get_length(explicit_type) == 0;
|
|
|
|
|
|
2023-12-05 10:28:22 -06:00
|
|
|
assert(util_is_power_of_two_nonzero(alignment) || is_empty_struct ||
|
2023-08-08 11:02:33 -07:00
|
|
|
glsl_type_is_cmat(glsl_without_array(explicit_type)));
|
2023-12-05 10:28:22 -06:00
|
|
|
assert(util_is_power_of_two_or_zero(var->data.alignment));
|
|
|
|
|
alignment = MAX2(alignment, var->data.alignment);
|
|
|
|
|
|
|
|
|
|
var->data.driver_location = ALIGN_POT(offset, alignment);
|
2019-04-09 20:18:11 +01:00
|
|
|
offset = var->data.driver_location + size;
|
2020-08-18 14:43:39 -05:00
|
|
|
progress = true;
|
2019-04-09 20:18:11 +01:00
|
|
|
}
|
|
|
|
|
|
2020-05-26 12:20:20 -07:00
|
|
|
switch (mode) {
|
2020-08-27 16:00:00 -05:00
|
|
|
case nir_var_uniform:
|
|
|
|
|
assert(shader->info.stage == MESA_SHADER_KERNEL);
|
|
|
|
|
shader->num_uniforms = offset;
|
|
|
|
|
break;
|
2020-05-26 12:20:20 -07:00
|
|
|
case nir_var_shader_temp:
|
|
|
|
|
case nir_var_function_temp:
|
|
|
|
|
shader->scratch_size = offset;
|
|
|
|
|
break;
|
|
|
|
|
case nir_var_mem_shared:
|
2021-04-08 12:30:14 +02:00
|
|
|
shader->info.shared_size = offset;
|
2020-05-26 12:20:20 -07:00
|
|
|
break;
|
2022-02-08 02:55:18 +01:00
|
|
|
case nir_var_mem_task_payload:
|
|
|
|
|
shader->info.task_payload_size = offset;
|
|
|
|
|
break;
|
2023-08-01 14:35:21 +02:00
|
|
|
case nir_var_mem_node_payload:
|
|
|
|
|
shader->info.cs.node_payloads_size = offset;
|
|
|
|
|
break;
|
2020-11-02 17:58:42 -06:00
|
|
|
case nir_var_mem_global:
|
|
|
|
|
shader->global_mem_size = offset;
|
|
|
|
|
break;
|
2020-08-18 14:43:39 -05:00
|
|
|
case nir_var_mem_constant:
|
|
|
|
|
shader->constant_data_size = offset;
|
|
|
|
|
break;
|
2020-08-04 19:24:24 -05:00
|
|
|
case nir_var_shader_call_data:
|
2020-07-28 18:01:41 -05:00
|
|
|
case nir_var_ray_hit_attrib:
|
2023-08-01 14:35:21 +02:00
|
|
|
case nir_var_mem_node_payload_in:
|
2020-07-28 18:01:41 -05:00
|
|
|
break;
|
2020-05-26 12:20:20 -07:00
|
|
|
default:
|
|
|
|
|
unreachable("Unsupported mode");
|
2019-04-09 20:18:11 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return progress;
|
|
|
|
|
}
|
|
|
|
|
|
2020-11-01 16:58:27 -06:00
|
|
|
/* If nir_lower_vars_to_explicit_types is called on any shader that contains
|
|
|
|
|
* generic pointers, it must either be used on all of the generic modes or
|
|
|
|
|
* none.
|
|
|
|
|
*/
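/* Illustrative usage sketch (assuming the driver wants the "natural"
 * size/align rules for shared memory):
 *
 *    nir_lower_vars_to_explicit_types(nir, nir_var_mem_shared,
 *                                     glsl_get_natural_size_align_bytes);
 */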
|
2019-04-09 20:18:11 +01:00
|
|
|
bool
|
|
|
|
|
nir_lower_vars_to_explicit_types(nir_shader *shader,
|
|
|
|
|
nir_variable_mode modes,
|
|
|
|
|
glsl_type_size_align_func type_info)
|
|
|
|
|
{
|
|
|
|
|
/* TODO: Situations which need to be handled to support more modes:
|
|
|
|
|
* - row-major matrices
|
|
|
|
|
* - compact shader inputs/outputs
|
|
|
|
|
* - interface types
|
|
|
|
|
*/
|
2020-08-27 16:12:57 -05:00
|
|
|
ASSERTED nir_variable_mode supported =
|
2020-11-03 11:34:18 -06:00
|
|
|
nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant |
|
2020-07-28 18:01:41 -05:00
|
|
|
nir_var_shader_temp | nir_var_function_temp | nir_var_uniform |
|
2022-02-08 02:55:18 +01:00
|
|
|
nir_var_shader_call_data | nir_var_ray_hit_attrib |
|
2023-08-01 14:35:21 +02:00
|
|
|
nir_var_mem_task_payload | nir_var_mem_node_payload |
|
|
|
|
|
nir_var_mem_node_payload_in;
|
2019-04-09 20:18:11 +01:00
|
|
|
assert(!(modes & ~supported) && "unsupported");
|
|
|
|
|
|
|
|
|
|
bool progress = false;
|
|
|
|
|
|
2020-08-27 16:00:00 -05:00
|
|
|
if (modes & nir_var_uniform)
|
|
|
|
|
progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_uniform, type_info);
|
2020-11-02 17:58:42 -06:00
|
|
|
if (modes & nir_var_mem_global)
|
|
|
|
|
progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_global, type_info);
|
2020-10-28 14:55:05 -07:00
|
|
|
|
|
|
|
|
if (modes & nir_var_mem_shared) {
|
2021-04-29 12:10:13 -07:00
|
|
|
assert(!shader->info.shared_memory_explicit_layout);
|
2020-07-20 16:30:37 -05:00
|
|
|
progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info);
|
2020-10-28 14:55:05 -07:00
|
|
|
}
|
|
|
|
|
|
2019-04-09 20:18:11 +01:00
|
|
|
if (modes & nir_var_shader_temp)
|
2020-07-20 16:30:37 -05:00
|
|
|
progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_temp, type_info);
|
2020-11-03 11:34:18 -06:00
|
|
|
if (modes & nir_var_mem_constant)
|
|
|
|
|
progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_constant, type_info);
|
2020-08-04 19:24:24 -05:00
|
|
|
if (modes & nir_var_shader_call_data)
|
|
|
|
|
progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_call_data, type_info);
|
2020-07-28 18:01:41 -05:00
|
|
|
if (modes & nir_var_ray_hit_attrib)
|
|
|
|
|
progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_ray_hit_attrib, type_info);
|
2022-02-08 02:55:18 +01:00
|
|
|
if (modes & nir_var_mem_task_payload)
|
|
|
|
|
progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_task_payload, type_info);
|
2023-08-01 14:35:21 +02:00
|
|
|
if (modes & nir_var_mem_node_payload)
|
|
|
|
|
progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_node_payload, type_info);
|
|
|
|
|
if (modes & nir_var_mem_node_payload_in)
|
|
|
|
|
progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_node_payload_in, type_info);
|
2019-04-09 20:18:11 +01:00
|
|
|
|
2023-06-22 13:27:59 -04:00
|
|
|
nir_foreach_function_impl(impl, shader) {
|
|
|
|
|
if (modes & nir_var_function_temp)
|
|
|
|
|
progress |= lower_vars_to_explicit(shader, &impl->locals, nir_var_function_temp, type_info);
|
2019-04-09 20:18:11 +01:00
|
|
|
|
2023-06-22 13:27:59 -04:00
|
|
|
progress |= nir_lower_vars_to_explicit_types_impl(impl, modes, type_info);
|
2019-04-09 20:18:11 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return progress;
|
|
|
|
|
}
|
|
|
|
|
|
2020-08-18 14:43:39 -05:00
|
|
|
static void
|
2020-11-03 11:07:07 -06:00
|
|
|
write_constant(void *dst, size_t dst_size,
|
|
|
|
|
const nir_constant *c, const struct glsl_type *type)
|
2020-08-18 14:43:39 -05:00
|
|
|
{
|
2023-05-18 10:31:50 -07:00
|
|
|
if (c->is_null_constant) {
|
|
|
|
|
memset(dst, 0, dst_size);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2020-08-18 14:43:39 -05:00
|
|
|
if (glsl_type_is_vector_or_scalar(type)) {
|
|
|
|
|
const unsigned num_components = glsl_get_vector_elements(type);
|
|
|
|
|
const unsigned bit_size = glsl_get_bit_size(type);
|
|
|
|
|
if (bit_size == 1) {
|
|
|
|
|
/* Booleans are special-cased to be 32-bit
|
|
|
|
|
*
|
|
|
|
|
* TODO: Make the native bool bit_size an option.
|
|
|
|
|
*/
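/* For example, true is written as 0xffffffff (-1) and false as 0. */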
|
2020-11-03 11:07:07 -06:00
|
|
|
assert(num_components * 4 <= dst_size);
|
2020-08-18 14:43:39 -05:00
|
|
|
for (unsigned i = 0; i < num_components; i++) {
|
|
|
|
|
int32_t b32 = -(int)c->values[i].b;
|
|
|
|
|
memcpy((char *)dst + i * 4, &b32, 4);
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
assert(bit_size >= 8 && bit_size % 8 == 0);
|
|
|
|
|
const unsigned byte_size = bit_size / 8;
|
2020-11-03 11:07:07 -06:00
|
|
|
assert(num_components * byte_size <= dst_size);
|
2020-08-18 14:43:39 -05:00
|
|
|
for (unsigned i = 0; i < num_components; i++) {
|
|
|
|
|
/* Annoyingly, thanks to packed structs, we can't make any
|
|
|
|
|
* assumptions about the alignment of dst. To avoid any strange
|
|
|
|
|
* issues with unaligned writes, we always use memcpy.
|
|
|
|
|
*/
|
|
|
|
|
memcpy((char *)dst + i * byte_size, &c->values[i], byte_size);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else if (glsl_type_is_array_or_matrix(type)) {
|
|
|
|
|
const unsigned array_len = glsl_get_length(type);
|
|
|
|
|
const unsigned stride = glsl_get_explicit_stride(type);
|
|
|
|
|
assert(stride > 0);
|
|
|
|
|
const struct glsl_type *elem_type = glsl_get_array_element(type);
|
2020-11-03 11:07:07 -06:00
|
|
|
for (unsigned i = 0; i < array_len; i++) {
|
|
|
|
|
unsigned elem_offset = i * stride;
|
|
|
|
|
assert(elem_offset < dst_size);
|
|
|
|
|
write_constant((char *)dst + elem_offset, dst_size - elem_offset,
|
|
|
|
|
c->elements[i], elem_type);
|
|
|
|
|
}
|
2020-08-18 14:43:39 -05:00
|
|
|
} else {
|
|
|
|
|
assert(glsl_type_is_struct_or_ifc(type));
|
|
|
|
|
const unsigned num_fields = glsl_get_length(type);
|
|
|
|
|
for (unsigned i = 0; i < num_fields; i++) {
|
|
|
|
|
const int field_offset = glsl_get_struct_field_offset(type, i);
|
2020-11-03 11:07:07 -06:00
|
|
|
assert(field_offset >= 0 && field_offset < dst_size);
|
2020-08-18 14:43:39 -05:00
|
|
|
const struct glsl_type *field_type = glsl_get_struct_field(type, i);
|
2020-11-03 11:07:07 -06:00
|
|
|
write_constant((char *)dst + field_offset, dst_size - field_offset,
|
|
|
|
|
c->elements[i], field_type);
|
2020-08-18 14:43:39 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-11-03 11:33:19 -06:00
|
|
|
void
|
|
|
|
|
nir_gather_explicit_io_initializers(nir_shader *shader,
|
|
|
|
|
void *dst, size_t dst_size,
|
|
|
|
|
nir_variable_mode mode)
|
|
|
|
|
{
|
|
|
|
|
/* It doesn't really make sense to gather initializers for more than one
|
|
|
|
|
* mode at a time. If this ever becomes well-defined, we can drop the
|
|
|
|
|
* assert then.
|
|
|
|
|
*/
|
|
|
|
|
assert(util_bitcount(mode) == 1);
|
|
|
|
|
|
|
|
|
|
nir_foreach_variable_with_modes(var, shader, mode) {
|
|
|
|
|
assert(var->data.driver_location < dst_size);
|
|
|
|
|
write_constant((char *)dst + var->data.driver_location,
|
|
|
|
|
dst_size - var->data.driver_location,
|
|
|
|
|
var->constant_initializer, var->type);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-07 22:35:33 -08:00
|
|
|
/**
|
2023-06-12 21:58:43 -07:00
|
|
|
* Return the offset source number for a load/store intrinsic or -1 if there's no offset.
|
2015-11-07 22:35:33 -08:00
|
|
|
*/
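/* For example, load_input's offset is src[0], load_ubo's is src[1], and
 * store_ssbo's is src[2].
 */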
|
2023-06-12 21:58:43 -07:00
|
|
|
int
|
|
|
|
|
nir_get_io_offset_src_number(const nir_intrinsic_instr *instr)
|
2015-11-07 22:35:33 -08:00
|
|
|
{
|
|
|
|
|
switch (instr->intrinsic) {
|
2015-11-25 14:14:05 -08:00
|
|
|
case nir_intrinsic_load_input:
|
|
|
|
|
case nir_intrinsic_load_output:
|
2018-11-14 15:36:38 -06:00
|
|
|
case nir_intrinsic_load_shared:
|
2022-02-08 02:55:18 +01:00
|
|
|
case nir_intrinsic_load_task_payload:
|
2015-11-25 14:14:05 -08:00
|
|
|
case nir_intrinsic_load_uniform:
|
2020-12-22 23:03:19 +13:00
|
|
|
case nir_intrinsic_load_kernel_input:
|
2018-11-19 13:40:35 -06:00
|
|
|
case nir_intrinsic_load_global:
|
2022-06-22 11:28:53 +02:00
|
|
|
case nir_intrinsic_load_global_2x32:
|
2020-08-29 00:59:22 -05:00
|
|
|
case nir_intrinsic_load_global_constant:
|
2016-12-02 11:36:42 -08:00
|
|
|
case nir_intrinsic_load_scratch:
|
2019-04-03 19:29:36 -04:00
|
|
|
case nir_intrinsic_load_fs_input_interp_deltas:
|
2023-05-08 15:29:31 -04:00
|
|
|
case nir_intrinsic_shared_atomic:
|
|
|
|
|
case nir_intrinsic_shared_atomic_swap:
|
|
|
|
|
case nir_intrinsic_task_payload_atomic:
|
|
|
|
|
case nir_intrinsic_task_payload_atomic_swap:
|
|
|
|
|
case nir_intrinsic_global_atomic:
|
|
|
|
|
case nir_intrinsic_global_atomic_swap:
|
2023-06-12 21:58:43 -07:00
|
|
|
return 0;
|
2016-04-14 10:31:27 -07:00
|
|
|
case nir_intrinsic_load_ubo:
|
|
|
|
|
case nir_intrinsic_load_ssbo:
|
2020-08-13 11:53:17 -04:00
|
|
|
case nir_intrinsic_load_input_vertex:
|
2015-11-25 14:14:05 -08:00
|
|
|
case nir_intrinsic_load_per_vertex_input:
|
|
|
|
|
case nir_intrinsic_load_per_vertex_output:
|
2021-05-03 12:04:01 -07:00
|
|
|
case nir_intrinsic_load_per_primitive_output:
|
2016-07-12 01:46:43 -07:00
|
|
|
case nir_intrinsic_load_interpolated_input:
|
2015-11-25 14:14:05 -08:00
|
|
|
case nir_intrinsic_store_output:
|
2018-11-14 15:36:38 -06:00
|
|
|
case nir_intrinsic_store_shared:
|
2022-02-08 02:55:18 +01:00
|
|
|
case nir_intrinsic_store_task_payload:
|
2018-11-19 13:40:35 -06:00
|
|
|
case nir_intrinsic_store_global:
|
2022-06-22 11:28:53 +02:00
|
|
|
case nir_intrinsic_store_global_2x32:
|
2016-12-02 11:36:42 -08:00
|
|
|
case nir_intrinsic_store_scratch:
|
2023-05-08 15:29:31 -04:00
|
|
|
case nir_intrinsic_ssbo_atomic:
|
|
|
|
|
case nir_intrinsic_ssbo_atomic_swap:
|
2023-06-12 21:58:43 -07:00
|
|
|
return 1;
|
2016-04-14 10:31:27 -07:00
|
|
|
case nir_intrinsic_store_ssbo:
|
2015-11-25 14:14:05 -08:00
|
|
|
case nir_intrinsic_store_per_vertex_output:
|
2021-05-03 12:04:01 -07:00
|
|
|
case nir_intrinsic_store_per_primitive_output:
|
2023-06-12 21:58:43 -07:00
|
|
|
return 2;
|
2015-11-07 22:35:33 -08:00
|
|
|
default:
|
2023-06-12 21:58:43 -07:00
|
|
|
return -1;
|
2015-11-07 22:35:33 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
2023-06-12 21:58:43 -07:00
|
|
|
* Return the offset source for a load/store intrinsic.
|
2015-11-07 22:35:33 -08:00
|
|
|
*/
|
|
|
|
|
nir_src *
|
2023-06-12 21:58:43 -07:00
|
|
|
nir_get_io_offset_src(nir_intrinsic_instr *instr)
|
|
|
|
|
{
|
|
|
|
|
const int idx = nir_get_io_offset_src_number(instr);
|
|
|
|
|
return idx >= 0 ? &instr->src[idx] : NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Return the vertex index source number for a load/store per_vertex intrinsic or -1 if there's no vertex index.
|
|
|
|
|
*/
|
|
|
|
|
int
|
|
|
|
|
nir_get_io_arrayed_index_src_number(const nir_intrinsic_instr *instr)
|
2015-11-07 22:35:33 -08:00
|
|
|
{
|
|
|
|
|
switch (instr->intrinsic) {
|
|
|
|
|
case nir_intrinsic_load_per_vertex_input:
|
|
|
|
|
case nir_intrinsic_load_per_vertex_output:
|
2021-10-14 18:14:12 +02:00
|
|
|
case nir_intrinsic_load_per_primitive_output:
|
2023-06-12 21:58:43 -07:00
|
|
|
return 0;
|
2015-11-07 22:35:33 -08:00
|
|
|
case nir_intrinsic_store_per_vertex_output:
|
2021-10-14 18:14:12 +02:00
|
|
|
case nir_intrinsic_store_per_primitive_output:
|
2023-06-12 21:58:43 -07:00
|
|
|
return 1;
|
2015-11-07 22:35:33 -08:00
|
|
|
default:
|
2023-06-12 21:58:43 -07:00
|
|
|
return -1;
|
2015-11-07 22:35:33 -08:00
|
|
|
}
|
|
|
|
|
}
|
2019-05-01 14:44:15 -07:00
|
|
|
|
2023-06-12 21:58:43 -07:00
|
|
|
/**
|
|
|
|
|
* Return the vertex index source for a load/store per_vertex intrinsic.
|
|
|
|
|
*/
|
|
|
|
|
nir_src *
|
|
|
|
|
nir_get_io_arrayed_index_src(nir_intrinsic_instr *instr)
|
|
|
|
|
{
|
|
|
|
|
const int idx = nir_get_io_arrayed_index_src_number(instr);
|
|
|
|
|
return idx >= 0 ? &instr->src[idx] : NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2019-05-01 14:44:15 -07:00
|
|
|
/**
|
|
|
|
|
* Return the numeric constant that identifies a NULL pointer for each address
|
|
|
|
|
* format.
|
|
|
|
|
*/
|
|
|
|
|
const nir_const_value *
|
|
|
|
|
nir_address_format_null_value(nir_address_format addr_format)
|
|
|
|
|
{
|
|
|
|
|
const static nir_const_value null_values[][NIR_MAX_VEC_COMPONENTS] = {
|
2023-08-08 12:00:35 -05:00
|
|
|
[nir_address_format_32bit_global] = { { 0 } },
|
|
|
|
|
[nir_address_format_2x32bit_global] = { { 0 } },
|
|
|
|
|
[nir_address_format_64bit_global] = { { 0 } },
|
|
|
|
|
[nir_address_format_64bit_global_32bit_offset] = { { 0 } },
|
|
|
|
|
[nir_address_format_64bit_bounded_global] = { { 0 } },
|
|
|
|
|
[nir_address_format_32bit_index_offset] = { { .u32 = ~0 }, { .u32 = ~0 } },
|
|
|
|
|
[nir_address_format_32bit_index_offset_pack64] = { { .u64 = ~0ull } },
|
|
|
|
|
[nir_address_format_vec2_index_32bit_offset] = { { .u32 = ~0 }, { .u32 = ~0 }, { .u32 = ~0 } },
|
|
|
|
|
[nir_address_format_32bit_offset] = { { .u32 = ~0 } },
|
|
|
|
|
[nir_address_format_32bit_offset_as_64bit] = { { .u64 = ~0ull } },
|
|
|
|
|
[nir_address_format_62bit_generic] = { { .u64 = 0 } },
|
|
|
|
|
[nir_address_format_logical] = { { .u32 = ~0 } },
|
2019-05-01 14:44:15 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
assert(addr_format < ARRAY_SIZE(null_values));
|
|
|
|
|
return null_values[addr_format];
|
|
|
|
|
}
|
2019-05-16 15:11:07 -07:00
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *
|
|
|
|
|
nir_build_addr_ieq(nir_builder *b, nir_def *addr0, nir_def *addr1,
|
2019-05-16 15:11:07 -07:00
|
|
|
nir_address_format addr_format)
|
|
|
|
|
{
|
|
|
|
|
switch (addr_format) {
|
|
|
|
|
case nir_address_format_32bit_global:
|
2022-06-22 11:28:53 +02:00
|
|
|
case nir_address_format_2x32bit_global:
|
2019-05-16 15:11:07 -07:00
|
|
|
case nir_address_format_64bit_global:
|
|
|
|
|
case nir_address_format_64bit_bounded_global:
|
|
|
|
|
case nir_address_format_32bit_index_offset:
|
2020-06-29 19:47:57 +02:00
|
|
|
case nir_address_format_vec2_index_32bit_offset:
|
2019-05-16 15:11:07 -07:00
|
|
|
case nir_address_format_32bit_offset:
|
2020-08-15 00:57:14 -05:00
|
|
|
case nir_address_format_62bit_generic:
|
2019-05-16 15:11:07 -07:00
|
|
|
return nir_ball_iequal(b, addr0, addr1);
|
|
|
|
|
|
2021-01-20 14:27:32 -06:00
|
|
|
case nir_address_format_64bit_global_32bit_offset:
|
|
|
|
|
return nir_ball_iequal(b, nir_channels(b, addr0, 0xb),
|
2023-08-08 12:00:35 -05:00
|
|
|
nir_channels(b, addr1, 0xb));
|
2021-01-20 14:27:32 -06:00
|
|
|
|
2020-05-25 08:57:14 -07:00
|
|
|
case nir_address_format_32bit_offset_as_64bit:
|
|
|
|
|
assert(addr0->num_components == 1 && addr1->num_components == 1);
|
|
|
|
|
return nir_ieq(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1));
|
|
|
|
|
|
2020-05-25 08:53:00 -07:00
|
|
|
case nir_address_format_32bit_index_offset_pack64:
|
|
|
|
|
assert(addr0->num_components == 1 && addr1->num_components == 1);
|
|
|
|
|
return nir_ball_iequal(b, nir_unpack_64_2x32(b, addr0), nir_unpack_64_2x32(b, addr1));
|
|
|
|
|
|
2019-05-16 15:11:07 -07:00
|
|
|
case nir_address_format_logical:
|
|
|
|
|
unreachable("Unsupported address format");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
unreachable("Invalid address format");
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *
|
|
|
|
|
nir_build_addr_isub(nir_builder *b, nir_def *addr0, nir_def *addr1,
|
2019-05-16 15:11:07 -07:00
|
|
|
nir_address_format addr_format)
|
|
|
|
|
{
|
|
|
|
|
switch (addr_format) {
|
|
|
|
|
case nir_address_format_32bit_global:
|
|
|
|
|
case nir_address_format_64bit_global:
|
|
|
|
|
case nir_address_format_32bit_offset:
|
2020-05-25 08:53:00 -07:00
|
|
|
case nir_address_format_32bit_index_offset_pack64:
|
2020-08-15 00:57:14 -05:00
|
|
|
case nir_address_format_62bit_generic:
|
2019-05-16 15:11:07 -07:00
|
|
|
assert(addr0->num_components == 1);
|
|
|
|
|
assert(addr1->num_components == 1);
|
|
|
|
|
return nir_isub(b, addr0, addr1);
|
|
|
|
|
|
2022-06-22 11:28:53 +02:00
|
|
|
case nir_address_format_2x32bit_global:
|
|
|
|
|
return nir_isub(b, addr_to_global(b, addr0, addr_format),
|
2023-08-08 12:00:35 -05:00
|
|
|
addr_to_global(b, addr1, addr_format));
|
2022-06-22 11:28:53 +02:00
|
|
|
|
2020-05-25 08:57:14 -07:00
|
|
|
case nir_address_format_32bit_offset_as_64bit:
|
|
|
|
|
assert(addr0->num_components == 1);
|
|
|
|
|
assert(addr1->num_components == 1);
|
|
|
|
|
return nir_u2u64(b, nir_isub(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1)));
|
|
|
|
|
|
2021-01-20 14:27:32 -06:00
|
|
|
case nir_address_format_64bit_global_32bit_offset:
|
2019-05-16 15:11:07 -07:00
|
|
|
case nir_address_format_64bit_bounded_global:
|
|
|
|
|
return nir_isub(b, addr_to_global(b, addr0, addr_format),
|
2023-08-08 12:00:35 -05:00
|
|
|
addr_to_global(b, addr1, addr_format));
|
2019-05-16 15:11:07 -07:00
|
|
|
|
|
|
|
|
case nir_address_format_32bit_index_offset:
|
|
|
|
|
assert(addr0->num_components == 2);
|
|
|
|
|
assert(addr1->num_components == 2);
|
|
|
|
|
/* Assume the same buffer index. */
|
|
|
|
|
return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1));
|
|
|
|
|
|
2020-06-29 19:47:57 +02:00
|
|
|
case nir_address_format_vec2_index_32bit_offset:
|
|
|
|
|
assert(addr0->num_components == 3);
|
|
|
|
|
assert(addr1->num_components == 3);
|
|
|
|
|
/* Assume the same buffer index. */
|
|
|
|
|
return nir_isub(b, nir_channel(b, addr0, 2), nir_channel(b, addr1, 2));
|
|
|
|
|
|
2019-05-16 15:11:07 -07:00
|
|
|
case nir_address_format_logical:
|
|
|
|
|
unreachable("Unsupported address format");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
unreachable("Invalid address format");
|
|
|
|
|
}
|
2019-05-14 12:10:11 +02:00
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
is_input(nir_intrinsic_instr *intrin)
|
|
|
|
|
{
|
|
|
|
|
return intrin->intrinsic == nir_intrinsic_load_input ||
|
2022-06-22 09:13:46 +02:00
|
|
|
intrin->intrinsic == nir_intrinsic_load_input_vertex ||
|
2019-05-14 12:10:11 +02:00
|
|
|
intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
|
|
|
|
|
intrin->intrinsic == nir_intrinsic_load_interpolated_input ||
|
|
|
|
|
intrin->intrinsic == nir_intrinsic_load_fs_input_interp_deltas;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
is_output(nir_intrinsic_instr *intrin)
|
|
|
|
|
{
|
|
|
|
|
return intrin->intrinsic == nir_intrinsic_load_output ||
|
|
|
|
|
intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
|
2021-05-03 12:04:01 -07:00
|
|
|
intrin->intrinsic == nir_intrinsic_load_per_primitive_output ||
|
2019-05-14 12:10:11 +02:00
|
|
|
intrin->intrinsic == nir_intrinsic_store_output ||
|
2021-05-03 12:04:01 -07:00
|
|
|
intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
|
|
|
|
|
intrin->intrinsic == nir_intrinsic_store_per_primitive_output;
|
2019-05-14 12:10:11 +02:00
|
|
|
}
|
|
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
static bool
|
|
|
|
|
is_dual_slot(nir_intrinsic_instr *intrin)
|
2020-08-14 19:33:50 -04:00
|
|
|
{
|
|
|
|
|
if (intrin->intrinsic == nir_intrinsic_store_output ||
|
2021-10-21 11:22:34 +02:00
|
|
|
intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
|
|
|
|
|
intrin->intrinsic == nir_intrinsic_store_per_primitive_output) {
|
2020-08-14 19:33:50 -04:00
|
|
|
return nir_src_bit_size(intrin->src[0]) == 64 &&
|
|
|
|
|
nir_src_num_components(intrin->src[0]) >= 3;
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-14 11:56:00 -05:00
|
|
|
return intrin->def.bit_size == 64 &&
|
|
|
|
|
intrin->def.num_components >= 3;
|
2020-08-14 19:33:50 -04:00
|
|
|
}
|
2019-05-14 12:10:11 +02:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* This pass adds constant offsets to instr->const_index[0] for input/output
|
|
|
|
|
* intrinsics, and resets the offset source to 0. Non-constant offsets remain
|
|
|
|
|
* unchanged - since we don't know what part of a compound variable is
|
|
|
|
|
* accessed, we allocate storage for the entire thing. For drivers that use
|
|
|
|
|
* nir_lower_io_to_temporaries() before nir_lower_io(), this guarantees that
|
|
|
|
|
* the offset source will be 0, so that they don't have to add it in manually.
|
|
|
|
|
*/
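/* Illustrative example: a store_output with base = 4 and a constant offset
 * source of 2 becomes base = 6 with an offset source of 0, and its I/O
 * semantics location advances by 2 slots.
 */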
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
add_const_offset_to_base_block(nir_block *block, nir_builder *b,
|
2020-09-28 13:13:49 +02:00
|
|
|
nir_variable_mode modes)
|
2019-05-14 12:10:11 +02:00
|
|
|
{
|
|
|
|
|
bool progress = false;
|
|
|
|
|
nir_foreach_instr_safe(instr, block) {
|
|
|
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
|
|
|
|
|
2020-09-28 13:13:49 +02:00
|
|
|
if (((modes & nir_var_shader_in) && is_input(intrin)) ||
|
|
|
|
|
((modes & nir_var_shader_out) && is_output(intrin))) {
|
2021-10-21 11:24:20 +02:00
|
|
|
nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
|
|
|
|
|
|
|
|
|
|
/* NV_mesh_shader: ignore MS primitive indices. */
|
|
|
|
|
if (b->shader->info.stage == MESA_SHADER_MESH &&
|
|
|
|
|
sem.location == VARYING_SLOT_PRIMITIVE_INDICES &&
|
|
|
|
|
!(b->shader->info.per_primitive_outputs &
|
|
|
|
|
BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES)))
|
|
|
|
|
continue;
|
|
|
|
|
|
2019-05-14 12:10:11 +02:00
|
|
|
nir_src *offset = nir_get_io_offset_src(intrin);
|
|
|
|
|
|
2020-09-29 12:02:41 +02:00
|
|
|
/* TODO: Better handling of per-view variables here */
|
|
|
|
|
if (nir_src_is_const(*offset) &&
|
|
|
|
|
!nir_intrinsic_io_semantics(intrin).per_view) {
|
2020-08-14 19:33:50 -04:00
|
|
|
unsigned off = nir_src_as_uint(*offset);
|
|
|
|
|
|
|
|
|
|
nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) + off);
|
|
|
|
|
|
|
|
|
|
sem.location += off;
|
|
|
|
|
/* non-indirect indexing should reduce num_slots */
|
|
|
|
|
sem.num_slots = is_dual_slot(intrin) ? 2 : 1;
|
|
|
|
|
nir_intrinsic_set_io_semantics(intrin, sem);
|
|
|
|
|
|
2019-05-14 12:10:11 +02:00
|
|
|
b->cursor = nir_before_instr(&intrin->instr);
|
2023-08-17 16:27:15 -05:00
|
|
|
nir_src_rewrite(offset, nir_imm_int(b, 0));
|
2019-05-14 12:10:11 +02:00
|
|
|
progress = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return progress;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool
|
2020-09-28 13:13:49 +02:00
|
|
|
nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes)
|
2019-05-14 12:10:11 +02:00
|
|
|
{
|
|
|
|
|
bool progress = false;
|
|
|
|
|
|
2023-06-22 13:27:59 -04:00
|
|
|
nir_foreach_function_impl(impl, nir) {
|
|
|
|
|
bool impl_progress = false;
|
|
|
|
|
nir_builder b = nir_builder_create(impl);
|
|
|
|
|
nir_foreach_block(block, impl) {
|
|
|
|
|
impl_progress |= add_const_offset_to_base_block(block, &b, modes);
|
2019-05-14 12:10:11 +02:00
|
|
|
}
|
2023-06-22 13:27:59 -04:00
|
|
|
progress |= impl_progress;
|
|
|
|
|
if (impl_progress)
|
|
|
|
|
nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance);
|
|
|
|
|
else
|
|
|
|
|
nir_metadata_preserve(impl, nir_metadata_all);
|
2019-05-14 12:10:11 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return progress;
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-09 21:30:41 -05:00
|
|
|
bool
|
2022-01-01 05:32:14 -05:00
|
|
|
nir_lower_color_inputs(nir_shader *nir)
|
|
|
|
|
{
|
|
|
|
|
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
|
|
|
|
|
bool progress = false;
|
|
|
|
|
|
2023-06-26 10:42:29 -04:00
|
|
|
nir_builder b = nir_builder_create(impl);
|
2022-01-01 05:32:14 -05:00
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
nir_foreach_block(block, impl) {
|
|
|
|
|
nir_foreach_instr_safe(instr, block) {
|
2022-01-01 05:32:14 -05:00
|
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
|
|
|
|
|
2023-03-09 21:30:41 -05:00
|
|
|
if (intrin->intrinsic != nir_intrinsic_load_input &&
|
|
|
|
|
intrin->intrinsic != nir_intrinsic_load_interpolated_input)
|
2022-01-01 05:32:14 -05:00
|
|
|
continue;
|
|
|
|
|
|
2023-03-09 21:30:41 -05:00
|
|
|
nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
|
|
|
|
|
|
|
|
|
|
if (sem.location != VARYING_SLOT_COL0 &&
|
|
|
|
|
sem.location != VARYING_SLOT_COL1)
|
2022-01-01 05:32:14 -05:00
|
|
|
continue;
|
|
|
|
|
|
2023-03-09 21:30:41 -05:00
|
|
|
/* Default to FLAT (for load_input) */
|
|
|
|
|
enum glsl_interp_mode interp = INTERP_MODE_FLAT;
|
|
|
|
|
bool sample = false;
|
|
|
|
|
bool centroid = false;
|
|
|
|
|
|
|
|
|
|
if (intrin->intrinsic == nir_intrinsic_load_interpolated_input) {
|
|
|
|
|
nir_intrinsic_instr *baryc =
|
|
|
|
|
nir_instr_as_intrinsic(intrin->src[0].ssa->parent_instr);
|
|
|
|
|
|
|
|
|
|
centroid =
|
|
|
|
|
baryc->intrinsic == nir_intrinsic_load_barycentric_centroid;
|
|
|
|
|
sample =
|
|
|
|
|
baryc->intrinsic == nir_intrinsic_load_barycentric_sample;
|
|
|
|
|
assert(centroid || sample ||
|
|
|
|
|
baryc->intrinsic == nir_intrinsic_load_barycentric_pixel);
|
|
|
|
|
|
|
|
|
|
interp = nir_intrinsic_interp_mode(baryc);
|
|
|
|
|
}
|
|
|
|
|
|
2022-01-01 05:32:14 -05:00
|
|
|
b.cursor = nir_before_instr(instr);
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *load = NULL;
|
2023-03-09 21:30:41 -05:00
|
|
|
|
|
|
|
|
if (sem.location == VARYING_SLOT_COL0) {
|
|
|
|
|
load = nir_load_color0(&b);
|
|
|
|
|
nir->info.fs.color0_interp = interp;
|
|
|
|
|
nir->info.fs.color0_sample = sample;
|
|
|
|
|
nir->info.fs.color0_centroid = centroid;
|
2022-01-01 05:32:14 -05:00
|
|
|
} else {
|
2023-03-09 21:30:41 -05:00
|
|
|
assert(sem.location == VARYING_SLOT_COL1);
|
|
|
|
|
load = nir_load_color1(&b);
|
|
|
|
|
nir->info.fs.color1_interp = interp;
|
|
|
|
|
nir->info.fs.color1_sample = sample;
|
|
|
|
|
nir->info.fs.color1_centroid = centroid;
|
2022-01-01 05:32:14 -05:00
|
|
|
}
|
|
|
|
|
|
2023-03-09 21:30:41 -05:00
|
|
|
if (intrin->num_components != 4) {
|
|
|
|
|
unsigned start = nir_intrinsic_component(intrin);
|
|
|
|
|
unsigned count = intrin->num_components;
|
|
|
|
|
load = nir_channels(&b, load, BITFIELD_RANGE(start, count));
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-14 11:56:00 -05:00
|
|
|
nir_def_rewrite_uses(&intrin->def, load);
|
2022-01-01 05:32:14 -05:00
|
|
|
nir_instr_remove(instr);
|
|
|
|
|
progress = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (progress) {
|
|
|
|
|
nir_metadata_preserve(impl, nir_metadata_dominance |
|
2023-08-08 12:00:35 -05:00
|
|
|
nir_metadata_block_index);
|
2022-01-01 05:32:14 -05:00
|
|
|
} else {
|
|
|
|
|
nir_metadata_preserve(impl, nir_metadata_all);
|
|
|
|
|
}
|
|
|
|
|
return progress;
|
|
|
|
|
}
bool
nir_io_add_intrinsic_xfb_info(nir_shader *nir)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   bool progress = false;

   for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++)
      nir->info.xfb_stride[i] = nir->xfb_info->buffers[i].stride / 4;

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

         if (!nir_intrinsic_has_io_xfb(intr))
            continue;

         /* No indirect indexing allowed. The index is implied to be 0. */
         ASSERTED nir_src offset = *nir_get_io_offset_src(intr);
         assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0);

         /* Calling this pass for the second time shouldn't do anything. */
         if (nir_intrinsic_io_xfb(intr).out[0].num_components ||
             nir_intrinsic_io_xfb(intr).out[1].num_components ||
             nir_intrinsic_io_xfb2(intr).out[0].num_components ||
             nir_intrinsic_io_xfb2(intr).out[1].num_components)
            continue;

         nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
         /* Rebase the write mask to component 0 so that it can be compared
          * with out->component_mask below.
          */
         unsigned writemask = nir_intrinsic_write_mask(intr) <<
                              nir_intrinsic_component(intr);

         nir_io_xfb xfb[2];
         memset(xfb, 0, sizeof(xfb));

         for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
            nir_xfb_output_info *out = &nir->xfb_info->outputs[i];
            if (out->location == sem.location) {
               unsigned xfb_mask = writemask & out->component_mask;

               /*fprintf(stdout, "output%u: buffer=%u, offset=%u, location=%u, "
                                 "component_offset=%u, component_mask=0x%x, "
                                 "xfb_mask=0x%x, slots=%u\n",
                         i, out->buffer,
                         out->offset,
                         out->location,
                         out->component_offset,
                         out->component_mask,
                         xfb_mask, sem.num_slots);*/
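
               /* A hypothetical example of what the loop below records:
                * if this output captures components 2..3 (component_mask =
                * 0xc, component_offset = 2) at byte offset 16 of buffer 0,
                * and the store writes all four components, then xfb_mask =
                * 0xc, the scan returns start = 2 and count = 2, and
                * xfb[1].out[0] (i.e. io_xfb2) is set to num_components = 2,
                * buffer = 0, offset = 16 / 4 - 2 + 2 = 4 dwords.
                */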
               while (xfb_mask) {
                  int start, count;
                  u_bit_scan_consecutive_range(&xfb_mask, &start, &count);

                  xfb[start / 2].out[start % 2].num_components = count;
                  xfb[start / 2].out[start % 2].buffer = out->buffer;
                  /* out->offset is relative to the first stored xfb
                   * component, while start is relative to component 0.
                   */
                  xfb[start / 2].out[start % 2].offset =
                     out->offset / 4 - out->component_offset + start;

                  progress = true;
               }
            }
         }

         nir_intrinsic_set_io_xfb(intr, xfb[0]);
         nir_intrinsic_set_io_xfb2(intr, xfb[1]);
      }
   }

   nir_metadata_preserve(impl, nir_metadata_all);
   return progress;
}
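
/* type_size callback for nir_lower_io in nir_lower_io_passes below: IO is
 * laid out in units of vec4 slots, as counted by glsl_count_attribute_slots().
 */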
static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

/**
 * This runs all compiler passes needed to lower IO, lower indirect IO access,
 * set transform feedback info in IO intrinsics, and clean up the IR.
 *
 * \param renumber_vs_inputs
 *    Set to true if holes between VS inputs should be removed, which is safe
 *    to do in any shader linker that can handle that. Set to false to keep
 *    holes between VS inputs, which is recommended for gallium drivers so as
 *    not to break the mapping of vertex elements to VS inputs expected by
 *    gallium frontends.
 */
void
nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs)
{
   if (nir->info.stage == MESA_SHADER_COMPUTE)
      return;

   /* Both "support_indirect" options are per-stage bitmasks indexed by
    * gl_shader_stage.
    */
   bool has_indirect_inputs =
      (nir->options->support_indirect_inputs >> nir->info.stage) & 0x1;

   /* Transform feedback requires that indirect outputs are lowered. */
   bool has_indirect_outputs =
      (nir->options->support_indirect_outputs >> nir->info.stage) & 0x1 &&
      nir->xfb_info == NULL;

   /* TODO: Sorting variables by location is required due to some bug
    * in nir_lower_io_to_temporaries. If variables are not sorted,
    * dEQP-GLES31.functional.separate_shader.random.0 fails.
    *
    * This isn't needed if nir_assign_io_var_locations is called because it
    * also sorts variables. However, if IO is lowered sooner than that, we
    * must sort explicitly here to get what nir_assign_io_var_locations does.
    */
   unsigned varying_var_mask =
      (nir->info.stage != MESA_SHADER_VERTEX ? nir_var_shader_in : 0) |
      (nir->info.stage != MESA_SHADER_FRAGMENT ? nir_var_shader_out : 0);
   nir_sort_variables_by_location(nir, varying_var_mask);

   if (!has_indirect_inputs || !has_indirect_outputs) {
      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                 nir_shader_get_entrypoint(nir), !has_indirect_outputs,
                 !has_indirect_inputs);

      /* We need to lower all the copy_deref's introduced by
       * nir_lower_io_to_temporaries before calling nir_lower_io.
       */
      NIR_PASS_V(nir, nir_split_var_copies);
      NIR_PASS_V(nir, nir_lower_var_copies);
      NIR_PASS_V(nir, nir_lower_global_vars_to_local);
   }

   NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out | nir_var_shader_in,
              type_size_vec4, nir_lower_io_lower_64bit_to_32);

   /* nir_io_add_const_offset_to_base needs actual constants. */
   NIR_PASS_V(nir, nir_opt_constant_folding);
   NIR_PASS_V(nir, nir_io_add_const_offset_to_base,
              nir_var_shader_in | nir_var_shader_out);

   /* Lower and remove dead derefs and variables to clean up the IR. */
   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_opt_dce);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);

   /* If IO is lowered before var->data.driver_location is assigned, driver
    * locations are all 0, which means IO bases are all 0. It's not necessary
    * to set driver_location before lowering IO because the only thing that
    * identifies outputs is their semantic, and IO bases can always be
    * computed from the semantics.
    *
    * This assigns IO bases from scratch, using IO semantics to tell which
    * intrinsics refer to the same IO. If the bases already exist, they
    * will be reassigned, sorted by the semantic, and all holes removed.
    * This effectively canonicalizes all bases.
    *
    * This must be done after DCE to remove dead load_input intrinsics.
    */
   NIR_PASS_V(nir, nir_recompute_io_bases,
              (nir->info.stage != MESA_SHADER_VERTEX || renumber_vs_inputs ?
               nir_var_shader_in : 0) | nir_var_shader_out);

   if (nir->xfb_info)
      NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info);

   if (nir->options->lower_mediump_io)
      nir->options->lower_mediump_io(nir);

   nir->info.io_lowered = true;
}
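
/* A minimal usage sketch for a hypothetical caller of the pass above:
 *
 *    if (!nir->info.io_lowered)
 *       nir_lower_io_passes(nir, false);
 *
 * Passing false for renumber_vs_inputs keeps holes between VS inputs, which
 * is the recommended setting for gallium drivers (see the comment above the
 * function).
 */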