nir: treat per-view outputs as arrayed IO

This is needed for implementing multiview in panvk, where the address
calculation for multiview outputs is not well-represented by lowering to
nir_intrinsic_store_output with a single offset.

The case where a variable is both per-view and per-{vertex,primitive} is
now unsupported. This would come up with drivers implementing
NV_mesh_shader or using nir_lower_multiview on geometry, tessellation,
or mesh shaders. No drivers currently do either of these. There was some
code that attempted to handle the nested per-view case by unwrapping
per-view/arrayed types twice, but it's unclear to what extent this
actually worked.

ANV and Turnip both rely on per-view outputs being assigned a unique
driver location for each view, so I've added an option to configure that
behavior rather than removing it.
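
To make the addressing point concrete, here is a sketch (editor's illustration with made-up names and strides, not panvk's actual code): once the view index is a separate intrinsic source, a backend whose output buffer is laid out per view can apply the view and slot strides independently, which a single combined store_output offset cannot express.

#include <stdint.h>

/* Hypothetical addressing for a per-view output buffer laid out as
 * buf[view][slot]; view_stride and slot_stride are illustrative. */
static uint64_t
per_view_output_addr(uint64_t base, unsigned view_index, unsigned slot,
                     unsigned view_stride, unsigned slot_stride)
{
   /* store_per_view_output keeps view_index separate from the slot offset,
    * so the two strides can differ. */
   return base + (uint64_t)view_index * view_stride +
          (uint64_t)slot * slot_stride;
}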

Signed-off-by: Benjamin Lee <benjamin.lee@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31704>
Benjamin Lee 2024-10-19 21:03:13 -07:00 (committed by Marge Bot)
parent 6d843cde45
commit becb014d27
35 changed files with 191 additions and 67 deletions

View file

@@ -77,7 +77,7 @@ static const struct glsl_type *
 get_varying_type(const nir_variable *var, gl_shader_stage stage)
 {
    const struct glsl_type *type = var->type;
-   if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
+   if (nir_is_arrayed_io(var, stage)) {
       assert(glsl_type_is_array(type));
       type = glsl_get_array_element(type);
    }
@@ -3387,7 +3387,7 @@ set_variable_io_mask(BITSET_WORD *bits, nir_variable *var, gl_shader_stage stage
    assert(var->data.location >= VARYING_SLOT_VAR0);
 
    const struct glsl_type *type = var->type;
-   if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
+   if (nir_is_arrayed_io(var, stage)) {
       assert(glsl_type_is_array(type));
       type = glsl_get_array_element(type);
    }
@@ -3536,7 +3536,7 @@ remove_unused_io_vars(nir_shader *producer, nir_shader *consumer,
       unsigned location = var->data.location - VARYING_SLOT_VAR0;
 
       const struct glsl_type *type = var->type;
-      if (nir_is_arrayed_io(var, shader->info.stage) || var->data.per_view) {
+      if (nir_is_arrayed_io(var, shader->info.stage)) {
         assert(glsl_type_is_array(type));
         type = glsl_get_array_element(type);
      }
@@ -3606,7 +3606,7 @@ remove_unused_varyings(nir_shader *producer, nir_shader *consumer,
         continue;
 
      const struct glsl_type *type = var->type;
-     if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) {
+     if (nir_is_arrayed_io(var, producer->info.stage)) {
        assert(glsl_type_is_array(type));
        type = glsl_get_array_element(type);
      }
@@ -3622,7 +3622,7 @@ remove_unused_varyings(nir_shader *producer, nir_shader *consumer,
         continue;
 
      const struct glsl_type *type = var->type;
-     if (nir_is_arrayed_io(var, consumer->info.stage) || var->data.per_view) {
+     if (nir_is_arrayed_io(var, consumer->info.stage)) {
        assert(glsl_type_is_array(type));
        type = glsl_get_array_element(type);
      }

View file

@@ -222,7 +222,7 @@ lower_packed_varying_needs_lowering(nir_shader *shader, nir_variable *var,
       return false;
 
    const struct glsl_type *type = var->type;
-   if (nir_is_arrayed_io(var, shader->info.stage) || var->data.per_view) {
+   if (nir_is_arrayed_io(var, shader->info.stage)) {
       assert(glsl_type_is_array(type));
       type = glsl_get_array_element(type);
    }

View file

@@ -4377,6 +4377,15 @@ typedef struct nir_shader_compiler_options {
    /** Whether derivative intrinsics must be scalarized. */
    bool scalarize_ddx;
 
+   /**
+    * Assign a range of driver locations to per-view outputs, with unique
+    * slots for each view. If unset, per-view outputs will be treated
+    * similarly to other arrayed IO, and only slots for one view will be
+    * assigned. Regardless of this setting, per-view outputs are only assigned
+    * slots for one value in var->data.location.
+    */
+   bool per_view_unique_driver_locations;
+
    /** Options determining lowering and behavior of inputs and outputs. */
    nir_io_options io_options;
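
As an illustration of the two modes (the two-view layout is an assumed example, not taken from the patch):

/* Per-view gl_Position with 2 views, assigned driver location D:
 *
 *   per_view_unique_driver_locations = true (ANV, Turnip):
 *      view 0 -> driver location D, view 1 -> driver location D + 1
 *
 *   per_view_unique_driver_locations = false (default):
 *      both views -> driver location D; the view is carried by the
 *      view_index source of store_per_view_output instead.
 */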

View file

@@ -418,6 +418,12 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
          unreachable("Invalid stage for load_output");
       }
       break;
+   case nir_intrinsic_load_per_view_output:
+      is_divergent = instr->src[0].ssa->divergent ||
+                     instr->src[1].ssa->divergent ||
+                     (stage == MESA_SHADER_TESS_CTRL &&
+                      !(options & nir_divergence_single_patch_per_tcs_subgroup));
+      break;
    case nir_intrinsic_load_per_vertex_output:
       /* TCS and NV_mesh_shader only (EXT_mesh_shader does not allow loading outputs). */
       assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_MESH);

View file

@@ -221,11 +221,6 @@ mark_whole_variable(nir_shader *shader, nir_variable *var,
       type = glsl_get_array_element(type);
    }
 
-   if (var->data.per_view) {
-      assert(glsl_type_is_array(type));
-      type = glsl_get_array_element(type);
-   }
-
    const unsigned slots = nir_variable_count_slots(var, type);
    set_io_mask(shader, var, 0, slots, deref, is_output_read);
 }
@@ -578,6 +573,7 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader,
    case nir_intrinsic_load_output:
    case nir_intrinsic_load_per_vertex_output:
+   case nir_intrinsic_load_per_view_output:
    case nir_intrinsic_load_per_primitive_output:
       if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
           instr->intrinsic == nir_intrinsic_load_output &&
@@ -613,6 +609,7 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader,
    case nir_intrinsic_store_output:
    case nir_intrinsic_store_per_vertex_output:
+   case nir_intrinsic_store_per_view_output:
    case nir_intrinsic_store_per_primitive_output:
       if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
           instr->intrinsic == nir_intrinsic_store_output &&

View file

@@ -1182,6 +1182,11 @@ load("ssbo_address", [1, 1], [], [CAN_ELIMINATE, CAN_REORDER])
 load("output", [1], [BASE, RANGE, COMPONENT, DEST_TYPE, IO_SEMANTICS], flags=[CAN_ELIMINATE])
 # src[] = { vertex, offset }.
 load("per_vertex_output", [1, 1], [BASE, RANGE, COMPONENT, DEST_TYPE, IO_SEMANTICS], [CAN_ELIMINATE])
+# src[] = { view_index, offset }.
+# view_index is "compacted", meaning it is the index of the Nth *enabled* view,
+# not the Nth absolute view. See the nir_lower_multiview docs for a more
+# detailed explanation.
+load("per_view_output", [1, 1], [BASE, RANGE, COMPONENT, DEST_TYPE, IO_SEMANTICS], [CAN_ELIMINATE])
 # src[] = { primitive, offset }.
 load("per_primitive_output", [1, 1], [BASE, COMPONENT, DEST_TYPE, IO_SEMANTICS], [CAN_ELIMINATE])
 # src[] = { offset }.
@@ -1223,6 +1228,8 @@ def store(name, srcs, indices=[], flags=[]):
 store("output", [1], [BASE, RANGE, WRITE_MASK, COMPONENT, SRC_TYPE, IO_SEMANTICS, IO_XFB, IO_XFB2])
 # src[] = { value, vertex, offset }.
 store("per_vertex_output", [1, 1], [BASE, RANGE, WRITE_MASK, COMPONENT, SRC_TYPE, IO_SEMANTICS])
+# src[] = { value, view_index, offset }.
+store("per_view_output", [1, 1], [BASE, RANGE, WRITE_MASK, COMPONENT, SRC_TYPE, IO_SEMANTICS])
 # src[] = { value, primitive, offset }.
 store("per_primitive_output", [1, 1], [BASE, RANGE, WRITE_MASK, COMPONENT, SRC_TYPE, IO_SEMANTICS])
 # src[] = { value, block_index, offset }
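
To spell out the "compacted" convention (editor's sketch; this helper is illustrative and not an existing NIR function): assuming view_mask has one bit set per enabled view, the absolute view for a compacted index can be recovered as follows.

#include <stdint.h>

static unsigned
absolute_view_from_compacted(uint32_t view_mask, unsigned compacted_index)
{
   for (unsigned abs_view = 0; abs_view < 32; abs_view++) {
      /* Only enabled views are counted: the Nth set bit is view N. */
      if ((view_mask & (1u << abs_view)) && compacted_index-- == 0)
         return abs_view;
   }
   return ~0u; /* out of range: fewer enabled views than compacted_index */
}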

View file

@@ -49,7 +49,7 @@ get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
    assert(location < 64);
 
    const struct glsl_type *type = var->type;
-   if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
+   if (nir_is_arrayed_io(var, stage)) {
       assert(glsl_type_is_array(type));
       type = glsl_get_array_element(type);
    }
@@ -337,7 +337,7 @@ get_unmoveable_components_masks(nir_shader *shader,
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
 
         const struct glsl_type *type = var->type;
-        if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
+        if (nir_is_arrayed_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }
@@ -438,7 +438,7 @@ remap_slots_and_components(nir_shader *shader, nir_variable_mode mode,
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
 
         const struct glsl_type *type = var->type;
-        if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
+        if (nir_is_arrayed_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }
@@ -578,7 +578,7 @@ gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
         continue;
 
      const struct glsl_type *type = var->type;
-     if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) {
+     if (nir_is_arrayed_io(var, producer->info.stage)) {
        assert(glsl_type_is_array(type));
        type = glsl_get_array_element(type);
      }
@@ -641,8 +641,7 @@ gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
       if (!vc_info->initialised) {
          const struct glsl_type *type = in_var->type;
-         if (nir_is_arrayed_io(in_var, consumer->info.stage) ||
-             in_var->data.per_view) {
+         if (nir_is_arrayed_io(in_var, consumer->info.stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }
@@ -1539,18 +1538,17 @@ nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode,
             last_partial = false;
          }
 
-         /* per-view variables have an extra array dimension, which is ignored
-          * when counting user-facing slots (var->data.location), but *not*
-          * with driver slots (var->data.driver_location). That is, each user
-          * slot maps to multiple driver slots.
-          */
-         driver_size = glsl_count_attribute_slots(type, false);
-         if (var->data.per_view) {
-            assert(glsl_type_is_array(type));
-            var_size =
-               glsl_count_attribute_slots(glsl_get_array_element(type), false);
+         var_size = glsl_count_attribute_slots(type, false);
+         if (var->data.per_view &&
+             shader->options->per_view_unique_driver_locations) {
+            /* per-view variables have an extra array dimension, which is
+             * ignored when counting user-facing slots (var->data.location),
+             * but *not* with driver slots (var->data.driver_location). That
+             * is, each user slot maps to multiple driver slots. */
+            const struct glsl_type *array_type = var->type;
+            driver_size = glsl_count_attribute_slots(array_type, false);
          } else {
-            var_size = driver_size;
+            driver_size = var_size;
          }
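
A worked example of the slot counting above (the two-view numbers are an assumption for illustration):

/* Per-view gl_Position declared vec4[2] for 2 enabled views; since per-view
 * is arrayed IO now, `type` here is the unwrapped element type vec4:
 *
 *   var_size    = glsl_count_attribute_slots(vec4)    = 1
 *   driver_size = glsl_count_attribute_slots(vec4[2]) = 2  (option set)
 *   driver_size = var_size                            = 1  (option unset)
 *
 * With the option set, each user-facing location consumes one driver
 * location per view. */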

View file

@@ -64,6 +64,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr, nir_shader *shader)
       loc = out->data.location;
       break;
    case nir_intrinsic_store_output:
+   case nir_intrinsic_store_per_view_output:
       loc = nir_intrinsic_io_semantics(intr).location;
       break;
    default:

View file

@@ -165,6 +165,7 @@ find_output(nir_builder *b, unsigned location)
          if ((intr->intrinsic == nir_intrinsic_store_output ||
               intr->intrinsic == nir_intrinsic_store_per_vertex_output ||
+              intr->intrinsic == nir_intrinsic_store_per_view_output ||
               intr->intrinsic == nir_intrinsic_store_per_primitive_output) &&
              nir_intrinsic_io_semantics(intr).location == location) {
             assert(nir_src_is_const(*nir_get_io_offset_src(intr)));

View file

@@ -105,11 +105,6 @@ get_unwrapped_array_length(nir_shader *nir, nir_variable *var)
    if (nir_is_arrayed_io(var, nir->info.stage))
       type = glsl_get_array_element(type);
 
-   if (var->data.per_view) {
-      assert(glsl_type_is_array(type));
-      type = glsl_get_array_element(type);
-   }
-
    assert(glsl_type_is_array(type));
    return glsl_get_length(type);

View file

@@ -128,6 +128,7 @@ lower_clip_plane_store_io(nir_builder *b, nir_intrinsic_instr *intr,
    case nir_intrinsic_store_output:
    case nir_intrinsic_store_per_primitive_output:
    case nir_intrinsic_store_per_vertex_output:
+   case nir_intrinsic_store_per_view_output:
       break;
    default:
       return false;

View file

@@ -148,6 +148,14 @@ nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage)
    if (var->data.patch || !glsl_type_is_array(var->type))
       return false;
 
+   if (var->data.per_view) {
+      /* Nested arrayed outputs (both per-view and per-{vertex,primitive}) are
+       * unsupported. */
+      assert(stage == MESA_SHADER_VERTEX);
+      assert(var->data.mode == nir_var_shader_out);
+      return true;
+   }
+
    if (stage == MESA_SHADER_MESH) {
       /* NV_mesh_shader: this is flat array for the whole workgroup. */
       if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES)
@@ -352,8 +360,14 @@ emit_load(struct lower_io_state *state,
       }
       break;
    case nir_var_shader_out:
-      op = !array_index ? nir_intrinsic_load_output : var->data.per_primitive ? nir_intrinsic_load_per_primitive_output
-                                                                              : nir_intrinsic_load_per_vertex_output;
+      if (!array_index)
+         op = nir_intrinsic_load_output;
+      else if (var->data.per_primitive)
+         op = nir_intrinsic_load_per_primitive_output;
+      else if (var->data.per_view)
+         op = nir_intrinsic_load_per_view_output;
+      else
+         op = nir_intrinsic_load_per_vertex_output;
       break;
    case nir_var_uniform:
       op = nir_intrinsic_load_uniform;
@@ -495,9 +509,15 @@ emit_store(struct lower_io_state *state, nir_def *data,
    nir_builder *b = &state->builder;
 
    assert(var->data.mode == nir_var_shader_out);
-   nir_intrinsic_op op =
-      !array_index ? nir_intrinsic_store_output : var->data.per_primitive ? nir_intrinsic_store_per_primitive_output
-                                                                          : nir_intrinsic_store_per_vertex_output;
+   nir_intrinsic_op op;
+   if (!array_index)
+      op = nir_intrinsic_store_output;
+   else if (var->data.per_view)
+      op = nir_intrinsic_store_per_view_output;
+   else if (var->data.per_primitive)
+      op = nir_intrinsic_store_per_primitive_output;
+   else
+      op = nir_intrinsic_store_per_vertex_output;
 
    nir_intrinsic_instr *store =
       nir_intrinsic_instr_create(state->builder.shader, op);
@@ -2806,6 +2826,7 @@ nir_get_io_offset_src_number(const nir_intrinsic_instr *instr)
    case nir_intrinsic_load_input_vertex:
    case nir_intrinsic_load_per_vertex_input:
    case nir_intrinsic_load_per_vertex_output:
+   case nir_intrinsic_load_per_view_output:
    case nir_intrinsic_load_per_primitive_output:
    case nir_intrinsic_load_interpolated_input:
    case nir_intrinsic_load_smem_amd:
@@ -2823,6 +2844,7 @@ nir_get_io_offset_src_number(const nir_intrinsic_instr *instr)
       return 1;
    case nir_intrinsic_store_ssbo:
    case nir_intrinsic_store_per_vertex_output:
+   case nir_intrinsic_store_per_view_output:
    case nir_intrinsic_store_per_primitive_output:
       return 2;
    default:
@@ -2849,9 +2871,11 @@ nir_get_io_arrayed_index_src_number(const nir_intrinsic_instr *instr)
    switch (instr->intrinsic) {
    case nir_intrinsic_load_per_vertex_input:
    case nir_intrinsic_load_per_vertex_output:
+   case nir_intrinsic_load_per_view_output:
    case nir_intrinsic_load_per_primitive_output:
       return 0;
    case nir_intrinsic_store_per_vertex_output:
+   case nir_intrinsic_store_per_view_output:
    case nir_intrinsic_store_per_primitive_output:
       return 1;
    default:
@@ -2992,9 +3016,11 @@ is_output(nir_intrinsic_instr *intrin)
 {
    return intrin->intrinsic == nir_intrinsic_load_output ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
+         intrin->intrinsic == nir_intrinsic_load_per_view_output ||
         intrin->intrinsic == nir_intrinsic_load_per_primitive_output ||
         intrin->intrinsic == nir_intrinsic_store_output ||
         intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
+         intrin->intrinsic == nir_intrinsic_store_per_view_output ||
         intrin->intrinsic == nir_intrinsic_store_per_primitive_output;
 }
@@ -3003,6 +3029,7 @@ is_dual_slot(nir_intrinsic_instr *intrin)
 {
    if (intrin->intrinsic == nir_intrinsic_store_output ||
       intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
+       intrin->intrinsic == nir_intrinsic_store_per_view_output ||
       intrin->intrinsic == nir_intrinsic_store_per_primitive_output) {
      return nir_src_bit_size(intrin->src[0]) == 64 &&
             nir_src_num_components(intrin->src[0]) >= 3;
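
Sketch of what the emit_load/emit_store selection above produces for a per-view position in a vertex shader (editor's informal notation):

/* Source level, assuming NUM_VIEWS enabled views:
 *
 *    out vec4 gl_Position[NUM_VIEWS];   // var->data.per_view = true
 *
 * Because per-view variables are now classified as arrayed IO, the outer
 * array index survives lowering as a dedicated view_index source instead of
 * being folded into the offset:
 *
 *    gl_Position[v] = val   ->   store_per_view_output(val, v, offset=0)
 */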

View file

@@ -290,6 +290,7 @@ nir_lower_io_to_scalar_instr(nir_builder *b, nir_instr *instr, void *data)
    if ((intr->intrinsic == nir_intrinsic_load_output ||
         intr->intrinsic == nir_intrinsic_load_per_vertex_output ||
+        intr->intrinsic == nir_intrinsic_load_per_view_output ||
         intr->intrinsic == nir_intrinsic_load_per_primitive_output) &&
        (state->mask & nir_var_shader_out) &&
        (!state->filter || state->filter(instr, state->filter_data))) {
@@ -308,6 +309,7 @@ nir_lower_io_to_scalar_instr(nir_builder *b, nir_instr *instr, void *data)
    if ((intr->intrinsic == nir_intrinsic_store_output ||
        intr->intrinsic == nir_intrinsic_store_per_vertex_output ||
+       intr->intrinsic == nir_intrinsic_store_per_view_output ||
        intr->intrinsic == nir_intrinsic_store_per_primitive_output) &&
       state->mask & nir_var_shader_out &&
       (!state->filter || state->filter(instr, state->filter_data))) {

View file

@@ -47,8 +47,10 @@ get_io_intrinsic(nir_instr *instr, nir_variable_mode modes,
       return modes & nir_var_shader_in ? intr : NULL;
    case nir_intrinsic_load_output:
    case nir_intrinsic_load_per_vertex_output:
+   case nir_intrinsic_load_per_view_output:
    case nir_intrinsic_store_output:
    case nir_intrinsic_store_per_vertex_output:
+   case nir_intrinsic_store_per_view_output:
       *out_mode = nir_var_shader_out;
       return modes & nir_var_shader_out ? intr : NULL;
    default:

View file

@@ -45,7 +45,8 @@ lower_point_size_intrin(nir_builder *b, nir_intrinsic_instr *intr, void *data)
       nir_variable *var = nir_deref_instr_get_variable(deref);
       location = var->data.location;
       psiz_src = &intr->src[1];
-   } else if (intr->intrinsic == nir_intrinsic_store_output) {
+   } else if (intr->intrinsic == nir_intrinsic_store_output ||
+              intr->intrinsic == nir_intrinsic_store_per_view_output) {
      location = nir_intrinsic_io_semantics(intr).location;
      psiz_src = &intr->src[0];
   }

View file

@@ -69,6 +69,7 @@ lower_point_size_mov(nir_builder *b, nir_intrinsic_instr *intr, void *data)
    switch (intr->intrinsic) {
    case nir_intrinsic_store_output:
    case nir_intrinsic_store_per_vertex_output:
+   case nir_intrinsic_store_per_view_output:
    case nir_intrinsic_store_per_primitive_output: {
       nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
       if (sem.location != VARYING_SLOT_PSIZ)

View file

@@ -64,6 +64,7 @@ value_src(nir_intrinsic_op intrinsic)
    switch (intrinsic) {
    case nir_intrinsic_store_output:
    case nir_intrinsic_store_per_vertex_output:
+   case nir_intrinsic_store_per_view_output:
    case nir_intrinsic_store_ssbo:
    case nir_intrinsic_store_shared:
    case nir_intrinsic_store_global:
@@ -84,6 +85,7 @@ offset_src(nir_intrinsic_op intrinsic)
    case nir_intrinsic_store_scratch:
       return 1;
    case nir_intrinsic_store_per_vertex_output:
+   case nir_intrinsic_store_per_view_output:
    case nir_intrinsic_store_ssbo:
       return 2;
    default:

View file

@@ -85,7 +85,8 @@ move_vec_src_uses_to_dest_block(nir_block *block, bool skip_const_srcs)
          nir_instr *use_instr = nir_src_parent_instr(src);
          if (use_instr->type == nir_instr_type_intrinsic) {
             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(use_instr);
-            if (intr->intrinsic == nir_intrinsic_store_output)
+            if (intr->intrinsic == nir_intrinsic_store_output ||
+                intr->intrinsic == nir_intrinsic_store_per_view_output)
                return false;
          }
       }

View file

@@ -241,6 +241,7 @@ node_is_dead(nir_cf_node *node)
          case nir_intrinsic_load_shared2_amd:
          case nir_intrinsic_load_output:
          case nir_intrinsic_load_per_vertex_output:
+         case nir_intrinsic_load_per_view_output:
            /* Same as above loads. */
            return false;

View file

@@ -63,6 +63,7 @@ opt_shrink_store_instr(nir_builder *b, nir_intrinsic_instr *instr, bool shrink_i
    switch (instr->intrinsic) {
    case nir_intrinsic_store_output:
    case nir_intrinsic_store_per_vertex_output:
+   case nir_intrinsic_store_per_view_output:
    case nir_intrinsic_store_ssbo:
    case nir_intrinsic_store_shared:
    case nir_intrinsic_store_global:

View file

@@ -149,6 +149,7 @@ opt_undef_store(nir_intrinsic_instr *intrin)
       break;
    case nir_intrinsic_store_output:
    case nir_intrinsic_store_per_vertex_output:
+   case nir_intrinsic_store_per_view_output:
    case nir_intrinsic_store_per_primitive_output:
    case nir_intrinsic_store_ssbo:
    case nir_intrinsic_store_shared:

View file

@@ -1495,14 +1495,17 @@ gather_outputs(struct nir_builder *builder, nir_intrinsic_instr *intr, void *cb_
    if (intr->intrinsic != nir_intrinsic_store_output &&
        intr->intrinsic != nir_intrinsic_load_output &&
        intr->intrinsic != nir_intrinsic_store_per_vertex_output &&
+       intr->intrinsic != nir_intrinsic_store_per_view_output &&
        intr->intrinsic != nir_intrinsic_store_per_primitive_output &&
        intr->intrinsic != nir_intrinsic_load_per_vertex_output &&
+       intr->intrinsic != nir_intrinsic_load_per_view_output &&
        intr->intrinsic != nir_intrinsic_load_per_primitive_output)
       return false;
 
    bool is_store =
       intr->intrinsic == nir_intrinsic_store_output ||
      intr->intrinsic == nir_intrinsic_store_per_vertex_output ||
+      intr->intrinsic == nir_intrinsic_store_per_view_output ||
      intr->intrinsic == nir_intrinsic_store_per_primitive_output;
 
    if (is_store) {

View file

@@ -520,9 +520,11 @@ nir_opt_vectorize_io(nir_shader *shader, nir_variable_mode modes)
          case nir_intrinsic_load_output:
          case nir_intrinsic_load_per_vertex_output:
+         case nir_intrinsic_load_per_view_output:
          case nir_intrinsic_load_per_primitive_output:
          case nir_intrinsic_store_output:
          case nir_intrinsic_store_per_vertex_output:
+         case nir_intrinsic_store_per_view_output:
          case nir_intrinsic_store_per_primitive_output:
            if (!(modes & nir_var_shader_out))
               continue;

View file

@@ -1371,6 +1371,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
       case nir_intrinsic_store_output:
       case nir_intrinsic_store_per_primitive_output:
       case nir_intrinsic_store_per_vertex_output:
+      case nir_intrinsic_store_per_view_output:
         mode = nir_var_shader_out;
         break;
@@ -1423,7 +1424,8 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
          state->shader->info.stage == MESA_SHADER_GEOMETRY &&
          (instr->intrinsic == nir_intrinsic_store_output ||
           instr->intrinsic == nir_intrinsic_store_per_primitive_output ||
-          instr->intrinsic == nir_intrinsic_store_per_vertex_output)) {
+          instr->intrinsic == nir_intrinsic_store_per_vertex_output ||
+          instr->intrinsic == nir_intrinsic_store_per_view_output)) {
          unsigned gs_streams = io.gs_streams;
          fprintf(fp, " gs_streams(");
          for (unsigned i = 0; i < 4; i++) {
@@ -1651,6 +1653,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
       case nir_intrinsic_load_output:
       case nir_intrinsic_store_output:
       case nir_intrinsic_store_per_vertex_output:
+      case nir_intrinsic_store_per_view_output:
          var_mode = nir_var_shader_out;
         break;
      default:

View file

@@ -618,6 +618,7 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
    case nir_intrinsic_load_interpolated_input:
    case nir_intrinsic_load_output:
    case nir_intrinsic_load_per_vertex_output:
+   case nir_intrinsic_load_per_view_output:
    case nir_intrinsic_load_per_primitive_output:
    case nir_intrinsic_load_push_constant:
       /* All memory load operations must load at least a byte */
@@ -652,6 +653,7 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
    case nir_intrinsic_store_output:
    case nir_intrinsic_store_per_vertex_output:
+   case nir_intrinsic_store_per_view_output:
       if (state->shader->info.stage == MESA_SHADER_FRAGMENT)
          validate_assert(state, nir_src_bit_size(instr->src[0]) >= 8);
       else
@@ -1527,10 +1529,6 @@ validate_var_decl(nir_variable *var, nir_variable_mode valid_modes,
       const struct glsl_type *type = glsl_get_array_element(var->type);
       if (nir_is_arrayed_io(var, state->shader->info.stage)) {
-         if (var->data.per_view) {
-            assert(glsl_type_is_array(type));
-            type = glsl_get_array_element(type);
-         }
          assert(glsl_type_is_array(type));
         assert(glsl_type_is_scalar(glsl_get_array_element(type)));
      } else {

View file

@@ -118,6 +118,8 @@ static const nir_shader_compiler_options ir3_base_options = {
    .divergence_analysis_options = nir_divergence_uniform_load_tears,
    .scalarize_ddx = true,
+
+   .per_view_unique_driver_locations = true,
 };
 
 struct ir3_compiler *

View file

@@ -2842,6 +2842,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
       b = NULL;
       break;
    case nir_intrinsic_store_output:
+   case nir_intrinsic_store_per_view_output:
      setup_output(ctx, intr);
      break;
   case nir_intrinsic_load_base_vertex:
@@ -5028,25 +5029,21 @@ setup_output(struct ir3_context *ctx, nir_intrinsic_instr *intr)
    struct ir3_shader_variant *so = ctx->so;
    nir_io_semantics io = nir_intrinsic_io_semantics(intr);
 
-   compile_assert(ctx, nir_src_is_const(intr->src[1]));
+   nir_src offset_src = *nir_get_io_offset_src(intr);
+   compile_assert(ctx, nir_src_is_const(offset_src));
 
-   unsigned offset = nir_src_as_uint(intr->src[1]);
-   unsigned n = nir_intrinsic_base(intr) + offset;
+   unsigned offset = nir_src_as_uint(offset_src);
    unsigned frac = nir_intrinsic_component(intr);
    unsigned ncomp = nir_intrinsic_src_components(intr, 0);
+   unsigned slot = io.location + offset;
 
    /* For per-view variables, each user-facing slot corresponds to multiple
-    * views, each with a corresponding driver_location, and the offset is for
-    * the driver_location. To properly figure out of the slot, we'd need to
-    * plumb through the number of views. However, for now we only use
-    * per-view with gl_Position, so we assume that the variable is not an
-    * array or matrix (so there are no indirect accesses to the variable
-    * itself) and the indirect offset corresponds to the view.
-    */
-   unsigned slot = io.location + (io.per_view ? 0 : offset);
-
-   if (io.per_view && offset > 0)
-      so->multi_pos_output = true;
+    * views, each with a corresponding driver_location, and the view index
+    * offsets the driver_location. */
+   unsigned view_index = intr->intrinsic == nir_intrinsic_store_per_view_output
+                            ? nir_src_as_uint(intr->src[1])
+                            : 0;
+   unsigned n = nir_intrinsic_base(intr) + offset + view_index;
 
    if (ctx->so->type == MESA_SHADER_FRAGMENT) {
       switch (slot) {
@@ -5124,8 +5121,9 @@ setup_output(struct ir3_context *ctx, nir_intrinsic_instr *intr)
    compile_assert(ctx, so->outputs_count <= ARRAY_SIZE(so->outputs));
 
    so->outputs[n].slot = slot;
-   if (io.per_view)
-      so->outputs[n].view = offset;
+   if (view_index > 0)
+      so->multi_pos_output = true;
+   so->outputs[n].view = view_index;
 
    for (int i = 0; i < ncomp; i++) {
       unsigned idx = (n * 4) + i + frac;
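
Concretely, for the case ir3 cares about (gl_Position at base 0; the two-view numbers are an assumed example):

/* store_per_view_output(val, view=0, offset=0) -> n = 0, slot = VARYING_SLOT_POS
 * store_per_view_output(val, view=1, offset=0) -> n = 1, slot = VARYING_SLOT_POS,
 *                                                 so->multi_pos_output = true
 *
 * Each view lands in its own consecutive output record, matching the
 * per_view_unique_driver_locations layout ir3_base_options opts into. */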

View file

@@ -926,7 +926,8 @@ output_slot_used_for_binning(gl_varying_slot slot)
 static bool
 remove_nonbinning_output(nir_builder *b, nir_intrinsic_instr *intr, void *data)
 {
-   if (intr->intrinsic != nir_intrinsic_store_output)
+   if (intr->intrinsic != nir_intrinsic_store_output &&
+       intr->intrinsic != nir_intrinsic_store_per_view_output)
       return false;
 
    nir_io_semantics io = nir_intrinsic_io_semantics(intr);

View file

@@ -145,6 +145,7 @@ is_intrinsic_store(nir_intrinsic_op op)
 {
    switch (op) {
    case nir_intrinsic_store_output:
+   case nir_intrinsic_store_per_view_output:
    case nir_intrinsic_store_scratch:
    case nir_intrinsic_store_ssbo:
    case nir_intrinsic_store_shared:

View file

@@ -62,6 +62,7 @@ lower_64b_intrinsics(nir_builder *b, nir_instr *instr, void *unused)
    switch (intr->intrinsic) {
    case nir_intrinsic_store_ssbo:
    case nir_intrinsic_store_global_ir3:
+   case nir_intrinsic_store_per_view_output:
       offset_src_idx = 2;
      break;
   default:
@@ -123,6 +124,7 @@ lower_64b_intrinsics(nir_builder *b, nir_instr *instr, void *unused)
    case nir_intrinsic_load_ssbo:
    case nir_intrinsic_load_ubo:
    case nir_intrinsic_load_global_ir3:
+   case nir_intrinsic_load_per_view_output:
       offset_src_idx = 1;
       break;
    default:

View file

@@ -81,6 +81,7 @@ const struct nir_shader_compiler_options brw_scalar_nir_options = {
    .scalarize_ddx = true,
    .support_indirect_inputs = (uint8_t)BITFIELD_MASK(PIPE_SHADER_TYPES),
    .support_indirect_outputs = (uint8_t)BITFIELD_MASK(PIPE_SHADER_TYPES),
+   .per_view_unique_driver_locations = true,
 };
 
 struct brw_compiler *

View file

@@ -285,8 +285,10 @@ is_output(nir_intrinsic_instr *intrin)
 {
    return intrin->intrinsic == nir_intrinsic_load_output ||
           intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
+          intrin->intrinsic == nir_intrinsic_load_per_view_output ||
           intrin->intrinsic == nir_intrinsic_store_output ||
-          intrin->intrinsic == nir_intrinsic_store_per_vertex_output;
+          intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
+          intrin->intrinsic == nir_intrinsic_store_per_view_output;
 }
@@ -342,6 +344,56 @@ remap_patch_urb_offsets(nir_block *block, nir_builder *b,
    return true;
 }
 
+/* Replace store_per_view_output with plain store_output, mapping the view
+ * index to IO offset. Because we only use per-view outputs for position, the
+ * offset pitch is always 1. */
+static bool
+lower_per_view_outputs(nir_builder *b,
+                       nir_intrinsic_instr *intrin,
+                       UNUSED void *cb_data)
+{
+   if (intrin->intrinsic != nir_intrinsic_store_per_view_output &&
+       intrin->intrinsic != nir_intrinsic_load_per_view_output)
+      return false;
+
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   nir_src *view_index = nir_get_io_arrayed_index_src(intrin);
+   nir_src *offset = nir_get_io_offset_src(intrin);
+   nir_def *new_offset = nir_iadd(b, view_index->ssa, offset->ssa);
+
+   nir_intrinsic_instr *new;
+   if (intrin->intrinsic == nir_intrinsic_store_per_view_output)
+      new = nir_store_output(b, intrin->src[0].ssa, new_offset);
+   else {
+      nir_def *new_def = nir_load_output(b, intrin->def.num_components,
+                                         intrin->def.bit_size, new_offset);
+      new = nir_instr_as_intrinsic(new_def->parent_instr);
+   }
+
+   nir_intrinsic_set_base(new, nir_intrinsic_base(intrin));
+   nir_intrinsic_set_range(new, nir_intrinsic_range(intrin));
+   nir_intrinsic_set_write_mask(new, nir_intrinsic_write_mask(intrin));
+   nir_intrinsic_set_component(new, nir_intrinsic_component(intrin));
+   nir_intrinsic_set_src_type(new, nir_intrinsic_src_type(intrin));
+   nir_intrinsic_set_io_semantics(new, nir_intrinsic_io_semantics(intrin));
+
+   if (intrin->intrinsic == nir_intrinsic_load_per_view_output)
+      nir_def_rewrite_uses(&intrin->def, &new->def);
+   nir_instr_remove(&intrin->instr);
+
+   return true;
+}
+
+static bool
+brw_nir_lower_per_view_outputs(nir_shader *nir)
+{
+   return nir_shader_intrinsics_pass(nir, lower_per_view_outputs,
+                                     nir_metadata_control_flow,
+                                     NULL);
+}
+
 void
 brw_nir_lower_vs_inputs(nir_shader *nir)
 {
@@ -640,6 +692,7 @@ brw_nir_lower_vue_outputs(nir_shader *nir)
    nir_lower_io(nir, nir_var_shader_out, type_size_vec4,
                 nir_lower_io_lower_64bit_to_32);
+   brw_nir_lower_per_view_outputs(nir);
 }
 
 void
@@ -1220,7 +1273,7 @@ brw_mesh_compact_io(nir_shader *mesh, nir_shader *frag)
       assert(location < ARRAY_SIZE(mapping));
 
       const struct glsl_type *type = var->type;
-      if (nir_is_arrayed_io(var, MESA_SHADER_MESH) || var->data.per_view) {
+      if (nir_is_arrayed_io(var, MESA_SHADER_MESH)) {
        assert(glsl_type_is_array(type));
        type = glsl_get_array_element(type);
     }
@@ -2253,7 +2306,7 @@ brw_nir_get_var_type(const struct nir_shader *nir, nir_variable *var)
    const struct glsl_type *type = var->interface_type;
    if (!type) {
       type = var->type;
-      if (nir_is_arrayed_io(var, nir->info.stage) || var->data.per_view) {
+      if (nir_is_arrayed_io(var, nir->info.stage)) {
         assert(glsl_type_is_array(type));
         type = glsl_get_array_element(type);
      }
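
Net effect of brw_nir_lower_per_view_outputs, sketched (offset pitch 1 per the comment above; the view count is an assumed example):

/* store_per_view_output(val, view, offset=k) -> store_output(val, view + k)
 *
 * With per_view_unique_driver_locations set in brw_scalar_nir_options, the
 * driver locations of views 0..N-1 are consecutive, so adding the view index
 * to the offset selects that view's slot. */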

View file

@@ -1899,7 +1899,7 @@ elk_nir_get_var_type(const struct nir_shader *nir, nir_variable *var)
    const struct glsl_type *type = var->interface_type;
    if (!type) {
       type = var->type;
-      if (nir_is_arrayed_io(var, nir->info.stage) || var->data.per_view) {
+      if (nir_is_arrayed_io(var, nir->info.stage)) {
         assert(glsl_type_is_array(type));
         type = glsl_get_array_element(type);
      }

View file

@@ -114,7 +114,7 @@ anv_mesh_convert_attrs_prim_to_vert(struct nir_shader *nir,
              location >= VARYING_SLOT_VAR0);
 
       const struct glsl_type *type = var->type;
-      if (nir_is_arrayed_io(var, MESA_SHADER_MESH) || var->data.per_view) {
+      if (nir_is_arrayed_io(var, MESA_SHADER_MESH)) {
         assert(glsl_type_is_array(type));
         type = glsl_get_array_element(type);
      }

View file

@@ -60,6 +60,12 @@ bi_lower_divergent_indirects_impl(nir_builder *b, nir_intrinsic_instr *intr,
       offset = nir_get_io_offset_src(intr);
       break;
 
+   case nir_intrinsic_store_per_view_output:
+      assert(stage == MESA_SHADER_VERTEX);
+      assert(!nir_src_is_divergent(&intr->src[1]));
+      offset = nir_get_io_offset_src(intr);
+      break;
+
    case nir_intrinsic_image_texel_address:
    case nir_intrinsic_image_load:
    case nir_intrinsic_image_store: