From becb014d2747ecf455a6a3ac0c38be236e2da53a Mon Sep 17 00:00:00 2001 From: Benjamin Lee Date: Sat, 19 Oct 2024 21:03:13 -0700 Subject: [PATCH] nir: treat per-view outputs as arrayed IO This is needed for implementing multiview in panvk, where the address calculation for multiview outputs is not well-represented by lowering to nir_intrinsic_store_output with a single offset. The case where a variable is both per-view and per-{vertex,primitive} is now unsupported. This would come up with drivers implementing NV_mesh_shader or using nir_lower_multiview on geometry, tessellation, or mesh shaders. No drivers currently do either of these. There was some code that attempted to handle the nested per-view case by unwrapping per-view/arrayed types twice, but it's unclear to what extent this actually worked. ANV and Turnip both rely on per-view outputs being assigned a unique driver location for each view, so I've added on option to configure that behavior rather than removing it. Signed-off-by: Benjamin Lee Reviewed-by: Boris Brezillon Reviewed-by: Lionel Landwerlin Reviewed-by: Connor Abbott Part-of: --- src/compiler/glsl/gl_nir_link_varyings.c | 10 ++-- .../glsl/gl_nir_lower_packed_varyings.c | 2 +- src/compiler/nir/nir.h | 9 +++ src/compiler/nir/nir_divergence_analysis.c | 6 ++ src/compiler/nir/nir_gather_info.c | 7 +-- src/compiler/nir/nir_intrinsics.py | 7 +++ src/compiler/nir/nir_linking_helpers.c | 32 +++++----- .../nir/nir_lower_clamp_color_outputs.c | 1 + src/compiler/nir/nir_lower_clip.c | 1 + .../nir/nir_lower_clip_cull_distance_arrays.c | 5 -- src/compiler/nir/nir_lower_clip_disable.c | 1 + src/compiler/nir/nir_lower_io.c | 37 ++++++++++-- src/compiler/nir/nir_lower_io_to_scalar.c | 2 + src/compiler/nir/nir_lower_mediump.c | 2 + src/compiler/nir/nir_lower_point_size.c | 3 +- src/compiler/nir/nir_lower_point_size_mov.c | 1 + src/compiler/nir/nir_lower_wrmasks.c | 2 + .../nir/nir_move_vec_src_uses_to_dest.c | 3 +- src/compiler/nir/nir_opt_dead_cf.c | 1 + 
src/compiler/nir/nir_opt_shrink_stores.c | 1 + src/compiler/nir/nir_opt_undef.c | 1 + src/compiler/nir/nir_opt_varyings.c | 3 + src/compiler/nir/nir_opt_vectorize_io.c | 2 + src/compiler/nir/nir_print.c | 5 +- src/compiler/nir/nir_validate.c | 6 +- src/freedreno/ir3/ir3_compiler.c | 2 + src/freedreno/ir3/ir3_compiler_nir.c | 30 +++++----- src/freedreno/ir3/ir3_nir.c | 3 +- src/freedreno/ir3/ir3_nir.h | 1 + src/freedreno/ir3/ir3_nir_lower_64b.c | 2 + src/intel/compiler/brw_compiler.c | 1 + src/intel/compiler/brw_nir.c | 59 ++++++++++++++++++- src/intel/compiler/elk/elk_nir.c | 2 +- src/intel/vulkan/anv_mesh_perprim_wa.c | 2 +- .../compiler/bi_lower_divergent_indirects.c | 6 ++ 35 files changed, 191 insertions(+), 67 deletions(-) diff --git a/src/compiler/glsl/gl_nir_link_varyings.c b/src/compiler/glsl/gl_nir_link_varyings.c index 36e370a72ab..42d70e96d43 100644 --- a/src/compiler/glsl/gl_nir_link_varyings.c +++ b/src/compiler/glsl/gl_nir_link_varyings.c @@ -77,7 +77,7 @@ static const struct glsl_type * get_varying_type(const nir_variable *var, gl_shader_stage stage) { const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -3387,7 +3387,7 @@ set_variable_io_mask(BITSET_WORD *bits, nir_variable *var, gl_shader_stage stage assert(var->data.location >= VARYING_SLOT_VAR0); const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -3536,7 +3536,7 @@ remove_unused_io_vars(nir_shader *producer, nir_shader *consumer, unsigned location = var->data.location - VARYING_SLOT_VAR0; const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, shader->info.stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, shader->info.stage)) { 
assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -3606,7 +3606,7 @@ remove_unused_varyings(nir_shader *producer, nir_shader *consumer, continue; const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, producer->info.stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -3622,7 +3622,7 @@ remove_unused_varyings(nir_shader *producer, nir_shader *consumer, continue; const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, consumer->info.stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, consumer->info.stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } diff --git a/src/compiler/glsl/gl_nir_lower_packed_varyings.c b/src/compiler/glsl/gl_nir_lower_packed_varyings.c index 1bec7ee59ca..928e92e10d1 100644 --- a/src/compiler/glsl/gl_nir_lower_packed_varyings.c +++ b/src/compiler/glsl/gl_nir_lower_packed_varyings.c @@ -222,7 +222,7 @@ lower_packed_varying_needs_lowering(nir_shader *shader, nir_variable *var, return false; const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, shader->info.stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, shader->info.stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 89a96122230..5e9e5ef069d 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -4377,6 +4377,15 @@ typedef struct nir_shader_compiler_options { /** Whether derivative intrinsics must be scalarized. */ bool scalarize_ddx; + /** + * Assign a range of driver locations to per-view outputs, with unique + * slots for each view. If unset, per-view outputs will be treated + * similarly to other arrayed IO, and only slots for one view will be + * assigned. 
Regardless of this setting, per-view outputs are only assigned + * slots for one value in var->data.location. + */ + bool per_view_unique_driver_locations; + /** Options determining lowering and behavior of inputs and outputs. */ nir_io_options io_options; diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index 2e556007174..fe8f266ad58 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -418,6 +418,12 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) unreachable("Invalid stage for load_output"); } break; + case nir_intrinsic_load_per_view_output: + is_divergent = instr->src[0].ssa->divergent || + instr->src[1].ssa->divergent || + (stage == MESA_SHADER_TESS_CTRL && + !(options & nir_divergence_single_patch_per_tcs_subgroup)); + break; case nir_intrinsic_load_per_vertex_output: /* TCS and NV_mesh_shader only (EXT_mesh_shader does not allow loading outputs). 
*/ assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_MESH); diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index a8621ab4a3f..b6ecba36ecb 100644 --- a/src/compiler/nir/nir_gather_info.c +++ b/src/compiler/nir/nir_gather_info.c @@ -221,11 +221,6 @@ mark_whole_variable(nir_shader *shader, nir_variable *var, type = glsl_get_array_element(type); } - if (var->data.per_view) { - assert(glsl_type_is_array(type)); - type = glsl_get_array_element(type); - } - const unsigned slots = nir_variable_count_slots(var, type); set_io_mask(shader, var, 0, slots, deref, is_output_read); } @@ -578,6 +573,7 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader, case nir_intrinsic_load_output: case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_load_per_view_output: case nir_intrinsic_load_per_primitive_output: if (shader->info.stage == MESA_SHADER_TESS_CTRL && instr->intrinsic == nir_intrinsic_load_output && @@ -613,6 +609,7 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader, case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: case nir_intrinsic_store_per_primitive_output: if (shader->info.stage == MESA_SHADER_TESS_CTRL && instr->intrinsic == nir_intrinsic_store_output && diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index d664753095c..0253c953d09 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1182,6 +1182,11 @@ load("ssbo_address", [1, 1], [], [CAN_ELIMINATE, CAN_REORDER]) load("output", [1], [BASE, RANGE, COMPONENT, DEST_TYPE, IO_SEMANTICS], flags=[CAN_ELIMINATE]) # src[] = { vertex, offset }. load("per_vertex_output", [1, 1], [BASE, RANGE, COMPONENT, DEST_TYPE, IO_SEMANTICS], [CAN_ELIMINATE]) +# src[] = { view_index, offset }. 
+# view_index is "compacted", meaning it is the index of the Nth *enabled* view, +# not the Nth absolute view. See the nir_lower_multiview docs for a more +# detailed explanation. +load("per_view_output", [1, 1], [BASE, RANGE, COMPONENT, DEST_TYPE, IO_SEMANTICS], [CAN_ELIMINATE]) # src[] = { primitive, offset }. load("per_primitive_output", [1, 1], [BASE, COMPONENT, DEST_TYPE, IO_SEMANTICS], [CAN_ELIMINATE]) # src[] = { offset }. @@ -1223,6 +1228,8 @@ def store(name, srcs, indices=[], flags=[]): store("output", [1], [BASE, RANGE, WRITE_MASK, COMPONENT, SRC_TYPE, IO_SEMANTICS, IO_XFB, IO_XFB2]) # src[] = { value, vertex, offset }. store("per_vertex_output", [1, 1], [BASE, RANGE, WRITE_MASK, COMPONENT, SRC_TYPE, IO_SEMANTICS]) +# src[] = { value, view_index, offset }. +store("per_view_output", [1, 1], [BASE, RANGE, WRITE_MASK, COMPONENT, SRC_TYPE, IO_SEMANTICS]) # src[] = { value, primitive, offset }. store("per_primitive_output", [1, 1], [BASE, RANGE, WRITE_MASK, COMPONENT, SRC_TYPE, IO_SEMANTICS]) # src[] = { value, block_index, offset } diff --git a/src/compiler/nir/nir_linking_helpers.c b/src/compiler/nir/nir_linking_helpers.c index 4b84369b1a0..b9fdea68eca 100644 --- a/src/compiler/nir/nir_linking_helpers.c +++ b/src/compiler/nir/nir_linking_helpers.c @@ -49,7 +49,7 @@ get_variable_io_mask(nir_variable *var, gl_shader_stage stage) assert(location < 64); const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -337,7 +337,7 @@ get_unmoveable_components_masks(nir_shader *shader, var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) { const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -438,7 +438,7 @@ 
remap_slots_and_components(nir_shader *shader, nir_variable_mode mode, var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) { const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -578,7 +578,7 @@ gather_varying_component_info(nir_shader *producer, nir_shader *consumer, continue; const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, producer->info.stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -641,8 +641,7 @@ gather_varying_component_info(nir_shader *producer, nir_shader *consumer, if (!vc_info->initialised) { const struct glsl_type *type = in_var->type; - if (nir_is_arrayed_io(in_var, consumer->info.stage) || - in_var->data.per_view) { + if (nir_is_arrayed_io(in_var, consumer->info.stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -1539,18 +1538,17 @@ nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode, last_partial = false; } - /* per-view variables have an extra array dimension, which is ignored - * when counting user-facing slots (var->data.location), but *not* - * with driver slots (var->data.driver_location). That is, each user - * slot maps to multiple driver slots. - */ - driver_size = glsl_count_attribute_slots(type, false); - if (var->data.per_view) { - assert(glsl_type_is_array(type)); - var_size = - glsl_count_attribute_slots(glsl_get_array_element(type), false); + var_size = glsl_count_attribute_slots(type, false); + if (var->data.per_view && + shader->options->per_view_unique_driver_locations) { + /* per-view variables have an extra array dimension, which is + * ignored when counting user-facing slots (var->data.location), + * but *not* with driver slots (var->data.driver_location). 
That + * is, each user slot maps to multiple driver slots. */ + const struct glsl_type *array_type = var->type; + driver_size = glsl_count_attribute_slots(array_type, false); } else { - var_size = driver_size; + driver_size = var_size; } } diff --git a/src/compiler/nir/nir_lower_clamp_color_outputs.c b/src/compiler/nir/nir_lower_clamp_color_outputs.c index 17afa82716d..a0a7db9d57b 100644 --- a/src/compiler/nir/nir_lower_clamp_color_outputs.c +++ b/src/compiler/nir/nir_lower_clamp_color_outputs.c @@ -64,6 +64,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr, nir_shader *shader) loc = out->data.location; break; case nir_intrinsic_store_output: + case nir_intrinsic_store_per_view_output: loc = nir_intrinsic_io_semantics(intr).location; break; default: diff --git a/src/compiler/nir/nir_lower_clip.c b/src/compiler/nir/nir_lower_clip.c index f7823a69bb7..e105929953d 100644 --- a/src/compiler/nir/nir_lower_clip.c +++ b/src/compiler/nir/nir_lower_clip.c @@ -165,6 +165,7 @@ find_output(nir_builder *b, unsigned location) if ((intr->intrinsic == nir_intrinsic_store_output || intr->intrinsic == nir_intrinsic_store_per_vertex_output || + intr->intrinsic == nir_intrinsic_store_per_view_output || intr->intrinsic == nir_intrinsic_store_per_primitive_output) && nir_intrinsic_io_semantics(intr).location == location) { assert(nir_src_is_const(*nir_get_io_offset_src(intr))); diff --git a/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c b/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c index fd13145e934..1a12c2903c0 100644 --- a/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c +++ b/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c @@ -105,11 +105,6 @@ get_unwrapped_array_length(nir_shader *nir, nir_variable *var) if (nir_is_arrayed_io(var, nir->info.stage)) type = glsl_get_array_element(type); - if (var->data.per_view) { - assert(glsl_type_is_array(type)); - type = glsl_get_array_element(type); - } - assert(glsl_type_is_array(type)); return 
glsl_get_length(type); diff --git a/src/compiler/nir/nir_lower_clip_disable.c b/src/compiler/nir/nir_lower_clip_disable.c index beeb00400a4..bf705cc9f04 100644 --- a/src/compiler/nir/nir_lower_clip_disable.c +++ b/src/compiler/nir/nir_lower_clip_disable.c @@ -128,6 +128,7 @@ lower_clip_plane_store_io(nir_builder *b, nir_intrinsic_instr *intr, case nir_intrinsic_store_output: case nir_intrinsic_store_per_primitive_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: break; default: return false; diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index 1313a15855d..0c5732d5f69 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -148,6 +148,14 @@ nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage) if (var->data.patch || !glsl_type_is_array(var->type)) return false; + if (var->data.per_view) { + /* Nested arrayed outputs (both per-view and per-{vertex,primitive}) are + * unsupported. */ + assert(stage == MESA_SHADER_VERTEX); + assert(var->data.mode == nir_var_shader_out); + return true; + } + if (stage == MESA_SHADER_MESH) { /* NV_mesh_shader: this is flat array for the whole workgroup. */ if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES) @@ -352,8 +360,14 @@ emit_load(struct lower_io_state *state, } break; case nir_var_shader_out: - op = !array_index ? nir_intrinsic_load_output : var->data.per_primitive ? 
nir_intrinsic_load_per_primitive_output - : nir_intrinsic_load_per_vertex_output; + if (!array_index) + op = nir_intrinsic_load_output; + else if (var->data.per_primitive) + op = nir_intrinsic_load_per_primitive_output; + else if (var->data.per_view) + op = nir_intrinsic_load_per_view_output; + else + op = nir_intrinsic_load_per_vertex_output; break; case nir_var_uniform: op = nir_intrinsic_load_uniform; @@ -495,9 +509,15 @@ emit_store(struct lower_io_state *state, nir_def *data, nir_builder *b = &state->builder; assert(var->data.mode == nir_var_shader_out); - nir_intrinsic_op op = - !array_index ? nir_intrinsic_store_output : var->data.per_primitive ? nir_intrinsic_store_per_primitive_output - : nir_intrinsic_store_per_vertex_output; + nir_intrinsic_op op; + if (!array_index) + op = nir_intrinsic_store_output; + else if (var->data.per_view) + op = nir_intrinsic_store_per_view_output; + else if (var->data.per_primitive) + op = nir_intrinsic_store_per_primitive_output; + else + op = nir_intrinsic_store_per_vertex_output; nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->builder.shader, op); @@ -2806,6 +2826,7 @@ nir_get_io_offset_src_number(const nir_intrinsic_instr *instr) case nir_intrinsic_load_input_vertex: case nir_intrinsic_load_per_vertex_input: case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_load_per_view_output: case nir_intrinsic_load_per_primitive_output: case nir_intrinsic_load_interpolated_input: case nir_intrinsic_load_smem_amd: @@ -2823,6 +2844,7 @@ nir_get_io_offset_src_number(const nir_intrinsic_instr *instr) return 1; case nir_intrinsic_store_ssbo: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: case nir_intrinsic_store_per_primitive_output: return 2; default: @@ -2849,9 +2871,11 @@ nir_get_io_arrayed_index_src_number(const nir_intrinsic_instr *instr) switch (instr->intrinsic) { case nir_intrinsic_load_per_vertex_input: case nir_intrinsic_load_per_vertex_output: + case 
nir_intrinsic_load_per_view_output: case nir_intrinsic_load_per_primitive_output: return 0; case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: case nir_intrinsic_store_per_primitive_output: return 1; default: @@ -2992,9 +3016,11 @@ is_output(nir_intrinsic_instr *intrin) { return intrin->intrinsic == nir_intrinsic_load_output || intrin->intrinsic == nir_intrinsic_load_per_vertex_output || + intrin->intrinsic == nir_intrinsic_load_per_view_output || intrin->intrinsic == nir_intrinsic_load_per_primitive_output || intrin->intrinsic == nir_intrinsic_store_output || intrin->intrinsic == nir_intrinsic_store_per_vertex_output || + intrin->intrinsic == nir_intrinsic_store_per_view_output || intrin->intrinsic == nir_intrinsic_store_per_primitive_output; } @@ -3003,6 +3029,7 @@ is_dual_slot(nir_intrinsic_instr *intrin) { if (intrin->intrinsic == nir_intrinsic_store_output || intrin->intrinsic == nir_intrinsic_store_per_vertex_output || + intrin->intrinsic == nir_intrinsic_store_per_view_output || intrin->intrinsic == nir_intrinsic_store_per_primitive_output) { return nir_src_bit_size(intrin->src[0]) == 64 && nir_src_num_components(intrin->src[0]) >= 3; diff --git a/src/compiler/nir/nir_lower_io_to_scalar.c b/src/compiler/nir/nir_lower_io_to_scalar.c index fe28722bd50..d28d4667372 100644 --- a/src/compiler/nir/nir_lower_io_to_scalar.c +++ b/src/compiler/nir/nir_lower_io_to_scalar.c @@ -290,6 +290,7 @@ nir_lower_io_to_scalar_instr(nir_builder *b, nir_instr *instr, void *data) if ((intr->intrinsic == nir_intrinsic_load_output || intr->intrinsic == nir_intrinsic_load_per_vertex_output || + intr->intrinsic == nir_intrinsic_load_per_view_output || intr->intrinsic == nir_intrinsic_load_per_primitive_output) && (state->mask & nir_var_shader_out) && (!state->filter || state->filter(instr, state->filter_data))) { @@ -308,6 +309,7 @@ nir_lower_io_to_scalar_instr(nir_builder *b, nir_instr *instr, void *data) if ((intr->intrinsic == 
nir_intrinsic_store_output || intr->intrinsic == nir_intrinsic_store_per_vertex_output || + intr->intrinsic == nir_intrinsic_store_per_view_output || intr->intrinsic == nir_intrinsic_store_per_primitive_output) && state->mask & nir_var_shader_out && (!state->filter || state->filter(instr, state->filter_data))) { diff --git a/src/compiler/nir/nir_lower_mediump.c b/src/compiler/nir/nir_lower_mediump.c index 176e2d1e1b0..2114007e719 100644 --- a/src/compiler/nir/nir_lower_mediump.c +++ b/src/compiler/nir/nir_lower_mediump.c @@ -47,8 +47,10 @@ get_io_intrinsic(nir_instr *instr, nir_variable_mode modes, return modes & nir_var_shader_in ? intr : NULL; case nir_intrinsic_load_output: case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_load_per_view_output: case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: *out_mode = nir_var_shader_out; return modes & nir_var_shader_out ? intr : NULL; default: diff --git a/src/compiler/nir/nir_lower_point_size.c b/src/compiler/nir/nir_lower_point_size.c index f91df81d130..b6fdc17f1e4 100644 --- a/src/compiler/nir/nir_lower_point_size.c +++ b/src/compiler/nir/nir_lower_point_size.c @@ -45,7 +45,8 @@ lower_point_size_intrin(nir_builder *b, nir_intrinsic_instr *intr, void *data) nir_variable *var = nir_deref_instr_get_variable(deref); location = var->data.location; psiz_src = &intr->src[1]; - } else if (intr->intrinsic == nir_intrinsic_store_output) { + } else if (intr->intrinsic == nir_intrinsic_store_output || + intr->intrinsic == nir_intrinsic_store_per_view_output) { location = nir_intrinsic_io_semantics(intr).location; psiz_src = &intr->src[0]; } diff --git a/src/compiler/nir/nir_lower_point_size_mov.c b/src/compiler/nir/nir_lower_point_size_mov.c index a951996396b..f199329c413 100644 --- a/src/compiler/nir/nir_lower_point_size_mov.c +++ b/src/compiler/nir/nir_lower_point_size_mov.c @@ -69,6 +69,7 @@ lower_point_size_mov(nir_builder *b, 
nir_intrinsic_instr *intr, void *data) switch (intr->intrinsic) { case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: case nir_intrinsic_store_per_primitive_output: { nir_io_semantics sem = nir_intrinsic_io_semantics(intr); if (sem.location != VARYING_SLOT_PSIZ) diff --git a/src/compiler/nir/nir_lower_wrmasks.c b/src/compiler/nir/nir_lower_wrmasks.c index a0ff9df1026..605611c6e33 100644 --- a/src/compiler/nir/nir_lower_wrmasks.c +++ b/src/compiler/nir/nir_lower_wrmasks.c @@ -64,6 +64,7 @@ value_src(nir_intrinsic_op intrinsic) switch (intrinsic) { case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: case nir_intrinsic_store_ssbo: case nir_intrinsic_store_shared: case nir_intrinsic_store_global: @@ -84,6 +85,7 @@ offset_src(nir_intrinsic_op intrinsic) case nir_intrinsic_store_scratch: return 1; case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: case nir_intrinsic_store_ssbo: return 2; default: diff --git a/src/compiler/nir/nir_move_vec_src_uses_to_dest.c b/src/compiler/nir/nir_move_vec_src_uses_to_dest.c index fc62361cade..c85aa19fc2a 100644 --- a/src/compiler/nir/nir_move_vec_src_uses_to_dest.c +++ b/src/compiler/nir/nir_move_vec_src_uses_to_dest.c @@ -85,7 +85,8 @@ move_vec_src_uses_to_dest_block(nir_block *block, bool skip_const_srcs) nir_instr *use_instr = nir_src_parent_instr(src); if (use_instr->type == nir_instr_type_intrinsic) { nir_intrinsic_instr *intr = nir_instr_as_intrinsic(use_instr); - if (intr->intrinsic == nir_intrinsic_store_output) + if (intr->intrinsic == nir_intrinsic_store_output || + intr->intrinsic == nir_intrinsic_store_per_view_output) return false; } } diff --git a/src/compiler/nir/nir_opt_dead_cf.c b/src/compiler/nir/nir_opt_dead_cf.c index cb260c7ce56..592491fec05 100644 --- a/src/compiler/nir/nir_opt_dead_cf.c +++ b/src/compiler/nir/nir_opt_dead_cf.c @@ -241,6 +241,7 
@@ node_is_dead(nir_cf_node *node) case nir_intrinsic_load_shared2_amd: case nir_intrinsic_load_output: case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_load_per_view_output: /* Same as above loads. */ return false; diff --git a/src/compiler/nir/nir_opt_shrink_stores.c b/src/compiler/nir/nir_opt_shrink_stores.c index a39e60a2015..027834e1f92 100644 --- a/src/compiler/nir/nir_opt_shrink_stores.c +++ b/src/compiler/nir/nir_opt_shrink_stores.c @@ -63,6 +63,7 @@ opt_shrink_store_instr(nir_builder *b, nir_intrinsic_instr *instr, bool shrink_i switch (instr->intrinsic) { case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: case nir_intrinsic_store_ssbo: case nir_intrinsic_store_shared: case nir_intrinsic_store_global: diff --git a/src/compiler/nir/nir_opt_undef.c b/src/compiler/nir/nir_opt_undef.c index 613a80ec685..4114def55cc 100644 --- a/src/compiler/nir/nir_opt_undef.c +++ b/src/compiler/nir/nir_opt_undef.c @@ -149,6 +149,7 @@ opt_undef_store(nir_intrinsic_instr *intrin) break; case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: case nir_intrinsic_store_per_primitive_output: case nir_intrinsic_store_ssbo: case nir_intrinsic_store_shared: diff --git a/src/compiler/nir/nir_opt_varyings.c b/src/compiler/nir/nir_opt_varyings.c index 243178b7de9..74f69b8073d 100644 --- a/src/compiler/nir/nir_opt_varyings.c +++ b/src/compiler/nir/nir_opt_varyings.c @@ -1495,14 +1495,17 @@ gather_outputs(struct nir_builder *builder, nir_intrinsic_instr *intr, void *cb_ if (intr->intrinsic != nir_intrinsic_store_output && intr->intrinsic != nir_intrinsic_load_output && intr->intrinsic != nir_intrinsic_store_per_vertex_output && + intr->intrinsic != nir_intrinsic_store_per_view_output && intr->intrinsic != nir_intrinsic_store_per_primitive_output && intr->intrinsic != nir_intrinsic_load_per_vertex_output && + intr->intrinsic != 
nir_intrinsic_load_per_view_output && intr->intrinsic != nir_intrinsic_load_per_primitive_output) return false; bool is_store = intr->intrinsic == nir_intrinsic_store_output || intr->intrinsic == nir_intrinsic_store_per_vertex_output || + intr->intrinsic == nir_intrinsic_store_per_view_output || intr->intrinsic == nir_intrinsic_store_per_primitive_output; if (is_store) { diff --git a/src/compiler/nir/nir_opt_vectorize_io.c b/src/compiler/nir/nir_opt_vectorize_io.c index 888ca409155..04cd236f840 100644 --- a/src/compiler/nir/nir_opt_vectorize_io.c +++ b/src/compiler/nir/nir_opt_vectorize_io.c @@ -520,9 +520,11 @@ nir_opt_vectorize_io(nir_shader *shader, nir_variable_mode modes) case nir_intrinsic_load_output: case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_load_per_view_output: case nir_intrinsic_load_per_primitive_output: case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: case nir_intrinsic_store_per_primitive_output: if (!(modes & nir_var_shader_out)) continue; diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 8b4c2c25fef..6eb04eff4f0 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -1371,6 +1371,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) case nir_intrinsic_store_output: case nir_intrinsic_store_per_primitive_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: mode = nir_var_shader_out; break; @@ -1423,7 +1424,8 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) state->shader->info.stage == MESA_SHADER_GEOMETRY && (instr->intrinsic == nir_intrinsic_store_output || instr->intrinsic == nir_intrinsic_store_per_primitive_output || - instr->intrinsic == nir_intrinsic_store_per_vertex_output)) { + instr->intrinsic == nir_intrinsic_store_per_vertex_output || + instr->intrinsic == nir_intrinsic_store_per_view_output)) { unsigned 
gs_streams = io.gs_streams; fprintf(fp, " gs_streams("); for (unsigned i = 0; i < 4; i++) { @@ -1651,6 +1653,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) case nir_intrinsic_load_output: case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: var_mode = nir_var_shader_out; break; default: diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c index 815dd634df0..99befd17e47 100644 --- a/src/compiler/nir/nir_validate.c +++ b/src/compiler/nir/nir_validate.c @@ -618,6 +618,7 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) case nir_intrinsic_load_interpolated_input: case nir_intrinsic_load_output: case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_load_per_view_output: case nir_intrinsic_load_per_primitive_output: case nir_intrinsic_load_push_constant: /* All memory load operations must load at least a byte */ @@ -652,6 +653,7 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: if (state->shader->info.stage == MESA_SHADER_FRAGMENT) validate_assert(state, nir_src_bit_size(instr->src[0]) >= 8); else @@ -1527,10 +1529,6 @@ validate_var_decl(nir_variable *var, nir_variable_mode valid_modes, const struct glsl_type *type = glsl_get_array_element(var->type); if (nir_is_arrayed_io(var, state->shader->info.stage)) { - if (var->data.per_view) { - assert(glsl_type_is_array(type)); - type = glsl_get_array_element(type); - } assert(glsl_type_is_array(type)); assert(glsl_type_is_scalar(glsl_get_array_element(type))); } else { diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index 7566b9a057b..72b8e18a79f 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -118,6 +118,8 @@ static const nir_shader_compiler_options 
ir3_base_options = { .divergence_analysis_options = nir_divergence_uniform_load_tears, .scalarize_ddx = true, + + .per_view_unique_driver_locations = true, }; struct ir3_compiler * diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 6498ae48af0..aede968fefe 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -2842,6 +2842,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) b = NULL; break; case nir_intrinsic_store_output: + case nir_intrinsic_store_per_view_output: setup_output(ctx, intr); break; case nir_intrinsic_load_base_vertex: @@ -5028,25 +5029,21 @@ setup_output(struct ir3_context *ctx, nir_intrinsic_instr *intr) struct ir3_shader_variant *so = ctx->so; nir_io_semantics io = nir_intrinsic_io_semantics(intr); - compile_assert(ctx, nir_src_is_const(intr->src[1])); + nir_src offset_src = *nir_get_io_offset_src(intr); + compile_assert(ctx, nir_src_is_const(offset_src)); - unsigned offset = nir_src_as_uint(intr->src[1]); - unsigned n = nir_intrinsic_base(intr) + offset; + unsigned offset = nir_src_as_uint(offset_src); unsigned frac = nir_intrinsic_component(intr); unsigned ncomp = nir_intrinsic_src_components(intr, 0); + unsigned slot = io.location + offset; /* For per-view variables, each user-facing slot corresponds to multiple - * views, each with a corresponding driver_location, and the offset is for - * the driver_location. To properly figure out of the slot, we'd need to - * plumb through the number of views. However, for now we only use - * per-view with gl_Position, so we assume that the variable is not an - * array or matrix (so there are no indirect accesses to the variable - * itself) and the indirect offset corresponds to the view. - */ - unsigned slot = io.location + (io.per_view ? 
0 : offset); - - if (io.per_view && offset > 0) - so->multi_pos_output = true; + * views, each with a corresponding driver_location, and the view index + * offsets the driver_location. */ + unsigned view_index = intr->intrinsic == nir_intrinsic_store_per_view_output + ? nir_src_as_uint(intr->src[1]) + : 0; + unsigned n = nir_intrinsic_base(intr) + offset + view_index; if (ctx->so->type == MESA_SHADER_FRAGMENT) { switch (slot) { @@ -5124,8 +5121,9 @@ setup_output(struct ir3_context *ctx, nir_intrinsic_instr *intr) compile_assert(ctx, so->outputs_count <= ARRAY_SIZE(so->outputs)); so->outputs[n].slot = slot; - if (io.per_view) - so->outputs[n].view = offset; + if (view_index > 0) + so->multi_pos_output = true; + so->outputs[n].view = view_index; for (int i = 0; i < ncomp; i++) { unsigned idx = (n * 4) + i + frac; diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 82ca86d6a47..000455491fa 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -926,7 +926,8 @@ output_slot_used_for_binning(gl_varying_slot slot) static bool remove_nonbinning_output(nir_builder *b, nir_intrinsic_instr *intr, void *data) { - if (intr->intrinsic != nir_intrinsic_store_output) + if (intr->intrinsic != nir_intrinsic_store_output && + intr->intrinsic != nir_intrinsic_store_per_view_output) return false; nir_io_semantics io = nir_intrinsic_io_semantics(intr); diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index 7aabc5de438..417d3039ddc 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -145,6 +145,7 @@ is_intrinsic_store(nir_intrinsic_op op) { switch (op) { case nir_intrinsic_store_output: + case nir_intrinsic_store_per_view_output: case nir_intrinsic_store_scratch: case nir_intrinsic_store_ssbo: case nir_intrinsic_store_shared: diff --git a/src/freedreno/ir3/ir3_nir_lower_64b.c b/src/freedreno/ir3/ir3_nir_lower_64b.c index c244c609f9f..a9171c1d4e0 100644 --- 
a/src/freedreno/ir3/ir3_nir_lower_64b.c +++ b/src/freedreno/ir3/ir3_nir_lower_64b.c @@ -62,6 +62,7 @@ lower_64b_intrinsics(nir_builder *b, nir_instr *instr, void *unused) switch (intr->intrinsic) { case nir_intrinsic_store_ssbo: case nir_intrinsic_store_global_ir3: + case nir_intrinsic_store_per_view_output: offset_src_idx = 2; break; default: @@ -123,6 +124,7 @@ lower_64b_intrinsics(nir_builder *b, nir_instr *instr, void *unused) case nir_intrinsic_load_ssbo: case nir_intrinsic_load_ubo: case nir_intrinsic_load_global_ir3: + case nir_intrinsic_load_per_view_output: offset_src_idx = 1; break; default: diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index dd090a24a85..591782c7806 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -81,6 +81,7 @@ const struct nir_shader_compiler_options brw_scalar_nir_options = { .scalarize_ddx = true, .support_indirect_inputs = (uint8_t)BITFIELD_MASK(PIPE_SHADER_TYPES), .support_indirect_outputs = (uint8_t)BITFIELD_MASK(PIPE_SHADER_TYPES), + .per_view_unique_driver_locations = true, }; struct brw_compiler * diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index e9eec017c33..8fc5300c3f8 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -285,8 +285,10 @@ is_output(nir_intrinsic_instr *intrin) { return intrin->intrinsic == nir_intrinsic_load_output || intrin->intrinsic == nir_intrinsic_load_per_vertex_output || + intrin->intrinsic == nir_intrinsic_load_per_view_output || intrin->intrinsic == nir_intrinsic_store_output || - intrin->intrinsic == nir_intrinsic_store_per_vertex_output; + intrin->intrinsic == nir_intrinsic_store_per_vertex_output || + intrin->intrinsic == nir_intrinsic_store_per_view_output; } @@ -342,6 +344,56 @@ remap_patch_urb_offsets(nir_block *block, nir_builder *b, return true; } +/* Replace store_per_view_output with plain store_output, mapping the view index + * to IO offset.
Because we only use per-view outputs for position, the offset + * pitch is always 1. */ +static bool +lower_per_view_outputs(nir_builder *b, + nir_intrinsic_instr *intrin, + UNUSED void *cb_data) +{ + if (intrin->intrinsic != nir_intrinsic_store_per_view_output && + intrin->intrinsic != nir_intrinsic_load_per_view_output) + return false; + + b->cursor = nir_before_instr(&intrin->instr); + + nir_src *view_index = nir_get_io_arrayed_index_src(intrin); + nir_src *offset = nir_get_io_offset_src(intrin); + + nir_def *new_offset = nir_iadd(b, view_index->ssa, offset->ssa); + + nir_intrinsic_instr *new; + if (intrin->intrinsic == nir_intrinsic_store_per_view_output) + new = nir_store_output(b, intrin->src[0].ssa, new_offset); + else { + nir_def *new_def = nir_load_output(b, intrin->def.num_components, + intrin->def.bit_size, new_offset); + new = nir_instr_as_intrinsic(new_def->parent_instr); + } + + nir_intrinsic_set_base(new, nir_intrinsic_base(intrin)); + nir_intrinsic_set_range(new, nir_intrinsic_range(intrin)); + nir_intrinsic_set_write_mask(new, nir_intrinsic_write_mask(intrin)); + nir_intrinsic_set_component(new, nir_intrinsic_component(intrin)); + nir_intrinsic_set_src_type(new, nir_intrinsic_src_type(intrin)); + nir_intrinsic_set_io_semantics(new, nir_intrinsic_io_semantics(intrin)); + + if (intrin->intrinsic == nir_intrinsic_load_per_view_output) + nir_def_rewrite_uses(&intrin->def, &new->def); + nir_instr_remove(&intrin->instr); + + return true; +} + +static bool +brw_nir_lower_per_view_outputs(nir_shader *nir) +{ + return nir_shader_intrinsics_pass(nir, lower_per_view_outputs, + nir_metadata_control_flow, + NULL); +} + void brw_nir_lower_vs_inputs(nir_shader *nir) { @@ -640,6 +692,7 @@ brw_nir_lower_vue_outputs(nir_shader *nir) nir_lower_io(nir, nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32); + brw_nir_lower_per_view_outputs(nir); } void @@ -1220,7 +1273,7 @@ brw_mesh_compact_io(nir_shader *mesh, nir_shader *frag) assert(location < 
ARRAY_SIZE(mapping)); const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, MESA_SHADER_MESH) || var->data.per_view) { + if (nir_is_arrayed_io(var, MESA_SHADER_MESH)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -2253,7 +2306,7 @@ brw_nir_get_var_type(const struct nir_shader *nir, nir_variable *var) const struct glsl_type *type = var->interface_type; if (!type) { type = var->type; - if (nir_is_arrayed_io(var, nir->info.stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, nir->info.stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } diff --git a/src/intel/compiler/elk/elk_nir.c b/src/intel/compiler/elk/elk_nir.c index 61a5f3240e2..f8a8c151d30 100644 --- a/src/intel/compiler/elk/elk_nir.c +++ b/src/intel/compiler/elk/elk_nir.c @@ -1899,7 +1899,7 @@ elk_nir_get_var_type(const struct nir_shader *nir, nir_variable *var) const struct glsl_type *type = var->interface_type; if (!type) { type = var->type; - if (nir_is_arrayed_io(var, nir->info.stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, nir->info.stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } diff --git a/src/intel/vulkan/anv_mesh_perprim_wa.c b/src/intel/vulkan/anv_mesh_perprim_wa.c index f46d6a1082b..c35b9f5c365 100644 --- a/src/intel/vulkan/anv_mesh_perprim_wa.c +++ b/src/intel/vulkan/anv_mesh_perprim_wa.c @@ -114,7 +114,7 @@ anv_mesh_convert_attrs_prim_to_vert(struct nir_shader *nir, location >= VARYING_SLOT_VAR0); const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, MESA_SHADER_MESH) || var->data.per_view) { + if (nir_is_arrayed_io(var, MESA_SHADER_MESH)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } diff --git a/src/panfrost/compiler/bi_lower_divergent_indirects.c b/src/panfrost/compiler/bi_lower_divergent_indirects.c index b4f3a88c582..54fb34d07c5 100644 --- a/src/panfrost/compiler/bi_lower_divergent_indirects.c +++ 
b/src/panfrost/compiler/bi_lower_divergent_indirects.c @@ -60,6 +60,12 @@ bi_lower_divergent_indirects_impl(nir_builder *b, nir_intrinsic_instr *intr, offset = nir_get_io_offset_src(intr); break; + case nir_intrinsic_store_per_view_output: + assert(stage == MESA_SHADER_VERTEX); + assert(!nir_src_is_divergent(&intr->src[1])); + offset = nir_get_io_offset_src(intr); + break; + case nir_intrinsic_image_texel_address: case nir_intrinsic_image_load: case nir_intrinsic_image_store: