ir3: Support multiview in GS lowering

With GS+multiview, the VS will loop over each view in the shader while
each GS invocation only corresponds to a single view. Varyings for each
view will be stored next to each other in local memory. Implement view
index calculations when lowering VS outputs/GS inputs.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40153>
This commit is contained in:
Connor Abbott 2026-03-24 19:28:52 -04:00 committed by Marge Bot
parent bc72ef2ee9
commit be84cb6211
2 changed files with 50 additions and 3 deletions

View file

@ -16,6 +16,9 @@ struct state {
unsigned stride;
} map;
uint32_t view_mask;
unsigned view_count;
nir_def *header;
nir_variable *vertex_count_var;
@ -122,7 +125,8 @@ shader_io_get_unique_index(gl_varying_slot slot)
static nir_def *
build_local_offset(nir_builder *b, struct state *state, nir_def *vertex,
uint32_t location, uint32_t comp, nir_def *offset)
nir_def *view, uint32_t location, uint32_t comp,
nir_def *offset)
{
nir_def *primitive_stride = nir_load_vs_primitive_stride_ir3(b);
nir_def *primitive_offset =
@ -147,6 +151,9 @@ build_local_offset(nir_builder *b, struct state *state, nir_def *vertex,
UNREACHABLE("bad shader stage");
}
if (state->view_count > 1)
vertex = nir_iadd(b, nir_imul_imm(b, vertex, state->view_count), view);
nir_def *vertex_offset = nir_imul24(b, vertex, vertex_stride);
return nir_iadd(
@ -249,10 +256,17 @@ lower_block_to_explicit_output(nir_block *block, nir_builder *b,
continue;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
nir_def *view = NULL;
switch (intr->intrinsic) {
case nir_intrinsic_store_per_view_output:
view = intr->src[1].ssa;
FALLTHROUGH;
case nir_intrinsic_store_output: {
// src[] = { value, offset }.
nir_def *intr_offset = intr->intrinsic ==
nir_intrinsic_store_per_view_output ? intr->src[2].ssa :
intr->src[1].ssa;
/* nir_lower_io_vars_to_temporaries replaces all access to output
* variables with temp variables and then emits a nir_copy_var at
@ -266,8 +280,9 @@ lower_block_to_explicit_output(nir_block *block, nir_builder *b,
nir_def *vertex_id = build_vertex_id(b, state);
nir_def *offset = build_local_offset(
b, state, vertex_id, nir_intrinsic_io_semantics(intr).location,
nir_intrinsic_component(intr), intr->src[1].ssa);
b, state, vertex_id, view,
nir_intrinsic_io_semantics(intr).location,
nir_intrinsic_component(intr), intr_offset);
nir_store_shared_ir3(b, intr->src[0].ssa, offset);
progress = true;
@ -295,6 +310,9 @@ ir3_nir_lower_to_explicit_output(nir_shader *shader,
{
struct state state = {};
state.view_mask = shader->info.view_mask;
state.view_count = MAX2(1, util_bitcount(shader->info.view_mask));
build_primitive_map(shader, &state.map);
memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
@ -314,6 +332,7 @@ ir3_nir_lower_to_explicit_output(nir_shader *shader,
progress |= lower_block_to_explicit_output(block, &b, &state);
v->output_size = state.map.stride;
v->view_count = state.view_count;
return nir_progress(progress, impl, nir_metadata_control_flow);
}
@ -335,9 +354,29 @@ lower_block_to_explicit_input(nir_block *block, nir_builder *b,
b->cursor = nir_before_instr(&intr->instr);
nir_def *view = NULL;
if (state->view_count > 1) {
view = nir_load_view_index(b);
/* nir_lower_multiview tightly packs the outputs, skipping over
* inactive views. This means we need to compute the tightly packed
* index from the original view_index if the view mask is not
* contiguous (i.e. not a power of two minus one):
*
* mask = (1u << view) - 1
* packed_view = bitcount(mask & view_mask)
*/
if (!util_is_power_of_two_or_zero(state->view_mask + 1)) {
nir_def *mask =
nir_iadd_imm(b, nir_ishl(b, nir_imm_int(b, 1), view), -1);
view =
nir_bit_count(b, nir_iand_imm(b, mask, state->view_mask));
}
}
nir_def *offset = build_local_offset(
b, state,
intr->src[0].ssa, // this is typically gl_InvocationID
view,
nir_intrinsic_io_semantics(intr).location,
nir_intrinsic_component(intr), intr->src[1].ssa);
@ -370,6 +409,9 @@ ir3_nir_lower_to_explicit_input(nir_shader *shader,
{
struct state state = {};
state.view_mask = shader->info.view_mask;
state.view_count = MAX2(1, util_bitcount(shader->info.view_mask));
/* when using stl/ldl (instead of stlw/ldlw) for linking VS and HS,
* HS uses a different primitive id, which starts at bit 16 in the header
*/

View file

@ -771,6 +771,11 @@ struct ir3_shader_variant {
/* Size in dwords of all outputs for VS, size of entire patch for HS. */
uint32_t output_size;
/* For stages with output_size, the number of views. Outputs are replicated
* per view.
*/
uint32_t view_count;
/* Expected size of incoming output_loc for HS, DS, and GS */
uint32_t input_size;