diff --git a/src/freedreno/ir3/ir3_nir_lower_tess.c b/src/freedreno/ir3/ir3_nir_lower_tess.c index 6fc2f3a92f1..6b5d66a77b7 100644 --- a/src/freedreno/ir3/ir3_nir_lower_tess.c +++ b/src/freedreno/ir3/ir3_nir_lower_tess.c @@ -16,6 +16,9 @@ struct state { unsigned stride; } map; + uint32_t view_mask; + unsigned view_count; + nir_def *header; nir_variable *vertex_count_var; @@ -122,7 +125,8 @@ shader_io_get_unique_index(gl_varying_slot slot) static nir_def * build_local_offset(nir_builder *b, struct state *state, nir_def *vertex, - uint32_t location, uint32_t comp, nir_def *offset) + nir_def *view, uint32_t location, uint32_t comp, + nir_def *offset) { nir_def *primitive_stride = nir_load_vs_primitive_stride_ir3(b); nir_def *primitive_offset = @@ -147,6 +151,9 @@ build_local_offset(nir_builder *b, struct state *state, nir_def *vertex, UNREACHABLE("bad shader stage"); } + if (state->view_count > 1) + vertex = nir_iadd(b, nir_imul_imm(b, vertex, state->view_count), view); + nir_def *vertex_offset = nir_imul24(b, vertex, vertex_stride); return nir_iadd( @@ -249,10 +256,17 @@ lower_block_to_explicit_output(nir_block *block, nir_builder *b, continue; nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + nir_def *view = NULL; switch (intr->intrinsic) { + case nir_intrinsic_store_per_view_output: + view = intr->src[1].ssa; + FALLTHROUGH; case nir_intrinsic_store_output: { // src[] = { value, offset }. + nir_def *intr_offset = intr->intrinsic == + nir_intrinsic_store_per_view_output ? intr->src[2].ssa : + intr->src[1].ssa; /* nir_lower_io_vars_to_temporaries replaces all access to output * variables with temp variables and then emits a nir_copy_var at @@ -266,8 +280,9 @@ lower_block_to_explicit_output(nir_block *block, nir_builder *b, nir_def *vertex_id = build_vertex_id(b, state); nir_def *offset = build_local_offset( - b, state, vertex_id, nir_intrinsic_io_semantics(intr).location, - nir_intrinsic_component(intr), intr->src[1].ssa); + b, state, vertex_id, view, + nir_intrinsic_io_semantics(intr).location, + nir_intrinsic_component(intr), intr_offset); nir_store_shared_ir3(b, intr->src[0].ssa, offset); progress = true; @@ -295,6 +310,9 @@ ir3_nir_lower_to_explicit_output(nir_shader *shader, { struct state state = {}; + state.view_mask = shader->info.view_mask; + state.view_count = MAX2(1, util_bitcount(shader->info.view_mask)); + build_primitive_map(shader, &state.map); memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc)); @@ -314,6 +332,7 @@ ir3_nir_lower_to_explicit_output(nir_shader *shader, progress |= lower_block_to_explicit_output(block, &b, &state); v->output_size = state.map.stride; + v->view_count = state.view_count; return nir_progress(progress, impl, nir_metadata_control_flow); } @@ -335,9 +354,29 @@ lower_block_to_explicit_input(nir_block *block, nir_builder *b, b->cursor = nir_before_instr(&intr->instr); + nir_def *view = NULL; + if (state->view_count > 1) { + view = nir_load_view_index(b); + /* nir_lower_multiview tightly packs the outputs, skipping over + * inactive views. This means we need to compute the tightly packed + * index from the original view_index if the view mask is not + * contiguous (i.e. not a power of two minus one): + * + * mask = (1u << view) - 1 + * packed_view = bitcount(mask & view_mask) + */ + if (!util_is_power_of_two_or_zero(state->view_mask + 1)) { + nir_def *mask = + nir_iadd_imm(b, nir_ishl(b, nir_imm_int(b, 1), view), -1); + view = + nir_bit_count(b, nir_iand_imm(b, mask, state->view_mask)); + } + } + nir_def *offset = build_local_offset( b, state, intr->src[0].ssa, // this is typically gl_InvocationID + view, nir_intrinsic_io_semantics(intr).location, nir_intrinsic_component(intr), intr->src[1].ssa); @@ -370,6 +409,9 @@ ir3_nir_lower_to_explicit_input(nir_shader *shader, { struct state state = {}; + state.view_mask = shader->info.view_mask; + state.view_count = MAX2(1, util_bitcount(shader->info.view_mask)); + /* when using stl/ldl (instead of stlw/ldlw) for linking VS and HS, * HS uses a different primitive id, which starts at bit 16 in the header */ diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 5c3026f4845..182eeff5573 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -771,6 +771,11 @@ struct ir3_shader_variant { /* Size in dwords of all outputs for VS, size of entire patch for HS. */ uint32_t output_size; + /* For stages with output_size, the number of views. Outputs are replicated + * per view. + */ + uint32_t view_count; + /* Expected size of incoming output_loc for HS, DS, and GS */ uint32_t input_size;