diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c index be682c7d734..7bb5f5723d4 100644 --- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c +++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c @@ -621,6 +621,14 @@ emit_output(struct ntv_context *ctx, struct nir_variable *var) if (var->data.patch) spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationPatch); + if (var->data.explicit_xfb_buffer) { /* flag is set by update_so_info() in zink_compiler.c when a stream output covers the whole variable: emit offset, xfb buffer and xfb stride for var_id here */ + spirv_builder_emit_offset(&ctx->builder, var_id, var->data.offset); + spirv_builder_emit_xfb_buffer(&ctx->builder, var_id, var->data.xfb.buffer); + spirv_builder_emit_xfb_stride(&ctx->builder, var_id, var->data.xfb.stride); + if (var->data.stream) /* the stream is emitted only when it is non-zero */ + spirv_builder_emit_stream(&ctx->builder, var_id, var->data.stream); + } + _mesa_hash_table_insert(ctx->vars, var, (void *)(intptr_t)var_id); assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); @@ -1252,6 +1260,8 @@ emit_so_info(struct ntv_context *ctx, const struct zink_so_info *so_info) { unsigned output = 0; for (unsigned i = 0; i < so_info->so_info.num_outputs; i++) { + if (so_info->skip[i]) + continue; /* outputs flagged in skip[] by update_so_info() are not processed by this loop */ struct pipe_stream_output so_output = so_info->so_info.output[i]; unsigned slot = so_info->so_info_slots[i] << 2 | so_output.start_component; SpvId out_type = get_output_type(ctx, slot, so_output.num_components); @@ -1299,6 +1309,8 @@ emit_so_outputs(struct ntv_context *ctx, const struct zink_so_info *so_info) { for (unsigned i = 0; i < so_info->so_info.num_outputs; i++) { + if (so_info->skip[i]) + continue; /* same skip[] filter as in emit_so_info(), so both loops ignore the same outputs */ uint32_t components[NIR_MAX_VEC_COMPONENTS]; unsigned slot = so_info->so_info_slots[i]; struct pipe_stream_output so_output = so_info->so_info.output[i]; diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 620bc434618..38e95c194a4 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -402,13 +402,13 @@ 
check_psiz(struct nir_shader *s) return false; } -/* semi-copied from iris */ static void update_so_info(struct zink_shader *sh, uint64_t outputs_written, bool have_psiz) { uint8_t reverse_map[64] = {}; unsigned slot = 0; + /* semi-copied from iris */ while (outputs_written) { int bit = u_bit_scan64(&outputs_written); /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */ @@ -417,8 +417,36 @@ update_so_info(struct zink_shader *sh, reverse_map[slot++] = bit; } + nir_foreach_shader_out_variable(var, sh->nir) + var->data.explicit_xfb_buffer = 0; /* clear the flag on every shader output variable before deciding below which ones to flag */ + + bool inlined[64] = {0}; /* indexed by variable location: true once that variable has been flagged for capture at its declaration */ for (unsigned i = 0; i < sh->streamout.so_info.num_outputs; i++) { struct pipe_stream_output *output = &sh->streamout.so_info.output[i]; + unsigned slot = reverse_map[output->register_index]; /* NOTE(review): this declaration shadows the function-level 'slot' counter used to fill reverse_map */ + if ((sh->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(sh->nir->info.gs.active_stream_mask) == 1) && + !output->start_component) { /* only non-geometry stages, or geometry shaders whose active_stream_mask has a bitcount of 1, and only outputs that start at component 0 */ + nir_variable *var = NULL; + while (!var) + var = nir_find_variable_with_location(sh->nir, nir_var_shader_out, slot--); /* retry at successively lower locations until the lookup returns a variable; NOTE(review): slot is unsigned and the loop has no lower bound - confirm a variable always exists at or below the starting location */ + slot++; /* undo the final post-decrement so slot is the location that was actually looked up last */ + if (inlined[slot]) { /* this variable was already flagged by an earlier output, so this output only needs to be skipped */ + sh->streamout.skip[i] = true; + continue; + } + assert(var && var->data.location == slot); + /* if this is the entire variable, try to blast it out during the initial declaration */ + if (glsl_get_components(var->type) == output->num_components) { + var->data.explicit_xfb_buffer = 1; + var->data.xfb.buffer = output->output_buffer; + var->data.xfb.stride = sh->streamout.so_info.stride[output->output_buffer] * 4; /* stride[] and dst_offset are both scaled by 4 here - presumably dwords to bytes; TODO confirm */ + var->data.offset = output->dst_offset * 4; + var->data.stream = output->stream; + sh->streamout.skip[i] = true; /* NOTE(review): skip[] is only ever set to true in the visible code; confirm it is zeroed before this function can run again on the same shader */ + inlined[slot] = true; + continue; + } + } /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */ sh->streamout.so_info_slots[i] = reverse_map[output->register_index]; } diff --git a/src/gallium/drivers/zink/zink_compiler.h b/src/gallium/drivers/zink/zink_compiler.h index e61d8ae45b7..bc2c57df257 100644 --- a/src/gallium/drivers/zink/zink_compiler.h +++ 
b/src/gallium/drivers/zink/zink_compiler.h @@ -51,6 +51,7 @@ struct tgsi_token; struct zink_so_info { struct pipe_stream_output_info so_info; unsigned so_info_slots[PIPE_MAX_SO_OUTPUTS]; + bool skip[PIPE_MAX_SO_OUTPUTS]; /* per-output flag, indexed like so_info.output[]: when true, emit_so_info() and emit_so_outputs() skip that output */ bool have_xfb; };