diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c index 3fb9017e217..a7028c01dea 100644 --- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c +++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c @@ -668,7 +668,8 @@ emit_output(struct ntv_context *ctx, struct nir_variable *var) if (var->data.patch) spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationPatch); - if (var->data.explicit_xfb_buffer) { + if (var->data.explicit_xfb_buffer && + (!glsl_type_is_array(var->type) || glsl_array_size(var->type) == 1 || !glsl_type_is_interface(glsl_without_array(var->type)))) { spirv_builder_emit_offset(&ctx->builder, var_id, var->data.offset); spirv_builder_emit_xfb_buffer(&ctx->builder, var_id, var->data.xfb.buffer); spirv_builder_emit_xfb_stride(&ctx->builder, var_id, var->data.xfb.stride); @@ -1281,9 +1282,13 @@ get_output_type(struct ntv_context *ctx, unsigned register_index, unsigned num_c /* index is based on component, so we might have to go back a few slots to get to the base */ while (!out_type) out_type = ctx->so_output_gl_types[register_index--]; - enum glsl_base_type base_type = glsl_get_base_type(out_type); - if (base_type == GLSL_TYPE_ARRAY) - base_type = glsl_get_base_type(glsl_without_array(out_type)); + const struct glsl_type *bare_type = glsl_without_array(out_type); + enum glsl_base_type base_type; + if (glsl_type_is_struct_or_ifc(bare_type)) + base_type = GLSL_TYPE_UINT; + else + base_type = glsl_get_base_type(bare_type); + switch (base_type) { case GLSL_TYPE_BOOL: @@ -1305,6 +1310,16 @@ get_output_type(struct ntv_context *ctx, unsigned register_index, unsigned num_c return 0; } +static nir_variable * +find_propagate_var(nir_shader *nir, unsigned slot) +{ + nir_foreach_shader_out_variable(var, nir) { + if (var->data.location == slot && glsl_type_is_array(var->type)) + return var; + } + return NULL; +} + /* for streamout create new outputs, as streamout can be done on individual components, from complete outputs, so we just can't use the created packed outputs */ static void @@ -1353,6 +1368,66 @@ emit_so_info(struct ntv_context *ctx, const struct zink_shader_info *so_info, ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id; output += align(so_output.num_components, 4) / 4; } + + /* these are interface block arrays which need to be split + * across N buffers due to GL spec requirements + */ + u_foreach_bit(bit, so_info->so_propagate) { + unsigned slot = bit + VARYING_SLOT_VAR0; + nir_variable *var = find_propagate_var(ctx->nir, slot); + assert(var); + const struct glsl_type *bare_type = glsl_without_array(var->type); + SpvId base_type = get_glsl_type(ctx, bare_type); + for (unsigned i = 0; i < glsl_array_size(var->type); i++) { + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassOutput, + base_type); + SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type, + SpvStorageClassOutput); + char name[1024]; + if (var->name) + snprintf(name, sizeof(name), "xfb_%s[%d]", var->name, i); + else + snprintf(name, sizeof(name), "xfb_slot%u[%d]", slot, i); + spirv_builder_emit_name(&ctx->builder, var_id, name); + spirv_builder_emit_offset(&ctx->builder, var_id, var->data.offset); + spirv_builder_emit_xfb_buffer(&ctx->builder, var_id, var->data.xfb.buffer + i); + spirv_builder_emit_xfb_stride(&ctx->builder, var_id, var->data.xfb.stride); + if (var->data.stream) + spirv_builder_emit_stream(&ctx->builder, var_id, var->data.stream); + + uint32_t location = first_so + so_info->so_info.num_outputs + i; + assert(location < VARYING_SLOT_VAR0); + spirv_builder_emit_location(&ctx->builder, var_id, location); + + uint32_t *key = ralloc_size(ctx->mem_ctx, sizeof(uint32_t)); + *key = (uint32_t)(slot + i) << 2; + _mesa_hash_table_insert(ctx->so_outputs, key, (void *)(intptr_t)var_id); + + assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); + ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id; + } + } +} + +static const struct glsl_type * +unroll_struct_type(struct ntv_context *ctx, const struct glsl_type *slot_type, unsigned *slot_idx, SpvId *deref, const struct glsl_type **arraytype) +{ + const struct glsl_type *type = slot_type; + unsigned slot_count = 0; + unsigned cur_slot = 0; + unsigned idx = 0; + /* iterate over all the members in the struct, stopping once the slot idx is reached */ + for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) { + /* use array type for slot counting but return array member type for unroll */ + *arraytype = glsl_get_struct_field(slot_type, i); + type = glsl_without_array(*arraytype); + slot_count = glsl_count_vec4_slots(*arraytype, false, false); + idx = i; + } + *deref = spirv_builder_emit_composite_extract(&ctx->builder, get_glsl_type(ctx, glsl_get_struct_field(slot_type, idx)), *deref, &idx, 1); + *slot_idx -= (cur_slot - slot_count); + return type; } static void @@ -1364,7 +1439,8 @@ emit_so_outputs(struct ntv_context *ctx, unsigned slot = so_info->so_info_slots[i]; struct pipe_stream_output so_output = so_info->so_info.output[i]; uint32_t so_key = (uint32_t) so_output.register_index << 2 | so_output.start_component; - uint32_t location = (uint32_t) slot << 2 | so_output.start_component; + uint32_t output_location = (uint32_t) slot << 2 | so_output.start_component; + uint32_t location = output_location; struct hash_entry *he = _mesa_hash_table_search(ctx->so_outputs, &so_key); assert(he); SpvId so_output_var_id = (SpvId)(intptr_t)he->data; @@ -1390,6 +1466,36 @@ emit_so_outputs(struct ntv_context *ctx, components[c] += 4; } + const struct glsl_type *bare_type = glsl_without_array(out_type); + if (glsl_type_is_struct_or_ifc(bare_type)) { + uint32_t base_slot = (location & ~so_output.start_component) / 4; + unsigned slot_idx = slot - base_slot; + unsigned struct_slots = glsl_count_vec4_slots(bare_type, false, false); + unsigned array_idx = slot_idx / struct_slots; + slot_idx %= glsl_count_vec4_slots(bare_type, false, false); + src = spirv_builder_emit_composite_extract(&ctx->builder, get_glsl_type(ctx, bare_type), src, &array_idx, 1); + /* need to find the vec4 that's being exported by this slot */ + while (glsl_type_is_struct_or_ifc(bare_type)) + bare_type = unroll_struct_type(ctx, bare_type, &slot_idx, &src, &out_type); + output_type = get_glsl_type(ctx, out_type); + if (glsl_type_is_vector_or_scalar(out_type)) + src = emit_bitcast(ctx, type, src); + else if (glsl_type_is_array(out_type)) { + for (unsigned c = 0; c < so_output.num_components; c++) { + uint32_t member = so_output.start_component + c; + SpvId base_type = get_glsl_basetype(ctx, glsl_get_base_type(glsl_without_array_or_matrix(out_type))); + components[c] = spirv_builder_emit_composite_extract(&ctx->builder, base_type, src, &member, 1); + } + if (so_output.num_components > 1) + src = spirv_builder_emit_composite_construct(&ctx->builder, type, components, so_output.num_components); + else + src = components[0]; + out_type = glsl_vector_type(GLSL_TYPE_UINT, so_output.num_components); + output_type = type; + } + } + assert(!glsl_type_is_struct_or_ifc(out_type)); + /* if we're emitting a scalar or the type we're emitting matches the output's original type and we're * emitting the same number of components, then we can skip any sort of conversion here */ @@ -1404,8 +1510,10 @@ emit_so_outputs(struct ntv_context *ctx, result = spirv_builder_emit_vector_shuffle(&ctx->builder, type, src, src, components, so_output.num_components); - result = emit_bitcast(ctx, type, result); } else { + assert(glsl_type_is_array_or_matrix(out_type)); + ASSERTED const struct glsl_type *bare_type = glsl_without_array(out_type); + assert(!glsl_type_is_struct_or_ifc(bare_type)); /* for arrays, we need to manually extract each desired member * and re-pack them into the desired output type */ @@ -1427,8 +1535,34 @@ emit_so_outputs(struct ntv_context *ctx, } } + result = emit_bitcast(ctx, type, result); spirv_builder_emit_store(&ctx->builder, so_output_var_id, result); } + + u_foreach_bit(bit, so_info->so_propagate) { + unsigned slot = bit + VARYING_SLOT_VAR0; + nir_variable *var = find_propagate_var(ctx->nir, slot); + assert(var); + + const struct glsl_type *bare_type = glsl_without_array(var->type); + SpvId base_type = get_glsl_type(ctx, bare_type); + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassOutput, + base_type); + SpvId output = ctx->outputs[slot << 2]; + assert(output); + for (unsigned i = 0; i < glsl_array_size(var->type); i++) { + uint32_t so_key = (uint32_t) (slot + i) << 2; + struct hash_entry *he = _mesa_hash_table_search(ctx->so_outputs, &so_key); + assert(he); + SpvId so_output_var_id = (SpvId)(intptr_t)he->data; + + SpvId idx = emit_uint_const(ctx, 32, i); + SpvId deref = spirv_builder_emit_access_chain(&ctx->builder, pointer_type, output, &idx, 1); + SpvId load = spirv_builder_emit_load(&ctx->builder, base_type, deref); + spirv_builder_emit_store(&ctx->builder, so_output_var_id, load); + } + } } static SpvId diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index fb04d51308b..6887bb0289d 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -730,6 +730,9 @@ update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_ var->data.xfb.stride = so_info->stride[output->output_buffer] * 4; var->data.offset = output->dst_offset * 4; var->data.stream = output->stream; + /* GLSL specifies that interface blocks are split per-buffer in XFB */ + if (glsl_type_is_array(var->type) && glsl_array_size(var->type) > 1 && glsl_type_is_interface(glsl_without_array(var->type))) + zs->sinfo.so_propagate |= BITFIELD_BIT(var->data.location - VARYING_SLOT_VAR0); /* mark all slot components inlined to skip subsequent loop iterations */ for (unsigned j = 0; j < num_slots; j++) { slot = var->data.location + j; @@ -745,7 +748,7 @@ out: /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */ zs->sinfo.so_info_slots[zs->sinfo.so_info.num_outputs++] = reverse_map[output->register_index]; } - zs->sinfo.have_xfb = !!zs->sinfo.so_info.num_outputs; + zs->sinfo.have_xfb = zs->sinfo.so_info.num_outputs || zs->sinfo.so_propagate; /* ensure this doesn't get output in the shader by unsetting location */ if (have_fake_psiz && psiz) update_psiz_location(zs->nir, psiz); diff --git a/src/gallium/drivers/zink/zink_compiler.h b/src/gallium/drivers/zink/zink_compiler.h index 935997df90e..aa7ca91bf4e 100644 --- a/src/gallium/drivers/zink/zink_compiler.h +++ b/src/gallium/drivers/zink/zink_compiler.h @@ -55,6 +55,7 @@ struct tgsi_token; struct zink_shader_info { struct pipe_stream_output_info so_info; unsigned so_info_slots[PIPE_MAX_SO_OUTPUTS]; + uint32_t so_propagate; //left shifted by 32 bool last_vertex; bool have_xfb; bool have_sparse;