zink: handle spirv xfb insanity

this comes in two flavors:
* streamout of array<struct/block>
* partial streamout of array/struct/block

for the former:
* arrays of structs can just be blasted out in the initial var declaration (easy)
* arrays of blocks must be output to separate xfb buffers for each array block,
  which requires skipping initial xfb blast-off and instead propagating the values
  using tmp variables at a later point

for the latter:
* the optimal way to do this is to unwrap the struct first to figure out what's being
  emitted, at which point the value can be extracted and exported

fixes the rest of spec@arb_gl_spirv@execution@xfb
...except spec@arb_gl_spirv@execution@xfb@vs_block_array, which I'm suspecting is broken
due to vtn bugs

Reviewed-by: Dave Airlie <airlied@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15224>
This commit is contained in:
Mike Blumenkrantz 2022-02-24 15:43:44 -05:00 committed by Marge Bot
parent dfbb7c1f73
commit 3634063b08
3 changed files with 145 additions and 7 deletions

View file

@ -668,7 +668,8 @@ emit_output(struct ntv_context *ctx, struct nir_variable *var)
if (var->data.patch)
spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationPatch);
if (var->data.explicit_xfb_buffer) {
if (var->data.explicit_xfb_buffer &&
(!glsl_type_is_array(var->type) || glsl_array_size(var->type) == 1 || !glsl_type_is_interface(glsl_without_array(var->type)))) {
spirv_builder_emit_offset(&ctx->builder, var_id, var->data.offset);
spirv_builder_emit_xfb_buffer(&ctx->builder, var_id, var->data.xfb.buffer);
spirv_builder_emit_xfb_stride(&ctx->builder, var_id, var->data.xfb.stride);
@ -1281,9 +1282,13 @@ get_output_type(struct ntv_context *ctx, unsigned register_index, unsigned num_c
/* index is based on component, so we might have to go back a few slots to get to the base */
while (!out_type)
out_type = ctx->so_output_gl_types[register_index--];
enum glsl_base_type base_type = glsl_get_base_type(out_type);
if (base_type == GLSL_TYPE_ARRAY)
base_type = glsl_get_base_type(glsl_without_array(out_type));
const struct glsl_type *bare_type = glsl_without_array(out_type);
enum glsl_base_type base_type;
if (glsl_type_is_struct_or_ifc(bare_type))
base_type = GLSL_TYPE_UINT;
else
base_type = glsl_get_base_type(bare_type);
switch (base_type) {
case GLSL_TYPE_BOOL:
@ -1305,6 +1310,16 @@ get_output_type(struct ntv_context *ctx, unsigned register_index, unsigned num_c
return 0;
}
static nir_variable *
find_propagate_var(nir_shader *nir, unsigned slot)
{
nir_foreach_shader_out_variable(var, nir) {
if (var->data.location == slot && glsl_type_is_array(var->type))
return var;
}
return NULL;
}
/* for streamout create new outputs, as streamout can be done on individual components,
from complete outputs, so we just can't use the created packed outputs */
static void
@ -1353,6 +1368,66 @@ emit_so_info(struct ntv_context *ctx, const struct zink_shader_info *so_info,
ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id;
output += align(so_output.num_components, 4) / 4;
}
/* these are interface block arrays which need to be split
* across N buffers due to GL spec requirements
*/
u_foreach_bit(bit, so_info->so_propagate) {
unsigned slot = bit + VARYING_SLOT_VAR0;
nir_variable *var = find_propagate_var(ctx->nir, slot);
assert(var);
const struct glsl_type *bare_type = glsl_without_array(var->type);
SpvId base_type = get_glsl_type(ctx, bare_type);
for (unsigned i = 0; i < glsl_array_size(var->type); i++) {
SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
SpvStorageClassOutput,
base_type);
SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type,
SpvStorageClassOutput);
char name[1024];
if (var->name)
snprintf(name, sizeof(name), "xfb_%s[%d]", var->name, i);
else
snprintf(name, sizeof(name), "xfb_slot%u[%d]", slot, i);
spirv_builder_emit_name(&ctx->builder, var_id, name);
spirv_builder_emit_offset(&ctx->builder, var_id, var->data.offset);
spirv_builder_emit_xfb_buffer(&ctx->builder, var_id, var->data.xfb.buffer + i);
spirv_builder_emit_xfb_stride(&ctx->builder, var_id, var->data.xfb.stride);
if (var->data.stream)
spirv_builder_emit_stream(&ctx->builder, var_id, var->data.stream);
uint32_t location = first_so + so_info->so_info.num_outputs + i;
assert(location < VARYING_SLOT_VAR0);
spirv_builder_emit_location(&ctx->builder, var_id, location);
uint32_t *key = ralloc_size(ctx->mem_ctx, sizeof(uint32_t));
*key = (uint32_t)(slot + i) << 2;
_mesa_hash_table_insert(ctx->so_outputs, key, (void *)(intptr_t)var_id);
assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces));
ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id;
}
}
}
static const struct glsl_type *
unroll_struct_type(struct ntv_context *ctx, const struct glsl_type *slot_type, unsigned *slot_idx, SpvId *deref, const struct glsl_type **arraytype)
{
const struct glsl_type *type = slot_type;
unsigned slot_count = 0;
unsigned cur_slot = 0;
unsigned idx = 0;
/* iterate over all the members in the struct, stopping once the slot idx is reached */
for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) {
/* use array type for slot counting but return array member type for unroll */
*arraytype = glsl_get_struct_field(slot_type, i);
type = glsl_without_array(*arraytype);
slot_count = glsl_count_vec4_slots(*arraytype, false, false);
idx = i;
}
*deref = spirv_builder_emit_composite_extract(&ctx->builder, get_glsl_type(ctx, glsl_get_struct_field(slot_type, idx)), *deref, &idx, 1);
*slot_idx -= (cur_slot - slot_count);
return type;
}
static void
@ -1364,7 +1439,8 @@ emit_so_outputs(struct ntv_context *ctx,
unsigned slot = so_info->so_info_slots[i];
struct pipe_stream_output so_output = so_info->so_info.output[i];
uint32_t so_key = (uint32_t) so_output.register_index << 2 | so_output.start_component;
uint32_t location = (uint32_t) slot << 2 | so_output.start_component;
uint32_t output_location = (uint32_t) slot << 2 | so_output.start_component;
uint32_t location = output_location;
struct hash_entry *he = _mesa_hash_table_search(ctx->so_outputs, &so_key);
assert(he);
SpvId so_output_var_id = (SpvId)(intptr_t)he->data;
@ -1390,6 +1466,36 @@ emit_so_outputs(struct ntv_context *ctx,
components[c] += 4;
}
const struct glsl_type *bare_type = glsl_without_array(out_type);
if (glsl_type_is_struct_or_ifc(bare_type)) {
uint32_t base_slot = (location & ~so_output.start_component) / 4;
unsigned slot_idx = slot - base_slot;
unsigned struct_slots = glsl_count_vec4_slots(bare_type, false, false);
unsigned array_idx = slot_idx / struct_slots;
slot_idx %= glsl_count_vec4_slots(bare_type, false, false);
src = spirv_builder_emit_composite_extract(&ctx->builder, get_glsl_type(ctx, bare_type), src, &array_idx, 1);
/* need to find the vec4 that's being exported by this slot */
while (glsl_type_is_struct_or_ifc(bare_type))
bare_type = unroll_struct_type(ctx, bare_type, &slot_idx, &src, &out_type);
output_type = get_glsl_type(ctx, out_type);
if (glsl_type_is_vector_or_scalar(out_type))
src = emit_bitcast(ctx, type, src);
else if (glsl_type_is_array(out_type)) {
for (unsigned c = 0; c < so_output.num_components; c++) {
uint32_t member = so_output.start_component + c;
SpvId base_type = get_glsl_basetype(ctx, glsl_get_base_type(glsl_without_array_or_matrix(out_type)));
components[c] = spirv_builder_emit_composite_extract(&ctx->builder, base_type, src, &member, 1);
}
if (so_output.num_components > 1)
src = spirv_builder_emit_composite_construct(&ctx->builder, type, components, so_output.num_components);
else
src = components[0];
out_type = glsl_vector_type(GLSL_TYPE_UINT, so_output.num_components);
output_type = type;
}
}
assert(!glsl_type_is_struct_or_ifc(out_type));
/* if we're emitting a scalar or the type we're emitting matches the output's original type and we're
* emitting the same number of components, then we can skip any sort of conversion here
*/
@ -1404,8 +1510,10 @@ emit_so_outputs(struct ntv_context *ctx,
result = spirv_builder_emit_vector_shuffle(&ctx->builder, type,
src, src,
components, so_output.num_components);
result = emit_bitcast(ctx, type, result);
} else {
assert(glsl_type_is_array_or_matrix(out_type));
ASSERTED const struct glsl_type *bare_type = glsl_without_array(out_type);
assert(!glsl_type_is_struct_or_ifc(bare_type));
/* for arrays, we need to manually extract each desired member
* and re-pack them into the desired output type
*/
@ -1427,8 +1535,34 @@ emit_so_outputs(struct ntv_context *ctx,
}
}
result = emit_bitcast(ctx, type, result);
spirv_builder_emit_store(&ctx->builder, so_output_var_id, result);
}
u_foreach_bit(bit, so_info->so_propagate) {
unsigned slot = bit + VARYING_SLOT_VAR0;
nir_variable *var = find_propagate_var(ctx->nir, slot);
assert(var);
const struct glsl_type *bare_type = glsl_without_array(var->type);
SpvId base_type = get_glsl_type(ctx, bare_type);
SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
SpvStorageClassOutput,
base_type);
SpvId output = ctx->outputs[slot << 2];
assert(output);
for (unsigned i = 0; i < glsl_array_size(var->type); i++) {
uint32_t so_key = (uint32_t) (slot + i) << 2;
struct hash_entry *he = _mesa_hash_table_search(ctx->so_outputs, &so_key);
assert(he);
SpvId so_output_var_id = (SpvId)(intptr_t)he->data;
SpvId idx = emit_uint_const(ctx, 32, i);
SpvId deref = spirv_builder_emit_access_chain(&ctx->builder, pointer_type, output, &idx, 1);
SpvId load = spirv_builder_emit_load(&ctx->builder, base_type, deref);
spirv_builder_emit_store(&ctx->builder, so_output_var_id, load);
}
}
}
static SpvId

View file

@ -730,6 +730,9 @@ update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_
var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
var->data.offset = output->dst_offset * 4;
var->data.stream = output->stream;
/* GLSL specifies that interface blocks are split per-buffer in XFB */
if (glsl_type_is_array(var->type) && glsl_array_size(var->type) > 1 && glsl_type_is_interface(glsl_without_array(var->type)))
zs->sinfo.so_propagate |= BITFIELD_BIT(var->data.location - VARYING_SLOT_VAR0);
/* mark all slot components inlined to skip subsequent loop iterations */
for (unsigned j = 0; j < num_slots; j++) {
slot = var->data.location + j;
@ -745,7 +748,7 @@ out:
/* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
zs->sinfo.so_info_slots[zs->sinfo.so_info.num_outputs++] = reverse_map[output->register_index];
}
zs->sinfo.have_xfb = !!zs->sinfo.so_info.num_outputs;
zs->sinfo.have_xfb = zs->sinfo.so_info.num_outputs || zs->sinfo.so_propagate;
/* ensure this doesn't get output in the shader by unsetting location */
if (have_fake_psiz && psiz)
update_psiz_location(zs->nir, psiz);

View file

@ -55,6 +55,7 @@ struct tgsi_token;
struct zink_shader_info {
struct pipe_stream_output_info so_info;
unsigned so_info_slots[PIPE_MAX_SO_OUTPUTS];
uint32_t so_propagate; //left shifted by 32
bool last_vertex;
bool have_xfb;
bool have_sparse;