diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
index bd9655e319d..b00ad603700 100644
--- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
+++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
@@ -29,65 +29,8 @@
 #include "util/u_memory.h"
 #include "util/hash_table.h"
 
-/* this consistently maps slots to a zero-indexed value to avoid wasting slots */
-static unsigned slot_pack_map[] = {
-   /* Position is builtin */
-   [VARYING_SLOT_POS] = UINT_MAX,
-   [VARYING_SLOT_COL0] = 0, /* input/output */
-   [VARYING_SLOT_COL1] = 1, /* input/output */
-   [VARYING_SLOT_FOGC] = 2, /* input/output */
-   /* TEX0-7 are deprecated, so we put them at the end of the range and hope nobody uses them all */
-   [VARYING_SLOT_TEX0] = VARYING_SLOT_VAR0 - 1, /* input/output */
-   [VARYING_SLOT_TEX1] = VARYING_SLOT_VAR0 - 2,
-   [VARYING_SLOT_TEX2] = VARYING_SLOT_VAR0 - 3,
-   [VARYING_SLOT_TEX3] = VARYING_SLOT_VAR0 - 4,
-   [VARYING_SLOT_TEX4] = VARYING_SLOT_VAR0 - 5,
-   [VARYING_SLOT_TEX5] = VARYING_SLOT_VAR0 - 6,
-   [VARYING_SLOT_TEX6] = VARYING_SLOT_VAR0 - 7,
-   [VARYING_SLOT_TEX7] = VARYING_SLOT_VAR0 - 8,
-
-   /* PointSize is builtin */
-   [VARYING_SLOT_PSIZ] = UINT_MAX,
-
-   [VARYING_SLOT_BFC0] = 3, /* output only */
-   [VARYING_SLOT_BFC1] = 4, /* output only */
-   [VARYING_SLOT_EDGE] = 5, /* output only */
-   [VARYING_SLOT_CLIP_VERTEX] = 6, /* output only */
-
-   /* ClipDistance is builtin */
-   [VARYING_SLOT_CLIP_DIST0] = UINT_MAX,
-   [VARYING_SLOT_CLIP_DIST1] = UINT_MAX,
-
-   /* CullDistance is builtin */
-   [VARYING_SLOT_CULL_DIST0] = UINT_MAX, /* input/output */
-   [VARYING_SLOT_CULL_DIST1] = UINT_MAX, /* never actually used */
-
-   /* PrimitiveId is builtin */
-   [VARYING_SLOT_PRIMITIVE_ID] = UINT_MAX,
-
-   /* Layer is builtin */
-   [VARYING_SLOT_LAYER] = UINT_MAX, /* input/output */
-
-   /* ViewportIndex is builtin */
-   [VARYING_SLOT_VIEWPORT] = UINT_MAX, /* input/output */
-
-   /* FrontFacing is builtin */
-   [VARYING_SLOT_FACE] = UINT_MAX,
-
-   /* PointCoord is builtin */
-   [VARYING_SLOT_PNTC] = UINT_MAX, /* input only */
-
-   /* TessLevelOuter is builtin */
-   [VARYING_SLOT_TESS_LEVEL_OUTER] = UINT_MAX,
-   /* TessLevelInner is builtin */
-   [VARYING_SLOT_TESS_LEVEL_INNER] = UINT_MAX,
-
-   [VARYING_SLOT_BOUNDING_BOX0] = 7, /* Only appears as TCS output. */
-   [VARYING_SLOT_BOUNDING_BOX1] = 8, /* Only appears as TCS output. */
-   [VARYING_SLOT_VIEW_INDEX] = 9, /* input/output */
-   [VARYING_SLOT_VIEWPORT_MASK] = 10, /* output only */
-};
-#define NTV_MIN_RESERVED_SLOTS 11
+/* marks a shader_slot_map entry that has no location assigned yet */
+#define SLOT_UNSET ((unsigned char) -1)
 
 struct ntv_context {
    void *mem_ctx;
@@ -123,10 +66,10 @@ struct ntv_context {
    bool block_started;
    SpvId loop_break, loop_cont;
 
+   unsigned char *shader_slot_map; /* caller-owned: varying slot -> location */
+   unsigned char shader_slots_reserved; /* number of locations handed out so far */
+
    SpvId front_face_var, instance_id_var, vertex_id_var;
-#ifndef NDEBUG
-   bool seen_texcoord[8]; //whether we've seen a VARYING_SLOT_TEX[n] this pass
-#endif
 };
 
 static SpvId
@@ -295,25 +238,28 @@ get_glsl_type(struct ntv_context *ctx, const struct glsl_type *type)
    unreachable("we shouldn't get here, I think...");
 }
 
+/* hands out the next free location and advances the running count */
+static inline unsigned char
+reserve_slot(struct ntv_context *ctx)
+{
+   /* TODO: this should actually be clamped to the limits value as in the table
+    * in 14.1.4 of the vulkan spec, though there's not really any recourse
+    * other than aborting if we do hit it...
+    */
+   assert(ctx->shader_slots_reserved < MAX_VARYING);
+   return ctx->shader_slots_reserved++;
+}
+
+/* maps a varying slot to its location, assigning a fresh one on first use */
 static inline unsigned
 handle_slot(struct ntv_context *ctx, unsigned slot)
 {
-   unsigned orig = slot;
-   if (slot < VARYING_SLOT_VAR0) {
-#ifndef NDEBUG
-      if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7)
-         ctx->seen_texcoord[slot - VARYING_SLOT_TEX0] = true;
-#endif
-      slot = slot_pack_map[slot];
-      if (slot == UINT_MAX)
-         debug_printf("unhandled varying slot: %s\n", gl_varying_slot_name(orig));
-   } else {
-      slot -= VARYING_SLOT_VAR0 - NTV_MIN_RESERVED_SLOTS;
-      assert(slot <= VARYING_SLOT_VAR0 - 8 ||
-             !ctx->seen_texcoord[VARYING_SLOT_VAR0 - slot - 1]);
-
-   }
-   assert(slot < VARYING_SLOT_VAR0);
+   /* the map is indexed by varying slot; reject anything past its end */
+   assert(slot < VARYING_SLOT_MAX);
+   if (ctx->shader_slot_map[slot] == SLOT_UNSET)
+      ctx->shader_slot_map[slot] = reserve_slot(ctx);
+   slot = ctx->shader_slot_map[slot];
+   assert(slot < MAX_VARYING);
    return slot;
 }
 
@@ -901,8 +847,7 @@ get_output_type(struct ntv_context *ctx, unsigned register_index, unsigned num_c
 /* for streamout create new outputs, as streamout can be done on individual components,
    from complete outputs, so we just can't use the created packed outputs */
 static void
-emit_so_info(struct ntv_context *ctx, unsigned max_output_location,
-             const struct zink_so_info *so_info)
+emit_so_info(struct ntv_context *ctx, const struct zink_so_info *so_info)
 {
    for (unsigned i = 0; i < so_info->so_info.num_outputs; i++) {
       struct pipe_stream_output so_output = so_info->so_info.output[i];
@@ -924,16 +869,9 @@ emit_so_info(struct ntv_context *ctx, unsigned max_output_location,
       /* output location is incremented by VARYING_SLOT_VAR0 for non-builtins in vtn,
        * so we need to ensure that the new xfb location slot doesn't conflict with any previously-emitted
        * outputs.
-       *
-       * if there's no previous outputs that take up user slots (VAR0+) then we can start right after the
-       * glsl builtin reserved slots, otherwise we start just after the adjusted user output slot
        */
-      uint32_t location = NTV_MIN_RESERVED_SLOTS + i;
-      if (max_output_location >= VARYING_SLOT_VAR0)
-         location = max_output_location - VARYING_SLOT_VAR0 + 1 + i;
+      uint32_t location = reserve_slot(ctx);
       assert(location < VARYING_SLOT_VAR0);
-      assert(location <= VARYING_SLOT_VAR0 - 8 ||
-             !ctx->seen_texcoord[VARYING_SLOT_VAR0 - location - 1]);
       spirv_builder_emit_location(&ctx->builder, var_id, location);
 
       /* note: gl_ClipDistance[4] can the 0-indexed member of VARYING_SLOT_CLIP_DIST1 here,
@@ -2236,7 +2174,8 @@ emit_cf_list(struct ntv_context *ctx, struct exec_list *list)
 }
 
 struct spirv_shader *
-nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info)
+nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info,
+             unsigned char *shader_slot_map, unsigned char *shader_slots_reserved)
 {
    struct spirv_shader *ret = NULL;
 
@@ -2279,6 +2218,8 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info)
    }
 
    ctx.stage = s->info.stage;
+   ctx.shader_slot_map = shader_slot_map;
+   ctx.shader_slots_reserved = *shader_slots_reserved;
    ctx.GLSL_std_450 = spirv_builder_import(&ctx.builder, "GLSL.std.450");
    spirv_builder_emit_source(&ctx.builder, SpvSourceLanguageGLSL, 450);
 
@@ -2329,7 +2270,7 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info)
 
 
    if (so_info)
-      emit_so_info(&ctx, util_last_bit64(s->info.outputs_written), so_info);
+      emit_so_info(&ctx, so_info);
    /* we have to reverse iterate to match what's done in zink_compiler.c */
    foreach_list_typed_reverse(nir_variable, var, node, &s->variables)
       if (_nir_shader_variable_has_mode(var, nir_var_uniform |
@@ -2421,6 +2362,7 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info)
    assert(ret->num_words == num_words);
 
    ralloc_free(ctx.mem_ctx);
+   *shader_slots_reserved = ctx.shader_slots_reserved;
 
    return ret;
 
diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.h b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.h
index 77d77add4e4..04543c31915 100644
--- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.h
+++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.h
@@ -42,7 +42,12 @@ struct nir_shader;
 struct pipe_stream_output_info;
 
+/* shader_slot_map holds one entry per varying slot and is filled in lazily;
+ * callers pass the same map to every stage of a program so locations match up.
+ * *shader_slots_reserved is read on entry and written back before returning.
+ */
 struct spirv_shader *
-nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info);
+nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info,
+             unsigned char *shader_slot_map, unsigned char *shader_slots_reserved);
 
 void
 spirv_shader_delete(struct spirv_shader *s);
diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c
index 87b9a785cef..ad2e6b0588e 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -217,11 +217,12 @@ update_so_info(struct zink_shader *sh,
 }
 
 VkShaderModule
-zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs)
+zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
+                    unsigned char *shader_slot_map, unsigned char *shader_slots_reserved)
 {
    VkShaderModule mod = VK_NULL_HANDLE;
    void *streamout = zs->streamout.so_info_slots ? &zs->streamout : NULL;
-   struct spirv_shader *spirv = nir_to_spirv(zs->nir, streamout);
+   struct spirv_shader *spirv = nir_to_spirv(zs->nir, streamout, shader_slot_map, shader_slots_reserved);
    assert(spirv);
 
    if (zink_debug & ZINK_DEBUG_SPIRV) {
diff --git a/src/gallium/drivers/zink/zink_compiler.h b/src/gallium/drivers/zink/zink_compiler.h
index 810be0163bd..3a473bdeaa9 100644
--- a/src/gallium/drivers/zink/zink_compiler.h
+++ b/src/gallium/drivers/zink/zink_compiler.h
@@ -72,7 +72,8 @@ struct zink_shader {
 };
 
 VkShaderModule
-zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs);
+zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
+                    unsigned char *shader_slot_map, unsigned char *shader_slots_reserved);
 
 struct zink_shader *
 zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c
index 0bcaa4a03e7..f7bd9ca95fb 100644
--- a/src/gallium/drivers/zink/zink_program.c
+++ b/src/gallium/drivers/zink/zink_program.c
@@ -146,13 +146,15 @@ update_shader_modules(struct zink_context *ctx, struct zink_shader *stages[ZINK_
       unsigned type = u_bit_scan(&dirty_shader_stages);
       dirty[tgsi_processor_to_shader_stage(type)] = stages[type];
    }
+
    for (int i = 0; i < ZINK_SHADER_COUNT; ++i) {
       enum pipe_shader_type type = pipe_shader_type_from_mesa(i);
       if (dirty[i]) {
          prog->modules[type] = CALLOC_STRUCT(zink_shader_module);
          assert(prog->modules[type]);
          pipe_reference_init(&prog->modules[type]->reference, 1);
-         prog->modules[type]->shader = zink_shader_compile(zink_screen(ctx->base.screen), dirty[i]);
+         prog->modules[type]->shader = zink_shader_compile(zink_screen(ctx->base.screen), dirty[i],
+                                                           prog->shader_slot_map, &prog->shader_slots_reserved);
       } else if (stages[type]) /* reuse existing shader module */
          zink_shader_module_reference(zink_screen(ctx->base.screen), &prog->modules[type], ctx->curr_program->modules[type]);
       prog->shaders[type] = stages[type];
@@ -172,6 +174,32 @@ equals_gfx_pipeline_state(const void *a, const void *b)
    return memcmp(a, b, offsetof(struct zink_gfx_pipeline_state, hash)) == 0;
 }
 
+/* Seeds prog's slot map: a blank map when every stage is being recompiled (or
+ * there is no current program), otherwise a copy of the current program's map.
+ */
+static void
+init_slot_map(struct zink_context *ctx, struct zink_gfx_program *prog)
+{
+   unsigned existing_shaders = 0;
+
+   /* if there's a case where we'll be reusing any shaders, we need to reuse the slot map too */
+   if (ctx->curr_program) {
+      for (int i = 0; i < ZINK_SHADER_COUNT; ++i) {
+         if (ctx->curr_program->shaders[i])
+            existing_shaders |= 1u << i;
+      }
+   }
+   if (ctx->dirty_shader_stages == existing_shaders || !existing_shaders) {
+      /* all shaders are being recompiled: new slot map */
+      memset(prog->shader_slot_map, -1, sizeof(prog->shader_slot_map));
+      prog->shader_slots_reserved = 0;
+   } else {
+      /* at least some shaders are being reused: use existing slot map so locations match up */
+      memcpy(prog->shader_slot_map, ctx->curr_program->shader_slot_map, sizeof(prog->shader_slot_map));
+      prog->shader_slots_reserved = ctx->curr_program->shader_slots_reserved;
+   }
+}
+
 struct zink_gfx_program *
 zink_create_gfx_program(struct zink_context *ctx,
                         struct zink_shader *stages[ZINK_SHADER_COUNT])
@@ -183,6 +211,8 @@ zink_create_gfx_program(struct zink_context *ctx,
 
    pipe_reference_init(&prog->reference, 1);
 
+   init_slot_map(ctx, prog);
+
    update_shader_modules(ctx, stages, prog);
 
    for (int i = 0; i < ARRAY_SIZE(prog->pipelines); ++i) {
diff --git a/src/gallium/drivers/zink/zink_program.h b/src/gallium/drivers/zink/zink_program.h
index 5e68783a2f8..82900c92201 100644
--- a/src/gallium/drivers/zink/zink_program.h
+++ b/src/gallium/drivers/zink/zink_program.h
@@ -26,6 +26,7 @@
 
 #include <vulkan/vulkan.h>
 
+#include "compiler/shader_enums.h"
 #include "pipe/p_state.h"
 #include "util/u_inlines.h"
 
@@ -48,6 +49,8 @@ struct zink_gfx_program {
 
    struct zink_shader_module *modules[ZINK_SHADER_COUNT]; // compute stage doesn't belong here
    struct zink_shader *shaders[ZINK_SHADER_COUNT];
+   unsigned char shader_slot_map[VARYING_SLOT_MAX]; // varying slot -> location, shared by all stages
+   unsigned char shader_slots_reserved; // number of locations assigned so far
    VkDescriptorSetLayout dsl;
    VkPipelineLayout layout;
    unsigned num_descriptors;