zink: remove gfx program slot mapping

If shaders aren't shared between programs, IO can instead be assigned
directly between adjacent shader stages, so that only each producer/consumer
pair needs to have matching slots.

this requires that each gfx program struct stores base nir for a given shader
in order to avoid breaking io for other programs which reuse shader states

Reviewed-by: Dave Airlie <airlied@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11691>
This commit is contained in:
Mike Blumenkrantz 2021-05-14 13:29:48 -04:00 committed by Marge Bot
parent a8448a91e6
commit 61f2667cf5
4 changed files with 170 additions and 138 deletions

View file

@ -524,69 +524,155 @@ update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_
}
static void
assign_io_locations(nir_shader *nir, unsigned char *shader_slot_map,
unsigned char *shader_slots_reserved)
assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
{
unsigned reserved = shader_slots_reserved ? *shader_slots_reserved : 0;
nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
if ((nir->info.stage == MESA_SHADER_VERTEX && var->data.mode == nir_var_shader_in) ||
(nir->info.stage == MESA_SHADER_FRAGMENT && var->data.mode == nir_var_shader_out))
continue;
unsigned slot = var->data.location;
switch (var->data.location) {
case VARYING_SLOT_POS:
case VARYING_SLOT_PNTC:
case VARYING_SLOT_PSIZ:
case VARYING_SLOT_LAYER:
case VARYING_SLOT_PRIMITIVE_ID:
case VARYING_SLOT_CLIP_DIST0:
case VARYING_SLOT_CULL_DIST0:
case VARYING_SLOT_VIEWPORT:
case VARYING_SLOT_FACE:
case VARYING_SLOT_TESS_LEVEL_OUTER:
case VARYING_SLOT_TESS_LEVEL_INNER:
/* use a sentinel value to avoid counting later */
var->data.driver_location = UINT_MAX;
break;
unsigned slot = var->data.location;
switch (var->data.location) {
case VARYING_SLOT_POS:
case VARYING_SLOT_PNTC:
case VARYING_SLOT_PSIZ:
case VARYING_SLOT_LAYER:
case VARYING_SLOT_PRIMITIVE_ID:
case VARYING_SLOT_CLIP_DIST0:
case VARYING_SLOT_CULL_DIST0:
case VARYING_SLOT_VIEWPORT:
case VARYING_SLOT_FACE:
case VARYING_SLOT_TESS_LEVEL_OUTER:
case VARYING_SLOT_TESS_LEVEL_INNER:
/* use a sentinel value to avoid counting later */
var->data.driver_location = UINT_MAX;
break;
default:
if (var->data.patch) {
assert(var->data.location >= VARYING_SLOT_PATCH0);
slot = var->data.location - VARYING_SLOT_PATCH0;
} else if (var->data.location >= VARYING_SLOT_VAR0 &&
((var->data.mode == nir_var_shader_out &&
nir->info.stage == MESA_SHADER_TESS_CTRL) ||
(var->data.mode != nir_var_shader_out &&
nir->info.stage == MESA_SHADER_TESS_EVAL))) {
slot = var->data.location - VARYING_SLOT_VAR0;
} else {
if (shader_slot_map[var->data.location] == 0xff) {
assert(reserved < MAX_VARYING);
shader_slot_map[var->data.location] = reserved;
if (nir->info.stage == MESA_SHADER_TESS_CTRL && var->data.location >= VARYING_SLOT_VAR0)
reserved += (glsl_count_vec4_slots(var->type, false, false) / 32 /*MAX_PATCH_VERTICES*/);
else
reserved += glsl_count_vec4_slots(var->type, false, false);
}
slot = shader_slot_map[var->data.location];
assert(slot < MAX_VARYING);
default:
if (var->data.patch) {
assert(var->data.location >= VARYING_SLOT_PATCH0);
slot = var->data.location - VARYING_SLOT_PATCH0;
} else if (var->data.location >= VARYING_SLOT_VAR0 &&
var->data.mode == nir_var_shader_in &&
stage == MESA_SHADER_TESS_EVAL) {
slot = var->data.location - VARYING_SLOT_VAR0;
} else {
if (slot_map[var->data.location] == 0xff) {
assert(*reserved < MAX_VARYING);
slot_map[var->data.location] = *reserved;
*reserved += glsl_count_vec4_slots(var->type, false, false);
}
var->data.driver_location = slot;
slot = slot_map[var->data.location];
assert(slot < MAX_VARYING);
}
var->data.driver_location = slot;
}
}
/* Check whether 'var' is one of the fixed-function texcoord varyings
 * (VARYING_SLOT_TEX0..TEX7) as seen by the fragment stage; other stages
 * never report texcoords here.
 */
ALWAYS_INLINE static bool
is_texcoord(gl_shader_stage stage, const nir_variable *var)
{
   return stage == MESA_SHADER_FRAGMENT &&
          var->data.location >= VARYING_SLOT_TEX0 &&
          var->data.location <= VARYING_SLOT_TEX7;
}
/* Assign a driver_location to a consumer-side io variable, using the
 * slot_map already populated by the producer stage.
 *
 * Builtin varyings get the sentinel UINT_MAX so later passes can skip them
 * when counting user slots.  Patch varyings and tcs outputs map directly
 * from their VARYING_SLOT_* value.  Anything else must have had a slot
 * reserved by the producer; if not, the producer never wrote it, so the
 * read is dead io — except fs texcoords, which must survive because
 * GL_COORD_REPLACE can substitute point-sprite coords at draw time.
 *
 * Returns false when the variable is dead io and should be eliminated by
 * the caller.
 */
static bool
assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
{
switch (var->data.location) {
case VARYING_SLOT_POS:
case VARYING_SLOT_PNTC:
case VARYING_SLOT_PSIZ:
case VARYING_SLOT_LAYER:
case VARYING_SLOT_PRIMITIVE_ID:
case VARYING_SLOT_CLIP_DIST0:
case VARYING_SLOT_CULL_DIST0:
case VARYING_SLOT_VIEWPORT:
case VARYING_SLOT_FACE:
case VARYING_SLOT_TESS_LEVEL_OUTER:
case VARYING_SLOT_TESS_LEVEL_INNER:
/* use a sentinel value to avoid counting later */
var->data.driver_location = UINT_MAX;
break;
default:
if (var->data.patch) {
assert(var->data.location >= VARYING_SLOT_PATCH0);
var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
} else if (var->data.location >= VARYING_SLOT_VAR0 &&
stage == MESA_SHADER_TESS_CTRL &&
var->data.mode == nir_var_shader_out)
/* tcs outputs use their raw VAR index; matched by the tes side in
 * assign_producer_var_io */
var->data.driver_location = var->data.location - VARYING_SLOT_VAR0;
else {
/* 0xff means the producer reserved no slot for this location */
if (slot_map[var->data.location] == (unsigned char)-1) {
if (!is_texcoord(stage, var))
/* dead io */
return false;
/* texcoords can't be eliminated in fs due to GL_COORD_REPLACE */
slot_map[var->data.location] = (*reserved)++;
}
var->data.driver_location = slot_map[var->data.location];
}
}
return true;
}
if (shader_slots_reserved)
*shader_slots_reserved = reserved;
/* nir_shader_instructions_pass callback: replace every load of the target
 * variable ('data') with an ssa undef.  This orphans the variable's deref
 * chain so the cleanup passes run by the caller can delete it entirely.
 *
 * Returns true when an instruction was rewritten (pass made progress).
 */
static bool
rewrite_and_discard_read(nir_builder *b, nir_instr *instr, void *data)
{
nir_variable *var = data;
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (intr->intrinsic != nir_intrinsic_load_deref)
return false;
/* only loads of the variable being discarded are rewritten */
nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
if (deref_var != var)
return false;
nir_ssa_def *undef = nir_ssa_undef(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
nir_ssa_def_rewrite_uses(&intr->dest.ssa, undef);
return true;
}
/* Assign matching io slot locations between a producer/consumer pair of
 * shader stages, eliminating any io the consumer doesn't read.
 *
 * For a tcs producer the direction is inverted: the tes inputs drive the
 * slot map and tcs outputs are matched against it, since tcs outputs with
 * VARYING_SLOT_VAR0+ locations are assigned directly (see
 * assign_consumer_var_io).  'nir' is whichever shader of the pair may need
 * dead-io cleanup afterwards.
 */
void
zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer)
{
unsigned reserved = 0;
unsigned char slot_map[VARYING_SLOT_MAX];
/* 0xff == slot not yet reserved */
memset(slot_map, -1, sizeof(slot_map));
bool do_fixup = false;
nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
/* never assign from tcs -> tes, always invert */
nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map);
nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map))
/* this is an output, nothing more needs to be done for it to be dropped */
do_fixup = true;
}
} else {
nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
do_fixup = true;
/* input needs to be rewritten as an undef to ensure the entire deref chain is deleted */
nir_shader_instructions_pass(consumer, rewrite_and_discard_read, nir_metadata_dominance, var);
}
}
}
if (!do_fixup)
return;
/* dead io was dropped: fix deref modes, then strip the now-temp vars */
nir_fixup_deref_modes(nir);
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
optimize_nir(nir);
}
VkShaderModule
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, struct zink_shader_key *key,
unsigned char *shader_slot_map, unsigned char *shader_slots_reserved)
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *base_nir, struct zink_shader_key *key)
{
VkShaderModule mod = VK_NULL_HANDLE;
void *streamout = NULL;
nir_shader *nir = nir_shader_clone(NULL, zs->nir);
nir_shader *nir = nir_shader_clone(NULL, base_nir);
if (key) {
if (key->inline_uniforms) {
@ -640,8 +726,6 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, struct z
}
NIR_PASS_V(nir, nir_convert_from_ssa, true);
assign_io_locations(nir, shader_slot_map, shader_slots_reserved);
struct spirv_shader *spirv = nir_to_spirv(nir, streamout, screen->spirv_version);
if (!spirv)
goto done;

View file

@ -93,10 +93,10 @@ struct zink_shader {
void
zink_screen_init_compiler(struct zink_screen *screen);
void
zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer);
VkShaderModule
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, struct zink_shader_key *key,
unsigned char *shader_slot_map, unsigned char *shader_slots_reserved);
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, struct zink_shader_key *key);
struct zink_shader *
zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,

View file

@ -249,8 +249,7 @@ get_shader_module_for_stage(struct zink_context *ctx, struct zink_shader *zs, st
return NULL;
}
pipe_reference_init(&zm->reference, 1);
mod = zink_shader_compile(zink_screen(ctx->base.screen), zs, &key,
prog->shader_slot_map, &prog->shader_slots_reserved);
mod = zink_shader_compile(zink_screen(ctx->base.screen), zs, prog->nir[stage], &key);
if (!mod) {
ralloc_free(keybox);
FREE(zm);
@ -370,82 +369,6 @@ equals_gfx_pipeline_state(const void *a, const void *b)
!memcmp(a, b, offsetof(struct zink_gfx_pipeline_state, hash));
}
static void
init_slot_map(struct zink_context *ctx, struct zink_gfx_program *prog)
{
unsigned existing_shaders = 0;
bool needs_new_map = false;
/* if there's a case where we'll be reusing any shaders, we need to (maybe) reuse the slot map too */
if (ctx->curr_program) {
for (int i = 0; i < ZINK_SHADER_COUNT; ++i) {
if (ctx->curr_program->shaders[i])
existing_shaders |= 1 << i;
}
/* if there's reserved slots, check whether we have enough remaining slots */
if (ctx->curr_program->shader_slots_reserved) {
uint64_t max_outputs = 0;
uint32_t num_xfb_outputs = 0;
for (int i = 0; i < ZINK_SHADER_COUNT; ++i) {
if (i != PIPE_SHADER_TESS_CTRL &&
i != PIPE_SHADER_FRAGMENT &&
ctx->gfx_stages[i]) {
uint32_t user_outputs = ctx->gfx_stages[i]->nir->info.outputs_written >> 32;
uint32_t builtin_outputs = ctx->gfx_stages[i]->nir->info.outputs_written;
num_xfb_outputs = MAX2(num_xfb_outputs, ctx->gfx_stages[i]->streamout.so_info.num_outputs);
unsigned user_outputs_count = 0;
/* check builtins first */
u_foreach_bit(slot, builtin_outputs) {
switch (slot) {
/* none of these require slot map entries */
case VARYING_SLOT_POS:
case VARYING_SLOT_PSIZ:
case VARYING_SLOT_LAYER:
case VARYING_SLOT_PRIMITIVE_ID:
case VARYING_SLOT_CULL_DIST0:
case VARYING_SLOT_CLIP_DIST0:
case VARYING_SLOT_VIEWPORT:
case VARYING_SLOT_TESS_LEVEL_INNER:
case VARYING_SLOT_TESS_LEVEL_OUTER:
break;
default:
/* remaining legacy builtins only require 1 slot each */
if (ctx->curr_program->shader_slot_map[slot] == -1)
user_outputs_count++;
break;
}
}
u_foreach_bit(slot, user_outputs) {
if (ctx->curr_program->shader_slot_map[slot] == -1) {
/* user variables can span multiple slots */
nir_variable *var = nir_find_variable_with_location(ctx->gfx_stages[i]->nir,
nir_var_shader_out, slot);
assert(var);
if (i == PIPE_SHADER_TESS_CTRL && var->data.location >= VARYING_SLOT_VAR0)
user_outputs_count += (glsl_count_vec4_slots(var->type, false, false) / 32 /*MAX_PATCH_VERTICES*/);
else
user_outputs_count += glsl_count_vec4_slots(var->type, false, false);
}
}
max_outputs = MAX2(max_outputs, user_outputs_count);
}
}
/* slot map can only hold 32 entries, so dump this one if we'll exceed that */
if (ctx->curr_program->shader_slots_reserved + max_outputs + num_xfb_outputs > 32)
needs_new_map = true;
}
}
if (needs_new_map || ctx->dirty_shader_stages == existing_shaders || !existing_shaders) {
/* all shaders are being recompiled: new slot map */
memset(prog->shader_slot_map, -1, sizeof(prog->shader_slot_map));
} else {
/* at least some shaders are being reused: use existing slot map so locations match up */
memcpy(prog->shader_slot_map, ctx->curr_program->shader_slot_map, sizeof(prog->shader_slot_map));
prog->shader_slots_reserved = ctx->curr_program->shader_slots_reserved;
}
}
void
zink_update_gfx_program(struct zink_context *ctx, struct zink_gfx_program *prog)
{
@ -489,6 +412,32 @@ zink_pipeline_layout_create(struct zink_screen *screen, struct zink_program *pg)
return layout;
}
/* Walk the program's shaders in pipeline order and run cross-stage io
 * assignment on each adjacent producer/consumer pair.
 *
 * Each stage's base nir is cloned into prog->nir[] before being modified so
 * that io rewrites never touch the shared zink_shader nir, which other
 * programs may reuse with different stage combinations.
 */
static void
assign_io(struct zink_gfx_program *prog, struct zink_shader *stages[ZINK_SHADER_COUNT])
{
struct zink_shader *shaders[PIPE_SHADER_TYPES];
/* build array in pipeline order */
for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++)
shaders[tgsi_processor_to_shader_stage(i)] = stages[i];
/* 'i' is the current producer stage; the inner loop finds the next bound
 * stage 'j' to act as consumer, then jumps 'i' to it via `i = j; break;` */
for (unsigned i = 0; i < MESA_SHADER_FRAGMENT;) {
nir_shader *producer = shaders[i]->nir;
for (unsigned j = i + 1; j < ZINK_SHADER_COUNT; i++, j++) {
struct zink_shader *consumer = shaders[j];
if (!consumer)
continue;
/* clone lazily: the producer may already have been cloned as the
 * previous pair's consumer */
if (!prog->nir[producer->info.stage])
prog->nir[producer->info.stage] = nir_shader_clone(prog, producer);
if (!prog->nir[j])
prog->nir[j] = nir_shader_clone(prog, consumer->nir);
zink_compiler_assign_io(prog->nir[producer->info.stage], prog->nir[j]);
i = j;
break;
}
}
}
struct zink_gfx_program *
zink_create_gfx_program(struct zink_context *ctx,
struct zink_shader *stages[ZINK_SHADER_COUNT])
@ -513,7 +462,7 @@ zink_create_gfx_program(struct zink_context *ctx,
ctx->dirty_shader_stages |= BITFIELD_BIT(PIPE_SHADER_TESS_CTRL);
}
init_slot_map(ctx, prog);
assign_io(prog, prog->shaders);
update_shader_modules(ctx, prog->shaders, prog, false);
@ -604,7 +553,7 @@ zink_create_compute_program(struct zink_context *ctx, struct zink_shader *shader
comp->module = CALLOC_STRUCT(zink_shader_module);
assert(comp->module);
pipe_reference_init(&comp->module->reference, 1);
comp->module->shader = zink_shader_compile(screen, shader, NULL, NULL, NULL);
comp->module->shader = zink_shader_compile(screen, shader, shader->nir, NULL);
assert(comp->module->shader);
_mesa_hash_table_insert(&comp->base.shader_cache[0], &shader->shader_id, comp->module);

View file

@ -90,13 +90,12 @@ struct zink_program {
/* Per-pipeline collection of gfx-stage shaders plus the cached modules and
 * pipelines built from them. */
struct zink_gfx_program {
struct zink_program base;
/* per-program clones of each stage's base nir; cross-stage io assignment
 * mutates these instead of the shared zink_shader nir so other programs
 * reusing the same shader states are unaffected */
struct nir_shader *nir[ZINK_SHADER_COUNT];
struct zink_shader_module *modules[ZINK_SHADER_COUNT]; // compute stage doesn't belong here
struct zink_shader_module *default_variants[ZINK_SHADER_COUNT][2]; //[default, no streamout]
const void *default_variant_key[ZINK_SHADER_COUNT];
struct zink_shader *shaders[ZINK_SHADER_COUNT];
/* NOTE(review): legacy cross-program slot map (0xff == unreserved);
 * appears superseded by the per-program nir io assignment above */
unsigned char shader_slot_map[VARYING_SLOT_MAX];
unsigned char shader_slots_reserved;
struct hash_table *pipelines[11]; // number of draw modes we support
};