mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-11 08:00:13 +01:00
zink: remove gfx program slot mapping
If shaders aren't being shared between programs, they can instead directly assign IO between the shader stages such that only the producer and consumer pair need to have their slots match up. This requires that each gfx program struct stores base NIR for a given shader, in order to avoid breaking IO for other programs which reuse shader states.
Reviewed-by: Dave Airlie <airlied@redhat.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11691>
This commit is contained in:
parent
a8448a91e6
commit
61f2667cf5
4 changed files with 170 additions and 138 deletions
|
|
@ -524,69 +524,155 @@ update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_
|
|||
}
|
||||
|
||||
static void
|
||||
assign_io_locations(nir_shader *nir, unsigned char *shader_slot_map,
|
||||
unsigned char *shader_slots_reserved)
|
||||
assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
|
||||
{
|
||||
unsigned reserved = shader_slots_reserved ? *shader_slots_reserved : 0;
|
||||
nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
|
||||
if ((nir->info.stage == MESA_SHADER_VERTEX && var->data.mode == nir_var_shader_in) ||
|
||||
(nir->info.stage == MESA_SHADER_FRAGMENT && var->data.mode == nir_var_shader_out))
|
||||
continue;
|
||||
unsigned slot = var->data.location;
|
||||
switch (var->data.location) {
|
||||
case VARYING_SLOT_POS:
|
||||
case VARYING_SLOT_PNTC:
|
||||
case VARYING_SLOT_PSIZ:
|
||||
case VARYING_SLOT_LAYER:
|
||||
case VARYING_SLOT_PRIMITIVE_ID:
|
||||
case VARYING_SLOT_CLIP_DIST0:
|
||||
case VARYING_SLOT_CULL_DIST0:
|
||||
case VARYING_SLOT_VIEWPORT:
|
||||
case VARYING_SLOT_FACE:
|
||||
case VARYING_SLOT_TESS_LEVEL_OUTER:
|
||||
case VARYING_SLOT_TESS_LEVEL_INNER:
|
||||
/* use a sentinel value to avoid counting later */
|
||||
var->data.driver_location = UINT_MAX;
|
||||
break;
|
||||
|
||||
unsigned slot = var->data.location;
|
||||
switch (var->data.location) {
|
||||
case VARYING_SLOT_POS:
|
||||
case VARYING_SLOT_PNTC:
|
||||
case VARYING_SLOT_PSIZ:
|
||||
case VARYING_SLOT_LAYER:
|
||||
case VARYING_SLOT_PRIMITIVE_ID:
|
||||
case VARYING_SLOT_CLIP_DIST0:
|
||||
case VARYING_SLOT_CULL_DIST0:
|
||||
case VARYING_SLOT_VIEWPORT:
|
||||
case VARYING_SLOT_FACE:
|
||||
case VARYING_SLOT_TESS_LEVEL_OUTER:
|
||||
case VARYING_SLOT_TESS_LEVEL_INNER:
|
||||
/* use a sentinel value to avoid counting later */
|
||||
var->data.driver_location = UINT_MAX;
|
||||
break;
|
||||
|
||||
default:
|
||||
if (var->data.patch) {
|
||||
assert(var->data.location >= VARYING_SLOT_PATCH0);
|
||||
slot = var->data.location - VARYING_SLOT_PATCH0;
|
||||
} else if (var->data.location >= VARYING_SLOT_VAR0 &&
|
||||
((var->data.mode == nir_var_shader_out &&
|
||||
nir->info.stage == MESA_SHADER_TESS_CTRL) ||
|
||||
(var->data.mode != nir_var_shader_out &&
|
||||
nir->info.stage == MESA_SHADER_TESS_EVAL))) {
|
||||
slot = var->data.location - VARYING_SLOT_VAR0;
|
||||
} else {
|
||||
if (shader_slot_map[var->data.location] == 0xff) {
|
||||
assert(reserved < MAX_VARYING);
|
||||
shader_slot_map[var->data.location] = reserved;
|
||||
if (nir->info.stage == MESA_SHADER_TESS_CTRL && var->data.location >= VARYING_SLOT_VAR0)
|
||||
reserved += (glsl_count_vec4_slots(var->type, false, false) / 32 /*MAX_PATCH_VERTICES*/);
|
||||
else
|
||||
reserved += glsl_count_vec4_slots(var->type, false, false);
|
||||
}
|
||||
slot = shader_slot_map[var->data.location];
|
||||
assert(slot < MAX_VARYING);
|
||||
default:
|
||||
if (var->data.patch) {
|
||||
assert(var->data.location >= VARYING_SLOT_PATCH0);
|
||||
slot = var->data.location - VARYING_SLOT_PATCH0;
|
||||
} else if (var->data.location >= VARYING_SLOT_VAR0 &&
|
||||
var->data.mode == nir_var_shader_in &&
|
||||
stage == MESA_SHADER_TESS_EVAL) {
|
||||
slot = var->data.location - VARYING_SLOT_VAR0;
|
||||
} else {
|
||||
if (slot_map[var->data.location] == 0xff) {
|
||||
assert(*reserved < MAX_VARYING);
|
||||
slot_map[var->data.location] = *reserved;
|
||||
*reserved += glsl_count_vec4_slots(var->type, false, false);
|
||||
}
|
||||
var->data.driver_location = slot;
|
||||
slot = slot_map[var->data.location];
|
||||
assert(slot < MAX_VARYING);
|
||||
}
|
||||
var->data.driver_location = slot;
|
||||
}
|
||||
}
|
||||
|
||||
ALWAYS_INLINE static bool
|
||||
is_texcoord(gl_shader_stage stage, const nir_variable *var)
|
||||
{
|
||||
if (stage != MESA_SHADER_FRAGMENT)
|
||||
return false;
|
||||
return var->data.location >= VARYING_SLOT_TEX0 &&
|
||||
var->data.location <= VARYING_SLOT_TEX7;
|
||||
}
|
||||
|
||||
static bool
|
||||
assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
|
||||
{
|
||||
switch (var->data.location) {
|
||||
case VARYING_SLOT_POS:
|
||||
case VARYING_SLOT_PNTC:
|
||||
case VARYING_SLOT_PSIZ:
|
||||
case VARYING_SLOT_LAYER:
|
||||
case VARYING_SLOT_PRIMITIVE_ID:
|
||||
case VARYING_SLOT_CLIP_DIST0:
|
||||
case VARYING_SLOT_CULL_DIST0:
|
||||
case VARYING_SLOT_VIEWPORT:
|
||||
case VARYING_SLOT_FACE:
|
||||
case VARYING_SLOT_TESS_LEVEL_OUTER:
|
||||
case VARYING_SLOT_TESS_LEVEL_INNER:
|
||||
/* use a sentinel value to avoid counting later */
|
||||
var->data.driver_location = UINT_MAX;
|
||||
break;
|
||||
default:
|
||||
if (var->data.patch) {
|
||||
assert(var->data.location >= VARYING_SLOT_PATCH0);
|
||||
var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
|
||||
} else if (var->data.location >= VARYING_SLOT_VAR0 &&
|
||||
stage == MESA_SHADER_TESS_CTRL &&
|
||||
var->data.mode == nir_var_shader_out)
|
||||
var->data.driver_location = var->data.location - VARYING_SLOT_VAR0;
|
||||
else {
|
||||
if (slot_map[var->data.location] == (unsigned char)-1) {
|
||||
if (!is_texcoord(stage, var))
|
||||
/* dead io */
|
||||
return false;
|
||||
/* texcoords can't be eliminated in fs due to GL_COORD_REPLACE */
|
||||
slot_map[var->data.location] = (*reserved)++;
|
||||
}
|
||||
var->data.driver_location = slot_map[var->data.location];
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
if (shader_slots_reserved)
|
||||
*shader_slots_reserved = reserved;
|
||||
|
||||
static bool
|
||||
rewrite_and_discard_read(nir_builder *b, nir_instr *instr, void *data)
|
||||
{
|
||||
nir_variable *var = data;
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
if (intr->intrinsic != nir_intrinsic_load_deref)
|
||||
return false;
|
||||
nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
|
||||
if (deref_var != var)
|
||||
return false;
|
||||
nir_ssa_def *undef = nir_ssa_undef(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
|
||||
nir_ssa_def_rewrite_uses(&intr->dest.ssa, undef);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Assign matching io locations between one producer/consumer shader pair.
 *
 * A shared slot_map (indexed by varying location, 0xff == unassigned) is
 * filled from the producer side first, then consumed; any consumer input
 * with no producer entry is dead and gets deleted.  For the tcs->tes pair
 * the roles are inverted (tes inputs drive the map, tcs outputs consume it)
 * and dead-io cleanup runs on the tcs instead of the consumer.
 */
void
zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer)
{
   unsigned reserved = 0;
   unsigned char slot_map[VARYING_SLOT_MAX];
   memset(slot_map, -1, sizeof(slot_map));
   bool do_fixup = false;
   /* the shader that may need dead-variable cleanup afterwards */
   nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
   if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
      /* never assign from tcs -> tes, always invert */
      nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
         assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map);
      nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
         if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map))
            /* this is an output, nothing more needs to be done for it to be dropped */
            do_fixup = true;
      }
   } else {
      nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
         assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
      nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
         if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
            do_fixup = true;
            /* input needs to be rewritten as an undef to ensure the entire deref chain is deleted */
            nir_shader_instructions_pass(consumer, rewrite_and_discard_read, nir_metadata_dominance, var);
         }
      }
   }
   if (!do_fixup)
      return;
   /* dead io was dropped: demote its derefs and sweep the now-unused variables */
   nir_fixup_deref_modes(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(nir);
}
|
||||
|
||||
VkShaderModule
|
||||
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, struct zink_shader_key *key,
|
||||
unsigned char *shader_slot_map, unsigned char *shader_slots_reserved)
|
||||
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *base_nir, struct zink_shader_key *key)
|
||||
{
|
||||
VkShaderModule mod = VK_NULL_HANDLE;
|
||||
void *streamout = NULL;
|
||||
nir_shader *nir = nir_shader_clone(NULL, zs->nir);
|
||||
nir_shader *nir = nir_shader_clone(NULL, base_nir);
|
||||
|
||||
if (key) {
|
||||
if (key->inline_uniforms) {
|
||||
|
|
@ -640,8 +726,6 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, struct z
|
|||
}
|
||||
NIR_PASS_V(nir, nir_convert_from_ssa, true);
|
||||
|
||||
assign_io_locations(nir, shader_slot_map, shader_slots_reserved);
|
||||
|
||||
struct spirv_shader *spirv = nir_to_spirv(nir, streamout, screen->spirv_version);
|
||||
if (!spirv)
|
||||
goto done;
|
||||
|
|
|
|||
|
|
@ -93,10 +93,10 @@ struct zink_shader {
|
|||
|
||||
void
|
||||
zink_screen_init_compiler(struct zink_screen *screen);
|
||||
|
||||
void
|
||||
zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer);
|
||||
VkShaderModule
|
||||
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, struct zink_shader_key *key,
|
||||
unsigned char *shader_slot_map, unsigned char *shader_slots_reserved);
|
||||
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, struct zink_shader_key *key);
|
||||
|
||||
struct zink_shader *
|
||||
zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
|
||||
|
|
|
|||
|
|
@ -249,8 +249,7 @@ get_shader_module_for_stage(struct zink_context *ctx, struct zink_shader *zs, st
|
|||
return NULL;
|
||||
}
|
||||
pipe_reference_init(&zm->reference, 1);
|
||||
mod = zink_shader_compile(zink_screen(ctx->base.screen), zs, &key,
|
||||
prog->shader_slot_map, &prog->shader_slots_reserved);
|
||||
mod = zink_shader_compile(zink_screen(ctx->base.screen), zs, prog->nir[stage], &key);
|
||||
if (!mod) {
|
||||
ralloc_free(keybox);
|
||||
FREE(zm);
|
||||
|
|
@ -370,82 +369,6 @@ equals_gfx_pipeline_state(const void *a, const void *b)
|
|||
!memcmp(a, b, offsetof(struct zink_gfx_pipeline_state, hash));
|
||||
}
|
||||
|
||||
/* Decide whether prog can inherit the currently-bound program's varying slot
 * map (so shared shader stages keep matching locations) or must start fresh.
 *
 * A fresh map is forced when: no shaders are reused, every stage is being
 * recompiled anyway, or the inherited map plus this program's not-yet-mapped
 * outputs would exceed the 32-entry slot budget.
 */
static void
init_slot_map(struct zink_context *ctx, struct zink_gfx_program *prog)
{
   unsigned existing_shaders = 0;
   bool needs_new_map = false;

   /* if there's a case where we'll be reusing any shaders, we need to (maybe) reuse the slot map too */
   if (ctx->curr_program) {
      for (int i = 0; i < ZINK_SHADER_COUNT; ++i) {
         if (ctx->curr_program->shaders[i])
            existing_shaders |= 1 << i;
      }
      /* if there's reserved slots, check whether we have enough remaining slots */
      if (ctx->curr_program->shader_slots_reserved) {
         uint64_t max_outputs = 0;
         uint32_t num_xfb_outputs = 0;
         for (int i = 0; i < ZINK_SHADER_COUNT; ++i) {
            /* tcs outputs don't consume map entries and fs has no outputs to map */
            if (i != PIPE_SHADER_TESS_CTRL &&
                i != PIPE_SHADER_FRAGMENT &&
                ctx->gfx_stages[i]) {
               /* upper 32 bits of outputs_written are user varyings (VAR0+), lower are builtins */
               uint32_t user_outputs = ctx->gfx_stages[i]->nir->info.outputs_written >> 32;
               uint32_t builtin_outputs = ctx->gfx_stages[i]->nir->info.outputs_written;
               num_xfb_outputs = MAX2(num_xfb_outputs, ctx->gfx_stages[i]->streamout.so_info.num_outputs);
               unsigned user_outputs_count = 0;
               /* check builtins first */
               u_foreach_bit(slot, builtin_outputs) {
                  switch (slot) {
                  /* none of these require slot map entries */
                  case VARYING_SLOT_POS:
                  case VARYING_SLOT_PSIZ:
                  case VARYING_SLOT_LAYER:
                  case VARYING_SLOT_PRIMITIVE_ID:
                  case VARYING_SLOT_CULL_DIST0:
                  case VARYING_SLOT_CLIP_DIST0:
                  case VARYING_SLOT_VIEWPORT:
                  case VARYING_SLOT_TESS_LEVEL_INNER:
                  case VARYING_SLOT_TESS_LEVEL_OUTER:
                     break;
                  default:
                     /* remaining legacy builtins only require 1 slot each */
                     if (ctx->curr_program->shader_slot_map[slot] == -1)
                        user_outputs_count++;
                     break;
                  }
               }
               /* NOTE(review): slot here is a bit index into the upper word, i.e.
                * location - VARYING_SLOT_VAR0, while the assignment path indexes
                * shader_slot_map by full location — verify these agree */
               u_foreach_bit(slot, user_outputs) {
                  if (ctx->curr_program->shader_slot_map[slot] == -1) {
                     /* user variables can span multiple slots */
                     nir_variable *var = nir_find_variable_with_location(ctx->gfx_stages[i]->nir,
                                                                         nir_var_shader_out, slot);
                     assert(var);
                     if (i == PIPE_SHADER_TESS_CTRL && var->data.location >= VARYING_SLOT_VAR0)
                        user_outputs_count += (glsl_count_vec4_slots(var->type, false, false) / 32 /*MAX_PATCH_VERTICES*/);
                     else
                        user_outputs_count += glsl_count_vec4_slots(var->type, false, false);
                  }
               }
               max_outputs = MAX2(max_outputs, user_outputs_count);
            }
         }
         /* slot map can only hold 32 entries, so dump this one if we'll exceed that */
         if (ctx->curr_program->shader_slots_reserved + max_outputs + num_xfb_outputs > 32)
            needs_new_map = true;
      }
   }

   if (needs_new_map || ctx->dirty_shader_stages == existing_shaders || !existing_shaders) {
      /* all shaders are being recompiled: new slot map */
      memset(prog->shader_slot_map, -1, sizeof(prog->shader_slot_map));
   } else {
      /* at least some shaders are being reused: use existing slot map so locations match up */
      memcpy(prog->shader_slot_map, ctx->curr_program->shader_slot_map, sizeof(prog->shader_slot_map));
      prog->shader_slots_reserved = ctx->curr_program->shader_slots_reserved;
   }
}
|
||||
|
||||
void
|
||||
zink_update_gfx_program(struct zink_context *ctx, struct zink_gfx_program *prog)
|
||||
{
|
||||
|
|
@ -489,6 +412,32 @@ zink_pipeline_layout_create(struct zink_screen *screen, struct zink_program *pg)
|
|||
return layout;
|
||||
}
|
||||
|
||||
/* Walk the bound stages in pipeline order and run zink_compiler_assign_io()
 * on each adjacent producer/consumer pair, operating on per-program clones
 * (prog->nir[]) so the shared zink_shader nir is never mutated.
 */
static void
assign_io(struct zink_gfx_program *prog, struct zink_shader *stages[ZINK_SHADER_COUNT])
{
   struct zink_shader *shaders[PIPE_SHADER_TYPES];

   /* build array in pipeline order */
   for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++)
      shaders[tgsi_processor_to_shader_stage(i)] = stages[i];

   /* i is the producer stage; the fragment stage is never a producer */
   for (unsigned i = 0; i < MESA_SHADER_FRAGMENT;) {
      nir_shader *producer = shaders[i]->nir;
      /* find the next bound stage after i; note the inner loop advances i past
       * unbound stages, and on a match i jumps to j so the consumer becomes
       * the next producer */
      for (unsigned j = i + 1; j < ZINK_SHADER_COUNT; i++, j++) {
         struct zink_shader *consumer = shaders[j];
         if (!consumer)
            continue;
         /* clone lazily: a stage may already have been cloned as a consumer */
         if (!prog->nir[producer->info.stage])
            prog->nir[producer->info.stage] = nir_shader_clone(prog, producer);
         if (!prog->nir[j])
            prog->nir[j] = nir_shader_clone(prog, consumer->nir);
         zink_compiler_assign_io(prog->nir[producer->info.stage], prog->nir[j]);
         i = j;
         break;
      }
   }
}
|
||||
|
||||
struct zink_gfx_program *
|
||||
zink_create_gfx_program(struct zink_context *ctx,
|
||||
struct zink_shader *stages[ZINK_SHADER_COUNT])
|
||||
|
|
@ -513,7 +462,7 @@ zink_create_gfx_program(struct zink_context *ctx,
|
|||
ctx->dirty_shader_stages |= BITFIELD_BIT(PIPE_SHADER_TESS_CTRL);
|
||||
}
|
||||
|
||||
init_slot_map(ctx, prog);
|
||||
assign_io(prog, prog->shaders);
|
||||
|
||||
update_shader_modules(ctx, prog->shaders, prog, false);
|
||||
|
||||
|
|
@ -604,7 +553,7 @@ zink_create_compute_program(struct zink_context *ctx, struct zink_shader *shader
|
|||
comp->module = CALLOC_STRUCT(zink_shader_module);
|
||||
assert(comp->module);
|
||||
pipe_reference_init(&comp->module->reference, 1);
|
||||
comp->module->shader = zink_shader_compile(screen, shader, NULL, NULL, NULL);
|
||||
comp->module->shader = zink_shader_compile(screen, shader, shader->nir, NULL);
|
||||
assert(comp->module->shader);
|
||||
_mesa_hash_table_insert(&comp->base.shader_cache[0], &shader->shader_id, comp->module);
|
||||
|
||||
|
|
|
|||
|
|
@ -90,13 +90,12 @@ struct zink_program {
|
|||
/* Per-gfx-pipeline program state: one instance per linked set of shader stages. */
struct zink_gfx_program {
   struct zink_program base;

   /* per-program nir clones with io assigned between adjacent stages;
    * keeps the shared zink_shader nir untouched for reuse by other programs */
   struct nir_shader *nir[ZINK_SHADER_COUNT];
   struct zink_shader_module *modules[ZINK_SHADER_COUNT]; // compute stage doesn't belong here

   struct zink_shader_module *default_variants[ZINK_SHADER_COUNT][2]; //[default, no streamout]
   const void *default_variant_key[ZINK_SHADER_COUNT];
   struct zink_shader *shaders[ZINK_SHADER_COUNT];
   /* NOTE(review): the two slot-map fields below are removed by this change
    * (diff shows both old and new struct members) — confirm against final tree */
   unsigned char shader_slot_map[VARYING_SLOT_MAX];
   unsigned char shader_slots_reserved;
   struct hash_table *pipelines[11]; // number of draw modes we support
};
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue