zink: plug in the program/module parts of shadow texture mode emulation

this is clunky because of how big the swizzle data block is,
but the gist of it is the data block is stored onto the shader module key
after all the other data, and then it gets manually hashed/compared in
relevant cases

it's gross, but so is this functionality

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20598>
This commit is contained in:
Mike Blumenkrantz 2023-01-05 16:43:08 -05:00 committed by Marge Bot
parent ef233d43f4
commit c46fb43473
3 changed files with 55 additions and 13 deletions

View file

@ -131,7 +131,10 @@ create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *scr
const struct zink_shader_key *key = &state->shader_keys.key[stage];
/* non-generated tcs won't use the shader key */
const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
zm = malloc(sizeof(struct zink_shader_module) + key->size + (!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t));
const bool shadow_needs_shader_swizzle = stage == MESA_SHADER_FRAGMENT && key->key.fs.base.shadow_needs_shader_swizzle;
zm = malloc(sizeof(struct zink_shader_module) + key->size +
(!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t) +
(shadow_needs_shader_swizzle ? sizeof(struct zink_fs_shadow_key) : 0));
if (!zm) {
return NULL;
}
@ -166,6 +169,10 @@ create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *scr
zm->hash = patch_vertices;
else
zm->hash = shader_module_hash(zm);
if (unlikely(shadow_needs_shader_swizzle)) {
memcpy(zm->key + key->size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.shadow, sizeof(struct zink_fs_shadow_key));
zm->hash ^= _mesa_hash_data(&ctx->di.shadow, sizeof(struct zink_fs_shadow_key));
}
zm->default_variant = !inline_size && !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
if (inline_size)
prog->inlined_variant_count[stage]++;
@ -185,6 +192,7 @@ get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen
const struct zink_shader_key *key = &state->shader_keys.key[stage];
/* non-generated tcs won't use the shader key */
const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
const bool shadow_needs_shader_swizzle = stage == MESA_SHADER_FRAGMENT && unlikely(key->key.fs.base.shadow_needs_shader_swizzle);
struct util_dynarray *shader_cache = &prog->shader_cache[stage][!has_nonseamless ? !!nonseamless_size : 0][has_inline ? !!inline_size : 0];
unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
@ -199,6 +207,12 @@ get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen
continue;
if (!shader_key_matches(iter, key, inline_size, has_inline, has_nonseamless))
continue;
if (unlikely(shadow_needs_shader_swizzle)) {
/* shadow swizzle data needs a manual compare since it's so fat */
if (memcmp(iter->key + iter->key_size + nonseamless_size + iter->num_uniforms * sizeof(uint32_t),
&ctx->di.shadow, sizeof(struct zink_fs_shadow_key)))
continue;
}
}
if (i > 0) {
struct zink_shader_module *zero = pzm[0];
@ -221,17 +235,19 @@ create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_scr
struct zink_shader_module *zm;
uint16_t *key;
unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
bool shadow_needs_shader_swizzle = false;
if (zs == prog->last_vertex_stage) {
key = (uint16_t*)&state->shader_keys_optimal.key.vs_base;
} else if (stage == MESA_SHADER_FRAGMENT) {
key = (uint16_t*)&state->shader_keys_optimal.key.fs;
shadow_needs_shader_swizzle = ctx ? ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle : false;
} else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
key = (uint16_t*)&state->shader_keys_optimal.key.tcs;
} else {
key = NULL;
}
size_t key_size = sizeof(uint16_t);
zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0));
zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0) + (unlikely(shadow_needs_shader_swizzle) ? sizeof(struct zink_fs_shadow_key) : 0));
if (!zm) {
return NULL;
}
@ -254,6 +270,8 @@ create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_scr
uint16_t *data = (uint16_t*)zm->key;
/* sanitize actual key bits */
*data = (*key) & mask;
if (unlikely(shadow_needs_shader_swizzle))
memcpy(&data[1], &ctx->di.shadow, sizeof(struct zink_fs_shadow_key));
}
zm->default_variant = !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
util_dynarray_append(&prog->shader_cache[stage][0][0], void*, zm);
@ -268,12 +286,14 @@ get_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen
{
/* non-generated tcs won't use the shader key */
const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
bool shadow_needs_shader_swizzle = false;
uint16_t *key;
unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
if (zs == prog->last_vertex_stage) {
key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_base;
} else if (stage == MESA_SHADER_FRAGMENT) {
key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.fs;
shadow_needs_shader_swizzle = ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle;
} else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs;
} else {
@ -291,6 +311,11 @@ get_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen
/* no key is bigger than uint16_t */
if (memcmp(iter->key, &val, sizeof(uint16_t)))
continue;
if (unlikely(shadow_needs_shader_swizzle)) {
/* shadow swizzle data needs a manual compare since it's so fat */
if (memcmp(iter->key + sizeof(uint16_t), &ctx->di.shadow, sizeof(struct zink_fs_shadow_key)))
continue;
}
}
if (i > 0) {
struct zink_shader_module *zero = pzm[0];
@ -603,9 +628,16 @@ update_gfx_program_optimal(struct zink_context *ctx, struct zink_gfx_program *pr
bool changed = update_gfx_shader_module_optimal(ctx, prog, ctx->last_vertex_stage->nir->info.stage);
ctx->gfx_pipeline_state.modules_changed |= changed;
}
if (ctx->gfx_pipeline_state.shader_keys_optimal.key.fs_bits != optimal_key->fs_bits) {
const bool shadow_needs_shader_swizzle = optimal_key->fs.shadow_needs_shader_swizzle && (ctx->dirty_gfx_stages & BITFIELD_BIT(MESA_SHADER_FRAGMENT));
if (ctx->gfx_pipeline_state.shader_keys_optimal.key.fs_bits != optimal_key->fs_bits ||
/* always recheck shadow swizzles since they aren't directly part of the key */
unlikely(shadow_needs_shader_swizzle)) {
bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_FRAGMENT);
ctx->gfx_pipeline_state.modules_changed |= changed;
if (unlikely(shadow_needs_shader_swizzle)) {
struct zink_shader_module **pzm = prog->shader_cache[MESA_SHADER_FRAGMENT][0][0].data;
ctx->gfx_pipeline_state.shadow = (struct zink_fs_shadow_key*)pzm[0]->key + sizeof(uint16_t);
}
}
if (prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated &&
ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs_bits != optimal_key->tcs_bits) {

View file

@ -362,6 +362,10 @@ equals_gfx_pipeline_state(const void *a, const void *b)
if (STAGE_MASK & STAGE_MASK_OPTIMAL) {
if (sa->optimal_key != sb->optimal_key)
return false;
if (STAGE_MASK & STAGE_MASK_OPTIMAL_SHADOW) {
if (sa->shadow != sb->shadow)
return false;
}
} else {
if (STAGE_MASK & BITFIELD_BIT(MESA_SHADER_TESS_CTRL)) {
if (sa->modules[MESA_SHADER_TESS_CTRL] != sb->modules[MESA_SHADER_TESS_CTRL])
@ -387,10 +391,13 @@ equals_gfx_pipeline_state(const void *a, const void *b)
/* below is a bunch of code to pick the right equals_gfx_pipeline_state template for runtime */
template <zink_pipeline_dynamic_state DYNAMIC_STATE, unsigned STAGE_MASK>
static equals_gfx_pipeline_state_func
get_optimal_gfx_pipeline_stage_eq_func(bool optimal_keys)
get_optimal_gfx_pipeline_stage_eq_func(bool optimal_keys, bool shadow_needs_shader_swizzle)
{
if (optimal_keys)
if (optimal_keys) {
if (shadow_needs_shader_swizzle)
return equals_gfx_pipeline_state<DYNAMIC_STATE, STAGE_MASK | STAGE_MASK_OPTIMAL | STAGE_MASK_OPTIMAL_SHADOW>;
return equals_gfx_pipeline_state<DYNAMIC_STATE, STAGE_MASK | STAGE_MASK_OPTIMAL>;
}
return equals_gfx_pipeline_state<DYNAMIC_STATE, STAGE_MASK>;
}
@ -398,6 +405,7 @@ template <zink_pipeline_dynamic_state DYNAMIC_STATE>
static equals_gfx_pipeline_state_func
get_gfx_pipeline_stage_eq_func(struct zink_gfx_program *prog, bool optimal_keys)
{
bool shadow_needs_shader_swizzle = prog->shaders[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask > 0;
unsigned vertex_stages = prog->stages_present & BITFIELD_MASK(MESA_SHADER_FRAGMENT);
if (vertex_stages & BITFIELD_BIT(MESA_SHADER_TESS_CTRL)) {
if (prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated)
@ -407,30 +415,30 @@ get_gfx_pipeline_stage_eq_func(struct zink_gfx_program *prog, bool optimal_keys)
if (vertex_stages == BITFIELD_MASK(MESA_SHADER_FRAGMENT))
/* all stages */
return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
BITFIELD_MASK(MESA_SHADER_COMPUTE)>(optimal_keys);
BITFIELD_MASK(MESA_SHADER_COMPUTE)>(optimal_keys, shadow_needs_shader_swizzle);
if (vertex_stages == BITFIELD_MASK(MESA_SHADER_GEOMETRY))
/* tess only: includes generated tcs too */
return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys);
BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys, shadow_needs_shader_swizzle);
if (vertex_stages == (BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)))
/* geom only */
return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys);
BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys, shadow_needs_shader_swizzle);
}
if (vertex_stages == (BITFIELD_MASK(MESA_SHADER_FRAGMENT) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL)))
/* all stages but tcs */
return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL)>(optimal_keys);
BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL)>(optimal_keys, shadow_needs_shader_swizzle);
if (vertex_stages == (BITFIELD_MASK(MESA_SHADER_GEOMETRY) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL)))
/* tess only: generated tcs */
return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~(BITFIELD_BIT(MESA_SHADER_GEOMETRY) | BITFIELD_BIT(MESA_SHADER_TESS_CTRL))>(optimal_keys);
BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~(BITFIELD_BIT(MESA_SHADER_GEOMETRY) | BITFIELD_BIT(MESA_SHADER_TESS_CTRL))>(optimal_keys, shadow_needs_shader_swizzle);
if (vertex_stages == (BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)))
/* geom only */
return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys);
BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys, shadow_needs_shader_swizzle);
return get_optimal_gfx_pipeline_stage_eq_func<DYNAMIC_STATE,
BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT)>(optimal_keys);
BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT)>(optimal_keys, shadow_needs_shader_swizzle);
}
equals_gfx_pipeline_state_func

View file

@ -797,6 +797,7 @@ struct zink_gfx_pipeline_state {
uint32_t vertex_buffers_enabled_mask;
uint32_t vertex_strides[PIPE_MAX_ATTRIBS];
struct zink_vertex_elements_hw_state *element_state;
struct zink_fs_shadow_key *shadow;
bool sample_locations_enabled;
enum pipe_prim_type shader_rast_prim, rast_prim; /* reduced type or max for unknown */
union {
@ -875,7 +876,7 @@ struct zink_shader_module {
bool has_nonseamless;
uint8_t num_uniforms;
uint8_t key_size;
uint8_t key[0]; /* | key | uniforms | */
uint8_t key[0]; /* | key | uniforms | shadow swizzle | */
};
struct zink_program {
@ -900,6 +901,7 @@ struct zink_program {
};
#define STAGE_MASK_OPTIMAL (1<<16)
#define STAGE_MASK_OPTIMAL_SHADOW (1<<17)
typedef bool (*equals_gfx_pipeline_state_func)(const void *a, const void *b);
struct zink_gfx_library_key {