d3d12: Track max varying slot, set and compare fewer bytes

Often, the full range of available slots doesn't need to participate in
the comparison or be zeroed out.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21527>
This commit is contained in:
Giancarlo Devich 2023-02-24 11:50:57 -08:00 committed by Marge Bot
parent 9a3820fe11
commit 56d30bf591
2 changed files with 41 additions and 26 deletions

View file

@ -549,7 +549,8 @@ static void
fill_varyings(struct d3d12_varying_info *info, nir_shader *s,
nir_variable_mode modes, uint64_t mask, bool patch)
{
memset(info, 0, sizeof(d3d12_varying_info));
info->max = 0;
info->mask = 0;
nir_foreach_variable_with_modes(var, s, modes) {
unsigned slot = var->data.location;
@ -563,6 +564,11 @@ fill_varyings(struct d3d12_varying_info *info, nir_shader *s,
if (!(mask & slot_bit))
continue;
if ((info->mask & slot_bit) == 0) {
memset(info->slots + slot, 0, sizeof(info->slots[0]));
info->max = MAX2(info->max, slot);
}
const struct glsl_type *type = var->type;
if ((s->info.stage == MESA_SHADER_GEOMETRY ||
s->info.stage == MESA_SHADER_TESS_CTRL) &&
@ -579,6 +585,13 @@ fill_varyings(struct d3d12_varying_info *info, nir_shader *s,
info->mask |= slot_bit;
info->slots[slot].location_frac_mask |= (1 << var->data.location_frac);
}
for (uint32_t i = 0; i < info->max; ++i) {
if (((1llu << i) & info->mask) == 0) {
memset(info->slots + i, 0, sizeof(info->slots[0]));
}
}
}
static void
@ -594,6 +607,32 @@ fill_flat_varyings(struct d3d12_gs_variant_key *key, d3d12_shader_selector *fs)
}
}
/**
 * Compare two varying-info records for equality.
 *
 * The records are considered different as soon as their slot masks or their
 * max values differ.  Otherwise the slot contents are compared, either slot by
 * slot (few slots present) or with a single bulk memcmp (many slots present).
 *
 * \param expect  reference record
 * \param have    record to check against \p expect
 * \return true when both records describe the same varyings
 */
bool
d3d12_compare_varying_info(const d3d12_varying_info *expect, const d3d12_varying_info *have)
{
   if (expect->mask != have->mask
      || expect->max != have->max)
      return false;

   /* No slot is populated on either side: nothing further to compare. */
   if (!expect->mask)
      return true;

   /* 6 is a rough (wild) guess for a bulk memcmp cross-over point.  When there
    * are a small number of slots present, individual memcmp is much faster. */
   if (util_bitcount64(expect->mask) < 6) {
      uint64_t mask = expect->mask;
      while (mask) {
         int slot = u_bit_scan64(&mask);
         if (memcmp(&expect->slots[slot], &have->slots[slot], sizeof(have->slots[slot])))
            return false;
      }
      return true;
   }

   /* max holds the index of the highest populated slot (not a count), so the
    * populated range is slots[0..max] inclusive, i.e. max + 1 entries.
    * Comparing only max entries would silently ignore the highest slot. */
   return !memcmp(expect->slots, have->slots,
                  sizeof(expect->slots[0]) * ((size_t)expect->max + 1));
}
static void
validate_geometry_shader_variant(struct d3d12_selection_context *sel_ctx)
{
@ -676,31 +715,6 @@ validate_tess_ctrl_shader_variant(struct d3d12_selection_context *sel_ctx)
ctx->gfx_stages[PIPE_SHADER_TESS_CTRL] = tcs;
}
/**
 * Tell whether two varying-info records are identical.
 *
 * Records with different slot masks never match; records whose masks are both
 * empty always match.  For everything else the slot payloads decide.
 */
static bool
d3d12_compare_varying_info(const d3d12_varying_info *expect, const d3d12_varying_info *have)
{
   const uint64_t present = expect->mask;

   if (present != have->mask)
      return false;
   if (present == 0)
      return true;

   /* With six or more slots in use, one memcmp over the whole record is
    * assumed to beat per-slot comparisons; the threshold of 6 is a rough
    * (wild) guess at the cross-over point. */
   if (util_bitcount64(present) >= 6)
      return memcmp(expect, have, sizeof(struct d3d12_varying_info)) == 0;

   /* Sparse case: walk the set bits and compare just those slots. */
   for (uint64_t remaining = present; remaining != 0;) {
      const int idx = u_bit_scan64(&remaining);
      if (memcmp(&expect->slots[idx], &have->slots[idx], sizeof(have->slots[idx])) != 0)
         return false;
   }
   return true;
}
static bool
d3d12_compare_shader_keys(struct d3d12_selection_context* sel_ctx, const d3d12_shader_key *expect, const d3d12_shader_key *have)
{

View file

@ -76,6 +76,7 @@ struct d3d12_varying_info {
} vars[4];
} slots[VARYING_SLOT_MAX];
uint64_t mask;
uint32_t max;
};
struct d3d12_image_format_conversion_info {