mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 04:38:03 +02:00
vc4: Avoid VS shader recompiles by keeping a set of FS inputs seen so far.
We don't want to bake the whole array into the FS key, because of the hashing overhead. But we can keep a set of the arrays seen, and use a pointer to the copy in as the array's proxy. Between this and the previous patch, gl-1.0-blend-func now passes on hardware, where previously it was filling the 256MB CMA area with shaders and OOMing. Drops 712 shaders from shader-db.
This commit is contained in:
parent
62ea2461ed
commit
2350569a78
3 changed files with 81 additions and 25 deletions
|
|
@ -69,6 +69,7 @@
|
||||||
#define VC4_DIRTY_COMPILED_CS (1 << 23)
|
#define VC4_DIRTY_COMPILED_CS (1 << 23)
|
||||||
#define VC4_DIRTY_COMPILED_VS (1 << 24)
|
#define VC4_DIRTY_COMPILED_VS (1 << 24)
|
||||||
#define VC4_DIRTY_COMPILED_FS (1 << 25)
|
#define VC4_DIRTY_COMPILED_FS (1 << 25)
|
||||||
|
#define VC4_DIRTY_FS_INPUTS (1 << 26)
|
||||||
|
|
||||||
struct vc4_sampler_view {
|
struct vc4_sampler_view {
|
||||||
struct pipe_sampler_view base;
|
struct pipe_sampler_view base;
|
||||||
|
|
@ -123,6 +124,17 @@ struct vc4_ubo_range {
|
||||||
uint32_t size;
|
uint32_t size;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct vc4_fs_inputs {
|
||||||
|
/**
|
||||||
|
* Array of the meanings of the VPM inputs this shader needs.
|
||||||
|
*
|
||||||
|
* It doesn't include those that aren't part of the VPM, like
|
||||||
|
* point/line coordinates.
|
||||||
|
*/
|
||||||
|
struct vc4_varying_slot *input_slots;
|
||||||
|
uint32_t num_inputs;
|
||||||
|
};
|
||||||
|
|
||||||
struct vc4_compiled_shader {
|
struct vc4_compiled_shader {
|
||||||
uint64_t program_id;
|
uint64_t program_id;
|
||||||
struct vc4_bo *bo;
|
struct vc4_bo *bo;
|
||||||
|
|
@ -152,13 +164,7 @@ struct vc4_compiled_shader {
|
||||||
uint8_t vattr_offsets[9];
|
uint8_t vattr_offsets[9];
|
||||||
uint8_t vattrs_live;
|
uint8_t vattrs_live;
|
||||||
|
|
||||||
/**
|
const struct vc4_fs_inputs *fs_inputs;
|
||||||
* Array of the meanings of the VPM inputs this shader needs.
|
|
||||||
*
|
|
||||||
* It doesn't include those that aren't part of the VPM, like
|
|
||||||
* point/line coordinates.
|
|
||||||
*/
|
|
||||||
struct vc4_varying_slot *input_slots;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct vc4_program_stateobj {
|
struct vc4_program_stateobj {
|
||||||
|
|
@ -270,6 +276,7 @@ struct vc4_context {
|
||||||
struct primconvert_context *primconvert;
|
struct primconvert_context *primconvert;
|
||||||
|
|
||||||
struct hash_table *fs_cache, *vs_cache;
|
struct hash_table *fs_cache, *vs_cache;
|
||||||
|
struct set *fs_inputs_set;
|
||||||
uint32_t next_uncompiled_program_id;
|
uint32_t next_uncompiled_program_id;
|
||||||
uint64_t next_compiled_program_id;
|
uint64_t next_compiled_program_id;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2100,8 +2100,8 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
|
||||||
break;
|
break;
|
||||||
case QSTAGE_VERT:
|
case QSTAGE_VERT:
|
||||||
emit_vert_end(c,
|
emit_vert_end(c,
|
||||||
vc4->prog.fs->input_slots,
|
c->vs_key->fs_inputs->input_slots,
|
||||||
vc4->prog.fs->num_inputs);
|
c->vs_key->fs_inputs->num_inputs);
|
||||||
break;
|
break;
|
||||||
case QSTAGE_COORD:
|
case QSTAGE_COORD:
|
||||||
emit_coord_end(c);
|
emit_coord_end(c);
|
||||||
|
|
@ -2207,6 +2207,13 @@ static void
|
||||||
vc4_setup_compiled_fs_inputs(struct vc4_context *vc4, struct vc4_compile *c,
|
vc4_setup_compiled_fs_inputs(struct vc4_context *vc4, struct vc4_compile *c,
|
||||||
struct vc4_compiled_shader *shader)
|
struct vc4_compiled_shader *shader)
|
||||||
{
|
{
|
||||||
|
struct vc4_fs_inputs inputs;
|
||||||
|
|
||||||
|
memset(&inputs, 0, sizeof(inputs));
|
||||||
|
inputs.input_slots = ralloc_array(shader,
|
||||||
|
struct vc4_varying_slot,
|
||||||
|
c->num_input_slots);
|
||||||
|
|
||||||
bool input_live[c->num_input_slots];
|
bool input_live[c->num_input_slots];
|
||||||
|
|
||||||
memset(input_live, 0, sizeof(input_live));
|
memset(input_live, 0, sizeof(input_live));
|
||||||
|
|
@ -2217,10 +2224,6 @@ vc4_setup_compiled_fs_inputs(struct vc4_context *vc4, struct vc4_compile *c,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
shader->input_slots = ralloc_array(shader,
|
|
||||||
struct vc4_varying_slot,
|
|
||||||
c->num_input_slots);
|
|
||||||
|
|
||||||
for (int i = 0; i < c->num_input_slots; i++) {
|
for (int i = 0; i < c->num_input_slots; i++) {
|
||||||
struct vc4_varying_slot *slot = &c->input_slots[i];
|
struct vc4_varying_slot *slot = &c->input_slots[i];
|
||||||
|
|
||||||
|
|
@ -2235,11 +2238,33 @@ vc4_setup_compiled_fs_inputs(struct vc4_context *vc4, struct vc4_compile *c,
|
||||||
slot->slot == VARYING_SLOT_COL1 ||
|
slot->slot == VARYING_SLOT_COL1 ||
|
||||||
slot->slot == VARYING_SLOT_BFC0 ||
|
slot->slot == VARYING_SLOT_BFC0 ||
|
||||||
slot->slot == VARYING_SLOT_BFC1) {
|
slot->slot == VARYING_SLOT_BFC1) {
|
||||||
shader->color_inputs |= (1 << shader->num_inputs);
|
shader->color_inputs |= (1 << inputs.num_inputs);
|
||||||
}
|
}
|
||||||
|
|
||||||
shader->input_slots[shader->num_inputs] = *slot;
|
inputs.input_slots[inputs.num_inputs] = *slot;
|
||||||
shader->num_inputs++;
|
inputs.num_inputs++;
|
||||||
|
}
|
||||||
|
shader->num_inputs = inputs.num_inputs;
|
||||||
|
|
||||||
|
/* Add our set of inputs to the set of all inputs seen. This way, we
|
||||||
|
* can have a single pointer that identifies an FS inputs set,
|
||||||
|
* allowing VS to avoid recompiling when the FS is recompiled (or a
|
||||||
|
* new one is bound using separate shader objects) but the inputs
|
||||||
|
* don't change.
|
||||||
|
*/
|
||||||
|
struct set_entry *entry = _mesa_set_search(vc4->fs_inputs_set, &inputs);
|
||||||
|
if (entry) {
|
||||||
|
shader->fs_inputs = entry->key;
|
||||||
|
ralloc_free(inputs.input_slots);
|
||||||
|
} else {
|
||||||
|
struct vc4_fs_inputs *alloc_inputs;
|
||||||
|
|
||||||
|
alloc_inputs = rzalloc(vc4->fs_inputs_set, struct vc4_fs_inputs);
|
||||||
|
memcpy(alloc_inputs, &inputs, sizeof(inputs));
|
||||||
|
ralloc_steal(alloc_inputs, inputs.input_slots);
|
||||||
|
_mesa_set_add(vc4->fs_inputs_set, alloc_inputs);
|
||||||
|
|
||||||
|
shader->fs_inputs = alloc_inputs;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -2434,10 +2459,14 @@ vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
vc4->dirty |= VC4_DIRTY_COMPILED_FS;
|
vc4->dirty |= VC4_DIRTY_COMPILED_FS;
|
||||||
|
|
||||||
if (vc4->rasterizer->base.flatshade &&
|
if (vc4->rasterizer->base.flatshade &&
|
||||||
old_fs && vc4->prog.fs->color_inputs != old_fs->color_inputs) {
|
old_fs && vc4->prog.fs->color_inputs != old_fs->color_inputs) {
|
||||||
vc4->dirty |= VC4_DIRTY_FLAT_SHADE_FLAGS;
|
vc4->dirty |= VC4_DIRTY_FLAT_SHADE_FLAGS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (old_fs && vc4->prog.fs->fs_inputs != old_fs->fs_inputs)
|
||||||
|
vc4->dirty |= VC4_DIRTY_FS_INPUTS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
@ -2451,14 +2480,14 @@ vc4_update_compiled_vs(struct vc4_context *vc4, uint8_t prim_mode)
|
||||||
VC4_DIRTY_VERTTEX |
|
VC4_DIRTY_VERTTEX |
|
||||||
VC4_DIRTY_VTXSTATE |
|
VC4_DIRTY_VTXSTATE |
|
||||||
VC4_DIRTY_UNCOMPILED_VS |
|
VC4_DIRTY_UNCOMPILED_VS |
|
||||||
VC4_DIRTY_COMPILED_FS))) {
|
VC4_DIRTY_FS_INPUTS))) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
memset(key, 0, sizeof(*key));
|
memset(key, 0, sizeof(*key));
|
||||||
vc4_setup_shared_key(vc4, &key->base, &vc4->verttex);
|
vc4_setup_shared_key(vc4, &key->base, &vc4->verttex);
|
||||||
key->base.shader_state = vc4->prog.bind_vs;
|
key->base.shader_state = vc4->prog.bind_vs;
|
||||||
key->compiled_fs_id = vc4->prog.fs->program_id;
|
key->fs_inputs = vc4->prog.fs->fs_inputs;
|
||||||
key->clamp_color = vc4->rasterizer->base.clamp_vertex_color;
|
key->clamp_color = vc4->rasterizer->base.clamp_vertex_color;
|
||||||
|
|
||||||
for (int i = 0; i < ARRAY_SIZE(key->attr_formats); i++)
|
for (int i = 0; i < ARRAY_SIZE(key->attr_formats); i++)
|
||||||
|
|
@ -2477,7 +2506,7 @@ vc4_update_compiled_vs(struct vc4_context *vc4, uint8_t prim_mode)
|
||||||
|
|
||||||
key->is_coord = true;
|
key->is_coord = true;
|
||||||
/* Coord shaders don't care what the FS inputs are. */
|
/* Coord shaders don't care what the FS inputs are. */
|
||||||
key->compiled_fs_id = 0;
|
key->fs_inputs = NULL;
|
||||||
struct vc4_compiled_shader *cs =
|
struct vc4_compiled_shader *cs =
|
||||||
vc4_get_compiled_shader(vc4, QSTAGE_COORD, &key->base);
|
vc4_get_compiled_shader(vc4, QSTAGE_COORD, &key->base);
|
||||||
if (cs != vc4->prog.cs) {
|
if (cs != vc4->prog.cs) {
|
||||||
|
|
@ -2517,6 +2546,29 @@ vs_cache_compare(const void *key1, const void *key2)
|
||||||
return memcmp(key1, key2, sizeof(struct vc4_vs_key)) == 0;
|
return memcmp(key1, key2, sizeof(struct vc4_vs_key)) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static uint32_t
|
||||||
|
fs_inputs_hash(const void *key)
|
||||||
|
{
|
||||||
|
const struct vc4_fs_inputs *inputs = key;
|
||||||
|
|
||||||
|
return _mesa_hash_data(inputs->input_slots,
|
||||||
|
sizeof(*inputs->input_slots) *
|
||||||
|
inputs->num_inputs);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
fs_inputs_compare(const void *key1, const void *key2)
|
||||||
|
{
|
||||||
|
const struct vc4_fs_inputs *inputs1 = key1;
|
||||||
|
const struct vc4_fs_inputs *inputs2 = key2;
|
||||||
|
|
||||||
|
return (inputs1->num_inputs == inputs2->num_inputs &&
|
||||||
|
memcmp(inputs1->input_slots,
|
||||||
|
inputs2->input_slots,
|
||||||
|
sizeof(*inputs1->input_slots) *
|
||||||
|
inputs1->num_inputs) == 0);
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
delete_from_cache_if_matches(struct hash_table *ht,
|
delete_from_cache_if_matches(struct hash_table *ht,
|
||||||
struct hash_entry *entry,
|
struct hash_entry *entry,
|
||||||
|
|
@ -2582,6 +2634,8 @@ vc4_program_init(struct pipe_context *pctx)
|
||||||
fs_cache_compare);
|
fs_cache_compare);
|
||||||
vc4->vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash,
|
vc4->vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash,
|
||||||
vs_cache_compare);
|
vs_cache_compare);
|
||||||
|
vc4->fs_inputs_set = _mesa_set_create(pctx, fs_inputs_hash,
|
||||||
|
fs_inputs_compare);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
||||||
|
|
@ -352,12 +352,7 @@ struct vc4_fs_key {
|
||||||
struct vc4_vs_key {
|
struct vc4_vs_key {
|
||||||
struct vc4_key base;
|
struct vc4_key base;
|
||||||
|
|
||||||
/**
|
const struct vc4_fs_inputs *fs_inputs;
|
||||||
* This is a proxy for the array of FS input semantics, which is
|
|
||||||
* larger than we would want to put in the key.
|
|
||||||
*/
|
|
||||||
uint64_t compiled_fs_id;
|
|
||||||
|
|
||||||
enum pipe_format attr_formats[8];
|
enum pipe_format attr_formats[8];
|
||||||
bool is_coord;
|
bool is_coord;
|
||||||
bool per_vertex_point_size;
|
bool per_vertex_point_size;
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue