panfrost: Prepare things to get rid of panfrost_shader_state.tripipe

panfrost_shader_state.tripipe is used as a template for shader_meta
desc emission, but shader_meta desc preparation time should be negligible
compared to desc emission time (remember we are writing to non-cacheable
memory here). Let's prepare for generating the shader_meta desc
entirely at draw time by adding the necessary fields to
panfrost_shader_state.

Note that we might bring back some sort of shader_meta desc caching at
some point, but let's simplify things a bit for now.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4083>
This commit is contained in:
Boris Brezillon 2020-03-05 15:17:31 +01:00
parent e94076f8f5
commit 55e014336f
3 changed files with 33 additions and 21 deletions

View file

@ -81,9 +81,11 @@ panfrost_shader_compile(
state->bo = panfrost_bo_create(screen, size, PAN_BO_EXECUTE);
memcpy(state->bo->cpu, dst, size);
meta->shader = state->bo->gpu | program.first_tag;
state->first_tag = program.first_tag;
} else {
/* No shader. Use dummy tag to avoid INSTR_INVALID_ENC */
meta->shader = 0x0 | 1;
state->first_tag = 1;
}
util_dynarray_fini(&program.compiled);
@ -101,19 +103,19 @@ panfrost_shader_compile(
switch (stage) {
case MESA_SHADER_VERTEX:
meta->attribute_count = util_bitcount64(s->info.inputs_read);
meta->varying_count = util_bitcount64(s->info.outputs_written);
state->attribute_count = util_bitcount64(s->info.inputs_read);
state->varying_count = util_bitcount64(s->info.outputs_written);
if (vertex_id)
meta->attribute_count = MAX2(meta->attribute_count, PAN_VERTEX_ID + 1);
state->attribute_count = MAX2(state->attribute_count, PAN_VERTEX_ID + 1);
if (instance_id)
meta->attribute_count = MAX2(meta->attribute_count, PAN_INSTANCE_ID + 1);
state->attribute_count = MAX2(state->attribute_count, PAN_INSTANCE_ID + 1);
break;
case MESA_SHADER_FRAGMENT:
meta->attribute_count = 0;
meta->varying_count = util_bitcount64(s->info.inputs_read);
state->attribute_count = 0;
state->varying_count = util_bitcount64(s->info.inputs_read);
if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
state->writes_depth = true;
if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
@ -121,8 +123,8 @@ panfrost_shader_compile(
break;
case MESA_SHADER_COMPUTE:
/* TODO: images */
meta->attribute_count = 0;
meta->varying_count = 0;
state->attribute_count = 0;
state->varying_count = 0;
state->shared_size = s->info.cs.shared_size;
break;
default:
@ -140,7 +142,11 @@ panfrost_shader_compile(
/* Separate as primary uniform count is truncated */
state->uniform_count = program.uniform_count;
state->uniform_cutoff = program.uniform_cutoff;
state->work_reg_count = program.work_register_count;
meta->attribute_count = state->attribute_count;
meta->varying_count = state->varying_count;
meta->midgard1.flags_hi = 8; /* XXX */
unsigned default_vec1_swizzle = panfrost_get_default_swizzle(1);
@ -148,7 +154,7 @@ panfrost_shader_compile(
unsigned default_vec4_swizzle = panfrost_get_default_swizzle(4);
/* Iterate the varyings and emit the corresponding descriptor */
for (unsigned i = 0; i < meta->varying_count; ++i) {
for (unsigned i = 0; i < state->varying_count; ++i) {
unsigned location = program.varyings[i];
/* Default to a vec4 varying */

View file

@ -192,6 +192,9 @@ struct panfrost_shader_state {
/* Non-descript information */
int uniform_count;
unsigned uniform_cutoff;
unsigned work_reg_count;
unsigned attribute_count;
bool can_discard;
bool writes_point_size;
bool writes_depth;
@ -202,6 +205,8 @@ struct panfrost_shader_state {
unsigned stack_size;
unsigned shared_size;
unsigned int varying_count;
struct mali_attr_meta varyings[PIPE_MAX_ATTRIBS];
gl_varying_slot varyings_loc[PIPE_MAX_ATTRIBS];
struct pipe_stream_output_info stream_output;
@ -219,6 +224,7 @@ struct panfrost_shader_state {
/* Should we enable helper invocations */
bool helper_invocations;
unsigned first_tag;
struct panfrost_bo *bo;
};

View file

@ -108,7 +108,7 @@ panfrost_emit_varying_meta(
{
struct mali_attr_meta *out = (struct mali_attr_meta *) outptr;
for (unsigned i = 0; i < ss->tripipe->varying_count; ++i) {
for (unsigned i = 0; i < ss->varying_count; ++i) {
gl_varying_slot location = ss->varyings_loc[i];
int index = -1;
@ -186,8 +186,8 @@ panfrost_emit_varying_descriptor(
/* Allocate the varying descriptor */
size_t vs_size = sizeof(struct mali_attr_meta) * vs->tripipe->varying_count;
size_t fs_size = sizeof(struct mali_attr_meta) * fs->tripipe->varying_count;
size_t vs_size = sizeof(struct mali_attr_meta) * vs->varying_count;
size_t fs_size = sizeof(struct mali_attr_meta) * fs->varying_count;
struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
struct panfrost_transfer trans = panfrost_allocate_transient(batch,
@ -200,7 +200,7 @@ panfrost_emit_varying_descriptor(
* not, use the provided stream out information to determine the
* offset, since it was already linked for us. */
for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
for (unsigned i = 0; i < vs->varying_count; i++) {
gl_varying_slot loc = vs->varyings_loc[i];
bool special = is_special_varying(loc);
@ -222,12 +222,12 @@ panfrost_emit_varying_descriptor(
/* Link up with fragment varyings */
bool reads_point_coord = fs->reads_point_coord;
for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
for (unsigned i = 0; i < fs->varying_count; i++) {
gl_varying_slot loc = fs->varyings_loc[i];
signed vs_idx = -1;
/* Link up */
for (unsigned j = 0; j < vs->tripipe->varying_count; ++j) {
for (unsigned j = 0; j < vs->varying_count; ++j) {
if (vs->varyings_loc[j] == loc) {
vs_idx = j;
break;
@ -252,7 +252,7 @@ panfrost_emit_varying_descriptor(
/* Figure out how many streamout buffers could be bound */
unsigned so_count = ctx->streamout.num_targets;
for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
for (unsigned i = 0; i < vs->varying_count; i++) {
gl_varying_slot loc = vs->varyings_loc[i];
bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
@ -331,7 +331,7 @@ panfrost_emit_varying_descriptor(
struct mali_attr_meta *ovs = (struct mali_attr_meta *) (trans.cpu);
struct mali_attr_meta *ofs = (struct mali_attr_meta *) (trans.cpu + vs_size);
for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
for (unsigned i = 0; i < vs->varying_count; i++) {
gl_varying_slot loc = vs->varyings_loc[i];
bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
@ -349,7 +349,7 @@ panfrost_emit_varying_descriptor(
signed fs_idx = -1;
/* Link up */
for (unsigned j = 0; j < fs->tripipe->varying_count; ++j) {
for (unsigned j = 0; j < fs->varying_count; ++j) {
if (fs->varyings_loc[j] == loc) {
fs_idx = j;
break;
@ -364,7 +364,7 @@ panfrost_emit_varying_descriptor(
}
/* Replace point sprite */
for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
for (unsigned i = 0; i < fs->varying_count; i++) {
/* If we have a point sprite replacement, handle that here. We
* have to translate location first. TODO: Flip y in shader.
* We're already keying ... just time crunch .. */
@ -398,12 +398,12 @@ panfrost_emit_varying_descriptor(
varyings[i].elements |= MALI_ATTR_LINEAR;
varyings[i].size += align;
for (unsigned v = 0; v < vs->tripipe->varying_count; ++v) {
for (unsigned v = 0; v < vs->varying_count; ++v) {
if (ovs[v].index == i)
ovs[v].src_offset = vs->varyings[v].src_offset + align;
}
for (unsigned f = 0; f < fs->tripipe->varying_count; ++f) {
for (unsigned f = 0; f < fs->varying_count; ++f) {
if (ofs[f].index == i)
ofs[f].src_offset = fs->varyings[f].src_offset + align;
}