diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 8f9771ba3bc..620425196ba 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -323,16 +323,22 @@ panfrost_emit_compute_shader(struct panfrost_context *ctx, if (dev->quirks & IS_BIFROST) { struct mali_bifrost_properties_packed prop; + struct mali_preload_vertex_packed preload; pan_pack(&prop, BIFROST_PROPERTIES, cfg) { cfg.unknown = 0x800000; /* XXX */ cfg.uniform_buffer_count = panfrost_ubo_count(ctx, st); } - memcpy(&meta->bifrost_props, &prop, sizeof(prop)); + /* TODO: True compute shaders */ + pan_pack(&preload, PRELOAD_VERTEX, cfg) { + cfg.uniform_count = ss->uniform_count; + cfg.vertex_id = true; + cfg.instance_id = true; + } - meta->bifrost2.preload_regs = 0xC0; - meta->bifrost2.uniform_count = ss->uniform_count; + memcpy(&meta->bifrost_props, &prop, sizeof(prop)); + memcpy(&meta->bifrost_preload, &preload, sizeof(preload)); } else { struct mali_midgard_properties_packed prop; @@ -572,6 +578,7 @@ panfrost_emit_frag_shader(struct panfrost_context *ctx, if (dev->quirks & IS_BIFROST) { struct mali_bifrost_properties_packed prop; + struct mali_preload_fragment_packed preload; bool no_blend = true; @@ -584,12 +591,13 @@ panfrost_emit_frag_shader(struct panfrost_context *ctx, cfg.early_z_enable = !fs->can_discard && !fs->writes_depth && no_blend; } + pan_pack(&preload, PRELOAD_FRAGMENT, cfg) { + cfg.uniform_count = fs->uniform_count; + cfg.fragment_position = fs->reads_frag_coord; + } + memcpy(&fragmeta->bifrost_props, &prop, sizeof(prop)); - - fragmeta->bifrost2.preload_regs = 0x1; - SET_BIT(fragmeta->bifrost2.preload_regs, 0x10, fs->reads_frag_coord); - - fragmeta->bifrost2.uniform_count = fs->uniform_count; + memcpy(&fragmeta->bifrost_preload, &preload, sizeof(preload)); } else { struct mali_midgard_properties_packed prop; diff --git a/src/panfrost/bifrost/test/bi_submit.c b/src/panfrost/bifrost/test/bi_submit.c index 3688e2be1a5..542ed72c16c 100644 --- a/src/panfrost/bifrost/test/bi_submit.c +++ b/src/panfrost/bifrost/test/bi_submit.c @@ -176,12 +176,7 @@ bit_vertex(struct panfrost_device *dev, panfrost_program prog, .attribute_count = 1, .varying_count = 1, .bifrost_props = { .opaque = { 0x80020001 } }, - .bifrost2 = { - .unk3 = 0x0, - .preload_regs = 0xc0, - .uniform_count = sz_ubo / 16, - .unk4 = 0x0, - }, + .bifrost_preload = { .opaque = { (sz_ubo / 16) << 15 } }, }; memcpy(shader_desc->cpu, &meta, sizeof(meta)); diff --git a/src/panfrost/include/panfrost-job.h b/src/panfrost/include/panfrost-job.h index af70f56be3a..e2267069860 100644 --- a/src/panfrost/include/panfrost-job.h +++ b/src/panfrost/include/panfrost-job.h @@ -326,52 +326,7 @@ struct mali_shader_meta { struct mali_stencil_packed stencil_back; union { - struct { - u32 unk3 : 7; - /* On Bifrost, some system values are preloaded in - * registers R55-R62 by the thread dispatcher prior to - * the start of shader execution. This is a bitfield - * with one entry for each register saying which - * registers need to be preloaded. Right now, the known - * values are: - * - * Vertex/compute: - * - R55 : gl_LocalInvocationID.xy - * - R56 : gl_LocalInvocationID.z + unknown in high 16 bits - * - R57 : gl_WorkGroupID.x - * - R58 : gl_WorkGroupID.y - * - R59 : gl_WorkGroupID.z - * - R60 : gl_GlobalInvocationID.x - * - R61 : gl_GlobalInvocationID.y/gl_VertexID (without base) - * - R62 : gl_GlobalInvocationID.z/gl_InstanceID (without base) - * - * Fragment: - * - R55 : unknown, never seen (but the bit for this is - * always set?) - * - R56 : unknown (bit always unset) - * - R57 : gl_PrimitiveID - * - R58 : gl_FrontFacing in low bit, potentially other stuff - * - R59 : u16 fragment coordinates (used to compute - * gl_FragCoord.xy, together with sample positions) - * - R60 : gl_SampleMask (used in epilog, so pretty - * much always used, but the bit is always 0 -- is - * this just always pushed?) - * - R61 : gl_SampleMaskIn and gl_SampleID, used by - * varying interpolation. - * - R62 : unknown (bit always unset). - * - * Later GPUs (starting with Mali-G52?) support - * preloading float varyings into r0-r7. This is - * indicated by setting 0x40. There is no distinction - * here between 1 varying and 2. - */ - u32 preload_regs : 8; - /* In units of 8 bytes or 64 bits, since the - * uniform/const port loads 64 bits at a time. - */ - u32 uniform_count : 7; - u32 unk4 : 10; // = 2 - } bifrost2; + struct mali_preload_packed bifrost_preload; struct { u32 unknown2_7; } midgard2; diff --git a/src/panfrost/lib/decode.c b/src/panfrost/lib/decode.c index 6b2e71ae12b..fcfc5907451 100644 --- a/src/panfrost/lib/decode.c +++ b/src/panfrost/lib/decode.c @@ -1731,6 +1731,7 @@ pandecode_vertex_tiler_postfix_pre( struct MALI_MIDGARD_PROPERTIES midg_props; struct MALI_BIFROST_PROPERTIES bi_props; + struct MALI_PRELOAD bi_preload; pandecode_log("struct mali_shader_meta shader_meta_%"PRIx64"_%d%s = {\n", p->shader, job_no, suffix); pandecode_indent++; @@ -1745,7 +1746,10 @@ pandecode_vertex_tiler_postfix_pre( uint32_t opaque = s->bifrost_props.opaque[0]; MALI_BIFROST_PROPERTIES_unpack((const uint8_t *) &opaque, &bi_props); - uniform_count = s->bifrost2.uniform_count; + opaque = s->bifrost_preload.opaque[0]; + MALI_PRELOAD_unpack((const uint8_t *) &opaque, &bi_preload); + + uniform_count = bi_preload.uniform_count; uniform_buffer_count = bi_props.uniform_buffer_count; } else { uint32_t opaque = s->midgard_props.opaque[0]; @@ -1767,6 +1771,24 @@ pandecode_vertex_tiler_postfix_pre( else MALI_MIDGARD_PROPERTIES_print(pandecode_dump_stream, &midg_props, 2); + if (is_bifrost) { + uint32_t opaque = s->bifrost_preload.opaque[0]; + switch (job_type) { + case MALI_JOB_TYPE_VERTEX: + DUMP_CL("Preload", PRELOAD_VERTEX, &opaque, 2); + break; + case MALI_JOB_TYPE_TILER: + DUMP_CL("Preload", PRELOAD_FRAGMENT, &opaque, 2); + break; + case MALI_JOB_TYPE_COMPUTE: + DUMP_CL("Preload", PRELOAD_COMPUTE, &opaque, 2); + break; + default: + DUMP_CL("Preload", PRELOAD, &opaque, 2); + break; + } + } + if (s->depth_units || s->depth_factor) { pandecode_prop("depth_factor = %f", s->depth_factor); pandecode_prop("depth_units = %f", s->depth_units); @@ -1809,18 +1831,7 @@ pandecode_vertex_tiler_postfix_pre( DUMP_CL("Stencil front", STENCIL, &s->stencil_front, 1); DUMP_CL("Stencil back", STENCIL, &s->stencil_back, 1); - if (is_bifrost) { - pandecode_log(".bifrost2 = {\n"); - pandecode_indent++; - - pandecode_prop("unk3 = 0x%" PRIx32, s->bifrost2.unk3); - pandecode_prop("preload_regs = 0x%" PRIx32, s->bifrost2.preload_regs); - pandecode_prop("uniform_count = %" PRId32, s->bifrost2.uniform_count); - pandecode_prop("unk4 = 0x%" PRIx32, s->bifrost2.unk4); - - pandecode_indent--; - pandecode_log("},\n"); - } else if (s->midgard2.unknown2_7) { + if (!is_bifrost && s->midgard2.unknown2_7) { pandecode_log(".midgard2 = {\n"); pandecode_indent++; diff --git a/src/panfrost/lib/midgard.xml b/src/panfrost/lib/midgard.xml index 352398b515f..d6fa7e4a134 100644 --- a/src/panfrost/lib/midgard.xml +++ b/src/panfrost/lib/midgard.xml @@ -363,6 +363,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +