diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 7fad87f7e6d..15680c000bc 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -4456,9 +4456,12 @@ prepare_shader(struct panfrost_compiled_shader *state, else if (vs) cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF; #endif - +#if PAN_ARCH >= 15 + cfg.register_count = state->info.work_reg_count; +#else cfg.register_allocation = pan_register_allocation(state->info.work_reg_count); +#endif cfg.binary = state->bin.gpu; cfg.preload.r48_r63 = (state->info.preload >> 48); cfg.flush_to_zero_mode = panfrost_ftz_mode(&state->info); @@ -4476,8 +4479,12 @@ prepare_shader(struct panfrost_compiled_shader *state, #if PAN_ARCH < 12 cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF; #endif +#if PAN_ARCH >= 15 + cfg.register_count = state->info.work_reg_count; +#else cfg.register_allocation = pan_register_allocation(state->info.work_reg_count); +#endif cfg.binary = state->bin.gpu + state->info.vs.no_psiz_offset; cfg.preload.r48_r63 = (state->info.preload >> 48); cfg.flush_to_zero_mode = panfrost_ftz_mode(&state->info); diff --git a/src/gallium/drivers/panfrost/pan_fb_preload.c b/src/gallium/drivers/panfrost/pan_fb_preload.c index 172398b6ec8..be4c2c9965e 100644 --- a/src/gallium/drivers/panfrost/pan_fb_preload.c +++ b/src/gallium/drivers/panfrost/pan_fb_preload.c @@ -1105,7 +1105,11 @@ pan_preload_emit_dcd(struct pan_fb_preload_cache *cache, struct pan_pool *pool, pan_cast_and_pack(spd.cpu, SHADER_PROGRAM, cfg) { cfg.stage = MALI_SHADER_STAGE_FRAGMENT; cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL; +#if PAN_ARCH >= 15 + cfg.register_count = preload_shader->info.work_reg_count; +#else cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD; +#endif cfg.binary = preload_shader->address; cfg.preload.r48_r63 = preload_shader->info.preload >> 48; } diff --git a/src/gallium/drivers/panfrost/pan_precomp.c b/src/gallium/drivers/panfrost/pan_precomp.c index da2d3f51f7d..9ca77b32dd7 100644 --- a/src/gallium/drivers/panfrost/pan_precomp.c +++ b/src/gallium/drivers/panfrost/pan_precomp.c @@ -98,8 +98,12 @@ panfrost_precomp_shader_create( pan_cast_and_pack(spd.cpu, SHADER_PROGRAM, cfg) { cfg.stage = pan_shader_stage(&res->info); +#if PAN_ARCH >= 15 + cfg.register_count = res->info.work_reg_count; +#else cfg.register_allocation = pan_register_allocation(res->info.work_reg_count); +#endif cfg.binary = res->code_ptr; cfg.preload.r48_r63 = (res->info.preload >> 48); cfg.flush_to_zero_mode = panfrost_ftz_mode(&res->info); @@ -326,7 +330,17 @@ GENX(panfrost_launch_precomp)(struct panfrost_batch *batch, uint64_t fau_ptr = push_uniforms.gpu | (fau_count << 56); cs_move64_to(b, cs_sr_reg64(b, COMPUTE, FAU_0), fau_ptr); +#if PAN_ARCH >= 15 + struct mali_shader_program_pointer_packed spp; + pan_pack(&spp, SHADER_PROGRAM_POINTER, ctx) { + ctx.register_count = shader->info.work_reg_count; + ctx.pointer = shader->state_ptr; + } + uint64_t ptr = ((uint64_t)spp.opaque[1] << 32) | spp.opaque[0]; + cs_move64_to(b, cs_sr_reg64(b, COMPUTE, SPD_0), ptr); +#else cs_move64_to(b, cs_sr_reg64(b, COMPUTE, SPD_0), shader->state_ptr); +#endif cs_move64_to(b, cs_sr_reg64(b, COMPUTE, TSD_0), tsd); /* Global attribute offset */ diff --git a/src/panfrost/genxml/decode_csf.c b/src/panfrost/genxml/decode_csf.c index 10f062cebda..7c43991f64d 100644 --- a/src/panfrost/genxml/decode_csf.c +++ b/src/panfrost/genxml/decode_csf.c @@ -651,8 +651,19 @@ pandecode_run_compute(struct pandecode_context *ctx, FILE *fp, if (fau) GENX(pandecode_fau)(ctx, fau & BITFIELD64_MASK(48), fau >> 56, "FAU"); - GENX(pandecode_shader) - (ctx, cs_get_u64(qctx, reg_spd), "Shader", qctx->gpu_id); + uint64_t addr = cs_get_u64(qctx, reg_spd); +#if PAN_ARCH >= 15 + const struct mali_shader_program_pointer_packed spp_packed = { + .opaque[0] = addr & 0xFFFFFFFF, + .opaque[1] = (addr >> 32) & 0xFFFFFFFF, + }; + pan_unpack(&spp_packed, SHADER_PROGRAM_POINTER, spp) + ; + DUMP_UNPACKED(ctx, SHADER_PROGRAM_POINTER, spp, + "Shader Program Pointer (%" PRIx64 "):\n", addr); + addr = spp.pointer; +#endif + GENX(pandecode_shader)(ctx, addr, "Shader", qctx->gpu_id); DUMP_ADDR(ctx, LOCAL_STORAGE, cs_get_u64(qctx, reg_tsd), "Local Storage @%" PRIx64 ":\n", cs_get_u64(qctx, reg_tsd)); @@ -693,8 +704,19 @@ pandecode_run_compute_indirect(struct pandecode_context *ctx, FILE *fp, if (fau) GENX(pandecode_fau)(ctx, fau & BITFIELD64_MASK(48), fau >> 56, "FAU"); - GENX(pandecode_shader) - (ctx, cs_get_u64(qctx, reg_spd), "Shader", qctx->gpu_id); + uint64_t addr = cs_get_u64(qctx, reg_spd); +#if PAN_ARCH >= 15 + const struct mali_shader_program_pointer_packed spp_packed = { + .opaque[0] = addr & 0xFFFFFFFF, + .opaque[1] = (addr >> 32) & 0xFFFFFFFF, + }; + pan_unpack(&spp_packed, SHADER_PROGRAM_POINTER, spp) + ; + DUMP_UNPACKED(ctx, SHADER_PROGRAM_POINTER, spp, + "Shader Program Pointer (%" PRIx64 "):\n", addr); + addr = spp.pointer; +#endif + GENX(pandecode_shader)(ctx, addr, "Shader", qctx->gpu_id); DUMP_ADDR(ctx, LOCAL_STORAGE, cs_get_u64(qctx, reg_tsd), "Local Storage @%" PRIx64 ":\n", cs_get_u64(qctx, reg_tsd)); diff --git a/src/panfrost/genxml/v15.xml b/src/panfrost/genxml/v15.xml index 2b35043f964..983834f16e4 100644 --- a/src/panfrost/genxml/v15.xml +++ b/src/panfrost/genxml/v15.xml @@ -2040,14 +2040,20 @@ + - + + + + + + diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c index 1fd8e437d49..8de3de939b0 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c @@ -209,9 +209,20 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) cs_move64_to(b, cs_reg64(b, PANVK_COMPUTE_FAU), fau_ptr); } - if (compute_state_dirty(cmdbuf, CS)) + if (compute_state_dirty(cmdbuf, CS)) { +#if PAN_ARCH >= 15 + struct mali_shader_program_pointer_packed spp; + pan_pack(&spp, SHADER_PROGRAM_POINTER, ctx) { + ctx.register_count = cs->info.work_reg_count; + ctx.pointer = panvk_priv_mem_dev_addr(cs->spd); + } + uint64_t ptr = ((uint64_t)spp.opaque[1] << 32) | spp.opaque[0]; + cs_move64_to(b, cs_reg64(b, PANVK_COMPUTE_SPD), ptr); +#else cs_move64_to(b, cs_reg64(b, PANVK_COMPUTE_SPD), panvk_priv_mem_dev_addr(cs->spd)); +#endif + } cs_move64_to(b, cs_reg64(b, PANVK_COMPUTE_TSD), tsd); diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c index 56f6c546217..386f2b317a5 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c @@ -82,8 +82,18 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx, uint64_t fau_ptr = push_uniforms.gpu | (fau_count << 56); cs_move64_to(b, cs_reg64(b, PANVK_PRECOMP_FAU), fau_ptr); +#if PAN_ARCH >= 15 + struct mali_shader_program_pointer_packed spp; + pan_pack(&spp, SHADER_PROGRAM_POINTER, ctx) { + ctx.register_count = shader->info.work_reg_count; + ctx.pointer = panvk_priv_mem_dev_addr(shader->spd); + } + uint64_t ptr = ((uint64_t)spp.opaque[1] << 32) | spp.opaque[0]; + cs_move64_to(b, cs_reg64(b, PANVK_PRECOMP_SPD), ptr); +#else cs_move64_to(b, cs_reg64(b, PANVK_PRECOMP_SPD), panvk_priv_mem_dev_addr(shader->spd)); +#endif cs_move64_to(b, cs_reg64(b, PANVK_PRECOMP_TSD), tsd); diff --git a/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c b/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c index 3bd5eda41f0..afa2692bde0 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c @@ -239,8 +239,12 @@ get_frame_shader(struct panvk_device *dev, panvk_priv_mem_write_desc(shader->spd, 0, SHADER_PROGRAM, cfg) { cfg.stage = MALI_SHADER_STAGE_FRAGMENT; cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL; +#if PAN_ARCH >= 15 + cfg.register_count = shader->info.work_reg_count; +#else cfg.register_allocation = pan_register_allocation(shader->info.work_reg_count); +#endif cfg.binary = panvk_priv_mem_dev_addr(shader->code_mem); cfg.preload.r48_r63 = shader->info.preload >> 48; } diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index 03da805a49c..927a72cd7c9 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -1180,8 +1180,12 @@ panvk_shader_upload(struct panvk_device *dev, cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF; #endif +#if PAN_ARCH >= 15 + cfg.register_count = shader->info.work_reg_count; +#else cfg.register_allocation = pan_register_allocation(shader->info.work_reg_count); +#endif cfg.binary = panvk_shader_variant_get_dev_addr(shader); cfg.preload.r48_r63 = (shader->info.preload >> 48); cfg.flush_to_zero_mode = shader_ftz_mode(shader); @@ -1199,8 +1203,12 @@ panvk_shader_upload(struct panvk_device *dev, panvk_priv_mem_write_desc(shader->spds.all_points, 0, SHADER_PROGRAM, cfg) { cfg.stage = pan_shader_stage(&shader->info); +#if PAN_ARCH >= 15 + cfg.register_count = shader->info.work_reg_count; +#else cfg.register_allocation = pan_register_allocation(shader->info.work_reg_count); +#endif cfg.binary = panvk_shader_variant_get_dev_addr(shader); cfg.preload.r48_r63 = (shader->info.preload >> 48); cfg.flush_to_zero_mode = shader_ftz_mode(shader); @@ -1214,8 +1222,12 @@ panvk_shader_upload(struct panvk_device *dev, panvk_priv_mem_write_desc(shader->spds.all_triangles, 0, SHADER_PROGRAM, cfg) { cfg.stage = pan_shader_stage(&shader->info); +#if PAN_ARCH >= 15 + cfg.register_count = shader->info.work_reg_count; +#else cfg.register_allocation = pan_register_allocation(shader->info.work_reg_count); +#endif cfg.binary = panvk_shader_variant_get_dev_addr(shader) + shader->info.vs.no_psiz_offset; cfg.preload.r48_r63 = (shader->info.preload >> 48);