diff --git a/src/gallium/drivers/panfrost/pan_shader.c b/src/gallium/drivers/panfrost/pan_shader.c index 4a3681d81fd..70b44878083 100644 --- a/src/gallium/drivers/panfrost/pan_shader.c +++ b/src/gallium/drivers/panfrost/pan_shader.c @@ -18,6 +18,7 @@ #include "pan_compiler.h" #include "pan_nir.h" #include "pan_trace.h" +#include "compiler/bifrost/bifrost_compile.h" #include "shader_enums.h" static struct panfrost_uncompiled_shader * @@ -179,6 +180,10 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir, key->vs.noperspective_varyings); } + if (dev->arch >= 9 && mesa_shader_stage_is_compute(s->info.stage)) { + out->info.cs.allow_merging_workgroups = valhall_can_merge_workgroups(s); + } + NIR_PASS(_, s, panfrost_nir_lower_sysvals, dev->arch, &out->sysvals); /* For now, we only allow pushing the default UBO 0, and the sysval UBO (if diff --git a/src/panfrost/clc/pan_compile.c b/src/panfrost/clc/pan_compile.c index 9d8026be231..2a1eb7cd264 100644 --- a/src/panfrost/clc/pan_compile.c +++ b/src/panfrost/clc/pan_compile.c @@ -439,6 +439,11 @@ main(int argc, const char **argv) struct util_dynarray shader_binary; struct pan_shader_info shader_info = {0}; shader_binary = UTIL_DYNARRAY_INIT; + + if (target_arch >= 9) + shader_info.cs.allow_merging_workgroups = + valhall_can_merge_workgroups(s); + pan_shader_compile(clone, &inputs, &shader_binary, &shader_info); assert(shader_info.push.count * 4 <= diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.h b/src/panfrost/compiler/bifrost/bifrost_compile.h index 45fc6526383..5b5b8ff55d4 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.h +++ b/src/panfrost/compiler/bifrost/bifrost_compile.h @@ -73,6 +73,8 @@ void bifrost_compile_shader_nir(nir_shader *nir, struct util_dynarray *binary, struct pan_shader_info *info); +bool valhall_can_merge_workgroups(nir_shader *nir); + #define VALHAL_EX_FIFO_VARYING_BITS \ (VARYING_BIT_PSIZ | VARYING_BIT_LAYER | VARYING_BIT_PRIMITIVE_ID) diff --git a/src/panfrost/compiler/bifrost/bifrost_nir.c b/src/panfrost/compiler/bifrost/bifrost_nir.c index 413d24dfa39..891503ec01d 100644 --- a/src/panfrost/compiler/bifrost/bifrost_nir.c +++ b/src/panfrost/compiler/bifrost/bifrost_nir.c @@ -579,8 +579,8 @@ bi_lower_subgroups(nir_builder *b, nir_intrinsic_instr *intr, void *data) * visible. This is true if neither shared memory nor BARRIER instructions are * used. The hardware may be able to optimize compute shaders that set this * flag. */ -static bool -bi_can_merge_workgroups(nir_shader *nir) +bool +valhall_can_merge_workgroups(nir_shader *nir) { if (nir->info.shared_size != 0) return false; @@ -1152,8 +1152,5 @@ bifrost_compile_shader_nir(nir_shader *nir, bi_compile_variant(nir, inputs, binary, info, BI_IDVS_NONE); } - if (mesa_shader_stage_is_compute(nir->info.stage)) - info->cs.allow_merging_workgroups = bi_can_merge_workgroups(nir); - info->ubo_mask &= (1 << nir->info.num_ubos) - 1; } diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index d4bb15a1c9b..ac2e62a2d9f 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -38,6 +38,7 @@ #include "vk_ycbcr_conversion.h" #include "compiler/bifrost/bifrost_nir.h" +#include "compiler/bifrost/bifrost_compile.h" #include "compiler/pan_compiler.h" #include "compiler/pan_nir.h" #include "pan_shader.h" @@ -1431,6 +1432,11 @@ panvk_compile_shader(struct panvk_device *dev, nir_shader *nir = info->nir; +#if PAN_ARCH >= 9 + variant->info.cs.allow_merging_workgroups = + valhall_can_merge_workgroups(nir); +#endif + panvk_lower_nir(dev, nir, info->set_layout_count, info->set_layouts, info->robustness, state, &variant->desc_info);