panvk/csf: take merged workgroups into account for divergence

Merging workgroups affects divergence analysis, since subgroups can now
contain extra threads from other workgroups. We already have divergence
analysis flags to handle this case, but since the compiler options memory
is static, we need to define an entirely separate option set for merged
vs non-merged workgroups.

In gallium, we don't have to switch options because opengl requires
uniformity over the entire dispatch in application shaders.

Signed-off-by: Olivia Lee <olivia.lee@collabora.com>
Reviewed-by: Eric R. Smith <eric.smith@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38586>
This commit is contained in:
Olivia Lee 2025-12-09 17:08:35 -08:00 committed by Marge Bot
parent c42e124a66
commit e9ca69b807
10 changed files with 39 additions and 21 deletions

View file

@ -439,9 +439,10 @@ pan_preload_get_shader(struct pan_fb_preload_cache *cache,
key->surfaces[i].samples);
}
const nir_shader_compiler_options *compiler_options =
pan_get_nir_shader_compiler_options(PAN_ARCH, false);
nir_builder b = nir_builder_init_simple_shader(
MESA_SHADER_FRAGMENT, pan_get_nir_shader_compiler_options(PAN_ARCH),
"pan_preload(%s)", sig);
MESA_SHADER_FRAGMENT, compiler_options, "pan_preload(%s)", sig);
nir_def *barycentric = nir_load_barycentric(
&b, nir_intrinsic_load_barycentric_pixel, INTERP_MODE_SMOOTH);

View file

@ -183,8 +183,10 @@ panfrost_create_afbc_size_shader(struct panfrost_screen *screen,
unsigned align = key->afbc.align;
struct panfrost_device *dev = pan_device(&screen->base);
const nir_shader_compiler_options *compiler_options =
pan_get_nir_shader_compiler_options(dev->arch, false);
nir_builder b = nir_builder_init_simple_shader(
MESA_SHADER_COMPUTE, pan_get_nir_shader_compiler_options(dev->arch),
MESA_SHADER_COMPUTE, compiler_options,
"panfrost_afbc_size(uncompressed_size=%u, align=%u)",
key->afbc.uncompressed_size, align);
@ -220,9 +222,10 @@ panfrost_create_afbc_pack_shader(struct panfrost_screen *screen,
{
unsigned align = key->afbc.align;
struct panfrost_device *dev = pan_device(&screen->base);
const nir_shader_compiler_options *compiler_options =
pan_get_nir_shader_compiler_options(dev->arch, false);
nir_builder b = nir_builder_init_simple_shader(
MESA_SHADER_COMPUTE, pan_get_nir_shader_compiler_options(dev->arch),
"panfrost_afbc_pack");
MESA_SHADER_COMPUTE, compiler_options, "panfrost_afbc_pack");
panfrost_afbc_add_info_ubo(pack, b);
@ -265,9 +268,10 @@ panfrost_create_mtk_tiled_detile_shader(
{
const struct panfrost_device *device = &screen->dev;
bool tint_yuv = (device->debug & PAN_DBG_YUV) != 0;
const nir_shader_compiler_options *compiler_options =
pan_get_nir_shader_compiler_options(device->arch, false);
nir_builder b = nir_builder_init_simple_shader(
MESA_SHADER_COMPUTE, pan_get_nir_shader_compiler_options(device->arch),
"panfrost_mtk_detile");
MESA_SHADER_COMPUTE, compiler_options, "panfrost_mtk_detile");
b.shader->info.workgroup_size[0] = 4;
b.shader->info.workgroup_size[1] = 16;
b.shader->info.workgroup_size[2] = 1;

View file

@ -1148,7 +1148,7 @@ panfrost_create_screen(int fd, const struct pipe_screen_config *config,
for (unsigned i = 0; i <= MESA_SHADER_COMPUTE; i++)
screen->base.nir_options[i] =
pan_get_nir_shader_compiler_options(dev->arch);
pan_get_nir_shader_compiler_options(dev->arch, false);
switch (dev->arch) {
case 4:

View file

@ -78,8 +78,8 @@ bool valhall_can_merge_workgroups(nir_shader *nir);
#define VALHAL_EX_FIFO_VARYING_BITS \
(VARYING_BIT_PSIZ | VARYING_BIT_LAYER | VARYING_BIT_PRIMITIVE_ID)
#define DEFINE_OPTIONS(arch) \
static const nir_shader_compiler_options bifrost_nir_options_v##arch = { \
#define DEFINE_OPTIONS(name, arch, merge_workgroups) \
static const nir_shader_compiler_options name = { \
.lower_scmp = true, \
.lower_flrp16 = true, \
.lower_flrp32 = true, \
@ -149,10 +149,17 @@ bool valhall_can_merge_workgroups(nir_shader *nir);
.has_udot_4x8_sat = arch >= 9, \
.has_sdot_4x8 = arch >= 9, \
.has_sdot_4x8_sat = arch >= 9, \
\
.divergence_analysis_options = merge_workgroups ? \
(nir_divergence_across_subgroups | \
nir_divergence_multiple_workgroup_per_compute_subgroup) \
: 0, \
};
DEFINE_OPTIONS(6);
DEFINE_OPTIONS(9);
DEFINE_OPTIONS(11);
DEFINE_OPTIONS(bifrost_nir_options_v6, 6, false);
DEFINE_OPTIONS(bifrost_nir_options_v9, 9, false);
DEFINE_OPTIONS(bifrost_nir_options_v9_merge_wg, 9, true);
DEFINE_OPTIONS(bifrost_nir_options_v11, 11, false);
DEFINE_OPTIONS(bifrost_nir_options_v11_merge_wg, 11, true);
#endif

View file

@ -34,22 +34,26 @@ pan_want_debug_info(unsigned arch)
}
const nir_shader_compiler_options *
pan_get_nir_shader_compiler_options(unsigned arch)
pan_get_nir_shader_compiler_options(unsigned arch, bool merge_wg)
{
switch (arch) {
case 4:
case 5:
assert(!merge_wg);
return &midgard_nir_options;
case 6:
case 7:
assert(!merge_wg);
return &bifrost_nir_options_v6;
case 9:
case 10:
return &bifrost_nir_options_v9;
return merge_wg ? &bifrost_nir_options_v9_merge_wg :
&bifrost_nir_options_v9;
case 11:
case 12:
case 13:
return &bifrost_nir_options_v11;
return merge_wg ? &bifrost_nir_options_v11_merge_wg :
&bifrost_nir_options_v11;
default:
assert(!"Unsupported arch");
return NULL;

View file

@ -21,7 +21,7 @@ bool pan_will_dump_shaders(unsigned arch);
bool pan_want_debug_info(unsigned arch);
const nir_shader_compiler_options *
pan_get_nir_shader_compiler_options(unsigned arch);
pan_get_nir_shader_compiler_options(unsigned arch, bool merge_wg);
void pan_preprocess_nir(nir_shader *nir, uint64_t gpu_id);
void pan_optimize_nir(nir_shader *nir, uint64_t gpu_id);

View file

@ -804,8 +804,10 @@ GENX(pan_blend_create_shader)(const struct pan_blend_state *state,
get_equation_str(rt_state, equation_str, sizeof(equation_str));
const nir_shader_compiler_options *compiler_options =
pan_get_nir_shader_compiler_options(PAN_ARCH, false);
nir_builder builder = nir_builder_init_simple_shader(
MESA_SHADER_FRAGMENT, pan_get_nir_shader_compiler_options(PAN_ARCH),
MESA_SHADER_FRAGMENT, compiler_options,
"pan_blend(rt=%d,fmt=%s,nr_samples=%d,%s=%s)", rt,
util_format_name(rt_state->format), rt_state->nr_samples,
state->logicop_enable ? "logicop" : "equation",

View file

@ -332,7 +332,7 @@ panvk_meta_desc_copy_rsd(struct panvk_device *dev)
nir_builder b = nir_builder_init_simple_shader(
MESA_SHADER_COMPUTE,
pan_get_nir_shader_compiler_options(
pan_arch(phys_dev->kmod.dev->props.gpu_id)),
pan_arch(phys_dev->kmod.dev->props.gpu_id), false),
"%s", "desc_copy");
/* We actually customize that at execution time to issue the

View file

@ -206,7 +206,7 @@ get_frame_shader(struct panvk_device *dev,
goto out;
const struct nir_shader_compiler_options *nir_options =
pan_get_nir_shader_compiler_options(PAN_ARCH);
pan_get_nir_shader_compiler_options(PAN_ARCH, false);
nir_shader *nir = GENX(pan_get_fb_shader)(&key->key, nir_options);
NIR_PASS(_, nir, nir_shader_instructions_pass, lower_instr,

View file

@ -345,7 +345,7 @@ panvk_get_nir_options(UNUSED struct vk_physical_device *vk_pdev,
{
struct panvk_physical_device *phys_dev = to_panvk_physical_device(vk_pdev);
return pan_get_nir_shader_compiler_options(
pan_arch(phys_dev->kmod.dev->props.gpu_id));
pan_arch(phys_dev->kmod.dev->props.gpu_id), false);
}
static struct spirv_to_nir_options