From 79d02047b88d59ea6cfea1688b656d88796ed32d Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Tue, 9 Sep 2025 18:24:08 +0200
Subject: [PATCH] intel: switch to new subgroup size info
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Iván Briano
Acked-by: Timur Kristóf
Part-of:
---
 src/intel/blorp/blorp_brw.c                   |  7 ++--
 src/intel/compiler/brw_compile_fs.cpp         | 11 +++---
 src/intel/compiler/brw_nir.c                  | 32 ++++-------------
 src/intel/compiler/brw_simd_selection.cpp     |  7 ++--
 src/intel/compiler/elk/elk_nir.c              | 34 ++++---------------
 src/intel/compiler/elk/elk_simd_selection.cpp |  8 ++---
 src/intel/vulkan/anv_shader_compile.c         | 26 +++++---------
 src/intel/vulkan/anv_util.c                   |  8 ++---
 src/intel/vulkan_hasvk/anv_pipeline.c         | 19 +++--------
 9 files changed, 44 insertions(+), 108 deletions(-)

diff --git a/src/intel/blorp/blorp_brw.c b/src/intel/blorp/blorp_brw.c
index 94b1a009b16..653fcd47b40 100644
--- a/src/intel/blorp/blorp_brw.c
+++ b/src/intel/blorp/blorp_brw.c
@@ -36,8 +36,11 @@ blorp_compile_fs_brw(struct blorp_context *blorp, void *mem_ctx,
    brw_preprocess_nir(compiler, nir, &opts);
    nir_remove_dead_variables(nir, nir_var_shader_in, NULL);
    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
-   if (is_fast_clear || use_repclear)
-      nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_16;
+   if (is_fast_clear || use_repclear) {
+      nir->info.api_subgroup_size = 16;
+      nir->info.max_subgroup_size = 16;
+      nir->info.min_subgroup_size = 16;
+   }
 
    struct brw_wm_prog_key wm_key;
    memset(&wm_key, 0, sizeof(wm_key));
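The old shader_info::subgroup_size enum packed several different meanings
(API constant, draw uniform, varying, require-N) into a single value. The
new info splits this into three plain integers: api_subgroup_size (the
size promised to the API, 0 if none) plus min_subgroup_size and
max_subgroup_size bounding what the backend may pick. A hard requirement
such as blorp's SIMD16 fast-clear path above is expressed by pinning all
three fields to one value. A minimal sketch of that encoding, using a
stand-in struct rather than Mesa's real shader_info:

   #include <assert.h>

   /* Stand-in for the relevant shader_info fields; illustration only. */
   struct subgroup_info {
      unsigned api_subgroup_size;  /* size promised to the API, 0 = none */
      unsigned min_subgroup_size;  /* smallest size the backend may pick */
      unsigned max_subgroup_size;  /* largest size the backend may pick */
   };

   /* Pin the subgroup size to one required value, as the blorp
    * fast-clear/repclear path does for SIMD16. */
   static void require_subgroup_size(struct subgroup_info *info, unsigned size)
   {
      info->api_subgroup_size = size;
      info->min_subgroup_size = size;
      info->max_subgroup_size = size;
   }

   int main(void)
   {
      struct subgroup_info info = {0};
      require_subgroup_size(&info, 16);
      /* A required size shows up as a collapsed [min, max] range. */
      assert(info.min_subgroup_size == info.max_subgroup_size);
      return 0;
   }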
diff --git a/src/intel/compiler/brw_compile_fs.cpp b/src/intel/compiler/brw_compile_fs.cpp
index c184225f546..2299899b5e8 100644
--- a/src/intel/compiler/brw_compile_fs.cpp
+++ b/src/intel/compiler/brw_compile_fs.cpp
@@ -1576,8 +1576,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
     * data clear shaders.
     */
    const unsigned reqd_dispatch_width = brw_required_dispatch_width(&nir->info);
-   assert(reqd_dispatch_width == SUBGROUP_SIZE_VARYING ||
-          reqd_dispatch_width == SUBGROUP_SIZE_REQUIRE_16);
+   assert(reqd_dispatch_width == 0 || reqd_dispatch_width == 16);
 
    /* Limit identified when first variant is compiled, see
     * brw_shader::limit_dispatch_width().
@@ -1750,7 +1749,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
    } else {
       if ((!has_spilled &&
            dispatch_width_limit >= 16 && INTEL_SIMD(FS, 16)) ||
-          reqd_dispatch_width == SUBGROUP_SIZE_REQUIRE_16) {
+          reqd_dispatch_width == 16) {
          /* Try a SIMD16 compile */
          brw_shader_params shader_params = base_shader_params;
          shader_params.dispatch_width = 16;
@@ -1783,7 +1782,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
       /* Currently, the compiler only supports SIMD32 on SNB+ */
       if (!has_spilled &&
           dispatch_width_limit >= 32 &&
-          reqd_dispatch_width == SUBGROUP_SIZE_VARYING &&
+          reqd_dispatch_width == 0 &&
           !simd16_failed &&
           INTEL_SIMD(FS, 32) && !prog_data->base.ray_queries) {
          /* Try a SIMD32 compile */
@@ -1818,7 +1817,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
    if (devinfo->ver >= 12 && !has_spilled &&
        max_polygons >= 2 &&
        !key->coarse_pixel &&
-       reqd_dispatch_width == SUBGROUP_SIZE_VARYING) {
+       reqd_dispatch_width == 0) {
 
       if (devinfo->ver >= 20 && max_polygons >= 4 &&
           dispatch_width_limit >= 32 &&
@@ -1890,7 +1889,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
    /* When the caller compiles a repclear or fast clear shader, they
    * want SIMD16-only.
    */
-   if (reqd_dispatch_width == SUBGROUP_SIZE_REQUIRE_16)
+   if (reqd_dispatch_width == 16)
       v8.reset();
 
    brw_generator g(compiler, &params->base, &prog_data->base,
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 9d495ffa3ca..0cd4e8dae88 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -2426,12 +2426,11 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
 static unsigned
 get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
 {
-   switch (info->subgroup_size) {
-   case SUBGROUP_SIZE_API_CONSTANT:
-      /* We have to use the global constant size. */
-      return BRW_SUBGROUP_SIZE;
-
-   case SUBGROUP_SIZE_UNIFORM:
+   if (info->api_subgroup_size) {
+      /* We have to use the global/required constant size. */
+      assert(info->api_subgroup_size >= 8 && info->api_subgroup_size <= 32);
+      return info->api_subgroup_size;
+   } else if (info->api_subgroup_size_draw_uniform) {
       /* It has to be uniform across all invocations but can vary per stage
        * if we want. This gives us a bit more freedom.
        *
@@ -2441,8 +2440,7 @@ get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
        * to be uniform across invocations.
        */
       return max_subgroup_size;
-
-   case SUBGROUP_SIZE_VARYING:
+   } else {
       /* The subgroup size is allowed to be fully varying. For geometry
        * stages, we know it's always 8 which is max_subgroup_size so we can
        * return that. For compute, brw_nir_apply_key is called once per
@@ -2454,25 +2452,7 @@ get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
        * size.
        */
       return info->stage == MESA_SHADER_FRAGMENT ? 0 : max_subgroup_size;
-
-   case SUBGROUP_SIZE_REQUIRE_4:
-      UNREACHABLE("Unsupported subgroup size type");
-
-   case SUBGROUP_SIZE_REQUIRE_8:
-   case SUBGROUP_SIZE_REQUIRE_16:
-   case SUBGROUP_SIZE_REQUIRE_32:
-      /* These enum values are expressly chosen to be equal to the subgroup
-       * size that they require.
-       */
-      return info->subgroup_size;
-
-   case SUBGROUP_SIZE_FULL_SUBGROUPS:
-   case SUBGROUP_SIZE_REQUIRE_64:
-   case SUBGROUP_SIZE_REQUIRE_128:
-      break;
    }
-
-   UNREACHABLE("Invalid subgroup size type");
 }
 
 unsigned
diff --git a/src/intel/compiler/brw_simd_selection.cpp b/src/intel/compiler/brw_simd_selection.cpp
index 7498353a51a..e9ae4d3ae95 100644
--- a/src/intel/compiler/brw_simd_selection.cpp
+++ b/src/intel/compiler/brw_simd_selection.cpp
@@ -30,11 +30,8 @@
 unsigned
 brw_required_dispatch_width(const struct shader_info *info)
 {
-   if ((int)info->subgroup_size >= (int)SUBGROUP_SIZE_REQUIRE_8) {
-      /* These enum values are expressly chosen to be equal to the subgroup
-       * size that they require.
-       */
-      return (unsigned)info->subgroup_size;
+   if (info->min_subgroup_size == info->max_subgroup_size) {
+      return info->max_subgroup_size;
    } else {
       return 0;
    }
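With the range representation, "this shader requires exactly one dispatch
width" falls out as min == max, which is all the rewritten
brw_required_dispatch_width() has to test; 0 keeps meaning "no
requirement". A compile-and-run sketch of that test and of the three-way
ladder that replaced the switch in get_subgroup_size(), again on a
stand-in struct (the 8..32 bounds follow the assert in the hunk above;
is_fragment stands in for info->stage == MESA_SHADER_FRAGMENT):

   #include <assert.h>
   #include <stdbool.h>

   /* Stand-in for the relevant shader_info fields; illustration only. */
   struct subgroup_info {
      unsigned api_subgroup_size;
      unsigned min_subgroup_size;
      unsigned max_subgroup_size;
      bool api_subgroup_size_draw_uniform;
      bool is_fragment;
   };

   /* Mirrors brw_required_dispatch_width(): a collapsed range is a hard
    * requirement, anything else leaves the backend free to choose. */
   static unsigned required_dispatch_width(const struct subgroup_info *info)
   {
      return info->min_subgroup_size == info->max_subgroup_size ?
             info->max_subgroup_size : 0;
   }

   /* Mirrors the if/else ladder in get_subgroup_size(). */
   static unsigned subgroup_size(const struct subgroup_info *info,
                                 unsigned max_subgroup_size)
   {
      if (info->api_subgroup_size) {
         /* A size was promised to the API; honor it. */
         assert(info->api_subgroup_size >= 8 && info->api_subgroup_size <= 32);
         return info->api_subgroup_size;
      } else if (info->api_subgroup_size_draw_uniform) {
         /* Uniform within a draw, but free to vary per stage. */
         return max_subgroup_size;
      } else {
         /* Fully varying; fragment shaders defer the choice (0). */
         return info->is_fragment ? 0 : max_subgroup_size;
      }
   }

   int main(void)
   {
      struct subgroup_info varying  = { .min_subgroup_size = 8,
                                        .max_subgroup_size = 32 };
      struct subgroup_info required = { .api_subgroup_size = 16,
                                        .min_subgroup_size = 16,
                                        .max_subgroup_size = 16 };
      assert(required_dispatch_width(&varying) == 0);
      assert(required_dispatch_width(&required) == 16);
      assert(subgroup_size(&required, 32) == 16);
      return 0;
   }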
diff --git a/src/intel/compiler/elk/elk_nir.c b/src/intel/compiler/elk/elk_nir.c
index 4eff336b3b1..a94fe9d90d9 100644
--- a/src/intel/compiler/elk/elk_nir.c
+++ b/src/intel/compiler/elk/elk_nir.c
@@ -1683,12 +1683,11 @@ elk_nir_apply_sampler_key(nir_shader *nir,
 static unsigned
 get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
 {
-   switch (info->subgroup_size) {
-   case SUBGROUP_SIZE_API_CONSTANT:
-      /* We have to use the global constant size. */
-      return ELK_SUBGROUP_SIZE;
-
-   case SUBGROUP_SIZE_UNIFORM:
+   if (info->api_subgroup_size) {
+      /* We have to use the global/required constant size. */
+      assert(info->api_subgroup_size >= 8 && info->api_subgroup_size <= 32);
+      return info->api_subgroup_size;
+   } else if (info->api_subgroup_size_draw_uniform) {
       /* It has to be uniform across all invocations but can vary per stage
        * if we want. This gives us a bit more freedom.
        *
@@ -1698,8 +1697,7 @@ get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
        * to be uniform across invocations.
        */
       return max_subgroup_size;
-
-   case SUBGROUP_SIZE_VARYING:
+   } else {
       /* The subgroup size is allowed to be fully varying. For geometry
        * stages, we know it's always 8 which is max_subgroup_size so we can
        * return that. For compute, elk_nir_apply_key is called once per
@@ -1711,27 +1709,7 @@ get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
        * size.
        */
       return info->stage == MESA_SHADER_FRAGMENT ? 0 : max_subgroup_size;
-
-   case SUBGROUP_SIZE_REQUIRE_4:
-      UNREACHABLE("Unsupported subgroup size type");
-
-   case SUBGROUP_SIZE_REQUIRE_8:
-   case SUBGROUP_SIZE_REQUIRE_16:
-   case SUBGROUP_SIZE_REQUIRE_32:
-      assert(mesa_shader_stage_uses_workgroup(info->stage) ||
-             (info->stage >= MESA_SHADER_RAYGEN && info->stage <= MESA_SHADER_CALLABLE));
-      /* These enum values are expressly chosen to be equal to the subgroup
-       * size that they require.
-       */
-      return info->subgroup_size;
-
-   case SUBGROUP_SIZE_FULL_SUBGROUPS:
-   case SUBGROUP_SIZE_REQUIRE_64:
-   case SUBGROUP_SIZE_REQUIRE_128:
-      break;
    }
-
-   UNREACHABLE("Invalid subgroup size type");
 }
 
 unsigned
diff --git a/src/intel/compiler/elk/elk_simd_selection.cpp b/src/intel/compiler/elk/elk_simd_selection.cpp
index a418c48b7f8..f737171a8e0 100644
--- a/src/intel/compiler/elk/elk_simd_selection.cpp
+++ b/src/intel/compiler/elk/elk_simd_selection.cpp
@@ -30,12 +30,8 @@
 unsigned
 elk_required_dispatch_width(const struct shader_info *info)
 {
-   if ((int)info->subgroup_size >= (int)SUBGROUP_SIZE_REQUIRE_8) {
-      assert(mesa_shader_stage_uses_workgroup(info->stage));
-      /* These enum values are expressly chosen to be equal to the subgroup
-       * size that they require.
-       */
-      return (unsigned)info->subgroup_size;
+   if (info->min_subgroup_size == info->max_subgroup_size) {
+      return info->max_subgroup_size;
    } else {
       return 0;
    }
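The elk changes mirror the brw ones one-for-one, so every state of the
old enum that these compilers supported reduces to a combination of the
three integers plus the draw-uniform flag. Roughly, and only as an
illustration (the hunks above pin down the tests, not the exact min/max
bounds each state carries, so those bounds are an assumption here, using
the 8..32 range Gfx hardware supports):

   #include <stdbool.h>

   struct subgroup_info {
      unsigned api_subgroup_size;
      unsigned min_subgroup_size;
      unsigned max_subgroup_size;
      bool api_subgroup_size_draw_uniform;
   };

   /* SUBGROUP_SIZE_API_CONSTANT: one global size promised to the API,
    * but the range stays open so no dispatch width is forced. */
   static const struct subgroup_info api_constant = {
      .api_subgroup_size = 32, .min_subgroup_size = 8, .max_subgroup_size = 32,
   };

   /* SUBGROUP_SIZE_UNIFORM: uniform per draw, backend picks per stage. */
   static const struct subgroup_info draw_uniform = {
      .min_subgroup_size = 8, .max_subgroup_size = 32,
      .api_subgroup_size_draw_uniform = true,
   };

   /* SUBGROUP_SIZE_VARYING: no constraint at all. */
   static const struct subgroup_info varying = {
      .min_subgroup_size = 8, .max_subgroup_size = 32,
   };

   /* SUBGROUP_SIZE_REQUIRE_16: collapsed range, also visible to the API. */
   static const struct subgroup_info require_16 = {
      .api_subgroup_size = 16, .min_subgroup_size = 16, .max_subgroup_size = 16,
   };

   int main(void)
   {
      (void)api_constant; (void)draw_uniform; (void)varying; (void)require_16;
      return 0;
   }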
diff --git a/src/intel/vulkan/anv_shader_compile.c b/src/intel/vulkan/anv_shader_compile.c
index b4c44b13055..e4b583a688f 100644
--- a/src/intel/vulkan/anv_shader_compile.c
+++ b/src/intel/vulkan/anv_shader_compile.c
@@ -652,29 +652,21 @@ anv_fixup_subgroup_size(struct anv_instance *instance, struct shader_info *info)
     */
    if (instance->assume_full_subgroups &&
        info->uses_wide_subgroup_intrinsics &&
-       info->subgroup_size == SUBGROUP_SIZE_API_CONSTANT &&
+       info->api_subgroup_size == BRW_SUBGROUP_SIZE &&
        local_size &&
-       local_size % BRW_SUBGROUP_SIZE == 0)
-      info->subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
-
-   /* If the client requests that we dispatch full subgroups but doesn't
-    * allow us to pick a subgroup size, we have to smash it to the API
-    * value of 32. Performance will likely be terrible in this case but
-    * there's nothing we can do about that. The client should have chosen
-    * a size.
-    */
-   if (info->subgroup_size == SUBGROUP_SIZE_FULL_SUBGROUPS)
-      info->subgroup_size =
-         instance->assume_full_subgroups != 0 ?
-         instance->assume_full_subgroups : BRW_SUBGROUP_SIZE;
+       local_size % BRW_SUBGROUP_SIZE == 0) {
+      info->max_subgroup_size = BRW_SUBGROUP_SIZE;
+      info->min_subgroup_size = BRW_SUBGROUP_SIZE;
+   }
 
    /* Cooperative matrix extension requires that all invocations in a subgroup
     * be active. As a result, when the application does not request a specific
     * subgroup size, we must use SIMD32.
     */
    if (info->stage == MESA_SHADER_COMPUTE &&
        info->cs.has_cooperative_matrix &&
-       info->subgroup_size < SUBGROUP_SIZE_REQUIRE_8) {
-      info->subgroup_size = BRW_SUBGROUP_SIZE;
+       info->max_subgroup_size > info->min_subgroup_size) {
+      info->api_subgroup_size = info->max_subgroup_size;
+      info->min_subgroup_size = info->max_subgroup_size;
    }
 }
@@ -1244,7 +1236,7 @@ anv_shader_lower_nir(struct anv_device *device,
    if (nir->info.stage == MESA_SHADER_COMPUTE &&
        nir->info.cs.has_cooperative_matrix) {
       anv_fixup_subgroup_size(pdevice->instance, &nir->info);
-      NIR_PASS(_, nir, brw_nir_lower_cmat, nir->info.subgroup_size);
+      NIR_PASS(_, nir, brw_nir_lower_cmat, nir->info.api_subgroup_size);
       NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_function_temp, 16);
    }
diff --git a/src/intel/vulkan/anv_util.c b/src/intel/vulkan/anv_util.c
index efe0d8909d3..7b85aff1951 100644
--- a/src/intel/vulkan/anv_util.c
+++ b/src/intel/vulkan/anv_util.c
@@ -360,10 +360,10 @@ anv_device_init_rt_shaders(struct anv_device *device)
       nir_shader *trampoline_nir =
          brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);
 
-      if (device->info->ver >= 20)
-         trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_16;
-      else
-         trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_8;
+      unsigned require_size = device->info->ver >= 20 ? 16 : 8;
+      trampoline_nir->info.api_subgroup_size = require_size;
+      trampoline_nir->info.max_subgroup_size = require_size;
+      trampoline_nir->info.min_subgroup_size = require_size;
 
       struct brw_cs_prog_data trampoline_prog_data = {
          .uses_btd_stack_ids = true,
diff --git a/src/intel/vulkan_hasvk/anv_pipeline.c b/src/intel/vulkan_hasvk/anv_pipeline.c
index aa3863962dd..50a2dc24486 100644
--- a/src/intel/vulkan_hasvk/anv_pipeline.c
+++ b/src/intel/vulkan_hasvk/anv_pipeline.c
@@ -1516,21 +1516,12 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
     */
    if (device->physical->instance->assume_full_subgroups &&
        stage.nir->info.uses_wide_subgroup_intrinsics &&
-       stage.nir->info.subgroup_size == SUBGROUP_SIZE_API_CONSTANT &&
+       stage.nir->info.api_subgroup_size == ELK_SUBGROUP_SIZE &&
        local_size &&
-       local_size % ELK_SUBGROUP_SIZE == 0)
-      stage.nir->info.subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
-
-   /* If the client requests that we dispatch full subgroups but doesn't
-    * allow us to pick a subgroup size, we have to smash it to the API
-    * value of 32. Performance will likely be terrible in this case but
-    * there's nothing we can do about that. The client should have chosen
-    * a size.
-    */
-   if (stage.nir->info.subgroup_size == SUBGROUP_SIZE_FULL_SUBGROUPS)
-      stage.nir->info.subgroup_size =
-         device->physical->instance->assume_full_subgroups != 0 ?
-         device->physical->instance->assume_full_subgroups : ELK_SUBGROUP_SIZE;
+       local_size % ELK_SUBGROUP_SIZE == 0) {
+      stage.nir->info.max_subgroup_size = ELK_SUBGROUP_SIZE;
+      stage.nir->info.min_subgroup_size = ELK_SUBGROUP_SIZE;
+   }
 
    stage.num_stats = 1;
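In anv, both special cases become plain range manipulation:
assume_full_subgroups collapses min/max to BRW_SUBGROUP_SIZE when the
workgroup size is a multiple of it, and the cooperative-matrix case
collapses a still-open range (max > min) to its maximum. A condensed
sketch of the resulting behavior, with a stand-in struct and
BRW_SUBGROUP_SIZE assumed to be 32:

   #include <assert.h>
   #include <stdbool.h>

   #define SUBGROUP_SIZE 32u  /* stand-in for BRW_SUBGROUP_SIZE */

   struct subgroup_info {
      unsigned api_subgroup_size;
      unsigned min_subgroup_size;
      unsigned max_subgroup_size;
      bool uses_wide_subgroup_intrinsics;
      bool has_cooperative_matrix;
   };

   /* Condensed sketch of anv_fixup_subgroup_size() after this patch. */
   static void fixup_subgroup_size(bool assume_full_subgroups,
                                   unsigned local_size,
                                   struct subgroup_info *info)
   {
      /* Trust the app to have sized its workgroup for full subgroups. */
      if (assume_full_subgroups &&
          info->uses_wide_subgroup_intrinsics &&
          info->api_subgroup_size == SUBGROUP_SIZE &&
          local_size && local_size % SUBGROUP_SIZE == 0) {
         info->min_subgroup_size = SUBGROUP_SIZE;
         info->max_subgroup_size = SUBGROUP_SIZE;
      }

      /* Cooperative matrix needs every invocation in a subgroup active:
       * collapse any still-open range to its maximum. */
      if (info->has_cooperative_matrix &&
          info->max_subgroup_size > info->min_subgroup_size) {
         info->api_subgroup_size = info->max_subgroup_size;
         info->min_subgroup_size = info->max_subgroup_size;
      }
   }

   int main(void)
   {
      struct subgroup_info info = {
         .api_subgroup_size = SUBGROUP_SIZE,
         .min_subgroup_size = 8, .max_subgroup_size = 32,
         .uses_wide_subgroup_intrinsics = true,
      };
      fixup_subgroup_size(true, 64, &info);  /* 64 % 32 == 0 */
      assert(info.min_subgroup_size == SUBGROUP_SIZE &&
             info.max_subgroup_size == SUBGROUP_SIZE);
      return 0;
   }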