From 80b2355b394be8bfca62bd127ef5d11088a7fb19 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 18 Sep 2024 14:24:41 -0700 Subject: [PATCH] intel/brw: Allow specifying a required subgroup size for fragment shaders. On older hardware the "use_rep_send" compile parameter was being implicitly used to request the compilation of the SIMD16 variant of clear pixel shaders that require it due to hardware restrictions. However starting on Gfx12+ this flag is never set since replicated data clears are no longer supported, but BLORP still implicitly relies on the SIMD16 variant being generated even though there's no way for BLORP to explicitly request it. This doesn't cause much of a problem right now since brw_compile_fs() typically generates a SIMD16 kernel unless the SIMD8 kernel spills or SIMD debugging flags are enabled, but it won't work reliably on Xe3+ since we'll start using SIMD32 more aggressively. In order to avoid these issues use the standard required subgroup_size parameter from shader_info to signal that the SIMD16 variant of the shader is needed by the caller. Reviewed-by: Lionel Landwerlin Part-of: --- src/intel/compiler/brw_compile_fs.cpp | 29 ++++++++++++++++------- src/intel/compiler/brw_nir.c | 2 -- src/intel/compiler/brw_simd_selection.cpp | 1 - 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/intel/compiler/brw_compile_fs.cpp b/src/intel/compiler/brw_compile_fs.cpp index bd923b03350..65939b69316 100644 --- a/src/intel/compiler/brw_compile_fs.cpp +++ b/src/intel/compiler/brw_compile_fs.cpp @@ -1576,6 +1576,14 @@ brw_compile_fs(const struct brw_compiler *compiler, brw_nir_populate_wm_prog_data(nir, compiler->devinfo, key, prog_data, params->mue_map); + /* Either an unrestricted or a fixed SIMD16 subgroup size are + * allowed -- The latter is needed for fast clear and replicated + * data clear shaders. + */ + const unsigned reqd_dispatch_width = brw_required_dispatch_width(&nir->info); + assert(reqd_dispatch_width == SUBGROUP_SIZE_VARYING || + reqd_dispatch_width == SUBGROUP_SIZE_REQUIRE_16); + std::unique_ptr v8, v16, v32, vmulti; cfg_t *simd8_cfg = NULL, *simd16_cfg = NULL, *simd32_cfg = NULL, *multi_cfg = NULL; @@ -1613,9 +1621,9 @@ brw_compile_fs(const struct brw_compiler *compiler, " pixel shading.\n"); } - if (!has_spilled && - (!v8 || v8->max_dispatch_width >= 16) && - (INTEL_SIMD(FS, 16) || params->use_rep_send)) { + if ((!has_spilled && (!v8 || v8->max_dispatch_width >= 16) && + INTEL_SIMD(FS, 16)) || + reqd_dispatch_width == SUBGROUP_SIZE_REQUIRE_16) { /* Try a SIMD16 compile */ v16 = std::make_unique(compiler, ¶ms->base, key, prog_data, nir, 16, 1, @@ -1645,9 +1653,9 @@ brw_compile_fs(const struct brw_compiler *compiler, /* Currently, the compiler only supports SIMD32 on SNB+ */ if (!has_spilled && (!v8 || v8->max_dispatch_width >= 32) && - (!v16 || v16->max_dispatch_width >= 32) && !params->use_rep_send && - !simd16_failed && - INTEL_SIMD(FS, 32)) { + (!v16 || v16->max_dispatch_width >= 32) && + reqd_dispatch_width == SUBGROUP_SIZE_VARYING && + !simd16_failed && INTEL_SIMD(FS, 32)) { /* Try a SIMD32 compile */ v32 = std::make_unique(compiler, ¶ms->base, key, prog_data, nir, 32, 1, @@ -1680,7 +1688,8 @@ brw_compile_fs(const struct brw_compiler *compiler, } if (devinfo->ver >= 12 && !has_spilled && - params->max_polygons >= 2 && !key->coarse_pixel) { + params->max_polygons >= 2 && !key->coarse_pixel && + reqd_dispatch_width == SUBGROUP_SIZE_VARYING) { fs_visitor *vbase = v8 ? v8.get() : v16 ? v16.get() : v32.get(); assert(vbase); @@ -1749,8 +1758,10 @@ brw_compile_fs(const struct brw_compiler *compiler, } } - /* When the caller requests a repclear shader, they want SIMD16-only */ - if (params->use_rep_send) + /* When the caller compiles a repclear or fast clear shader, they + * want SIMD16-only. + */ + if (reqd_dispatch_width == SUBGROUP_SIZE_REQUIRE_16) simd8_cfg = NULL; brw_generator g(compiler, ¶ms->base, &prog_data->base, diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 77b20ce95cb..987ff15666e 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -1999,8 +1999,6 @@ get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size) case SUBGROUP_SIZE_REQUIRE_8: case SUBGROUP_SIZE_REQUIRE_16: case SUBGROUP_SIZE_REQUIRE_32: - assert(gl_shader_stage_uses_workgroup(info->stage) || - (info->stage >= MESA_SHADER_RAYGEN && info->stage <= MESA_SHADER_CALLABLE)); /* These enum values are expressly chosen to be equal to the subgroup * size that they require. */ diff --git a/src/intel/compiler/brw_simd_selection.cpp b/src/intel/compiler/brw_simd_selection.cpp index 86ed1c28e84..907b38c3f12 100644 --- a/src/intel/compiler/brw_simd_selection.cpp +++ b/src/intel/compiler/brw_simd_selection.cpp @@ -31,7 +31,6 @@ unsigned brw_required_dispatch_width(const struct shader_info *info) { if ((int)info->subgroup_size >= (int)SUBGROUP_SIZE_REQUIRE_8) { - assert(gl_shader_stage_uses_workgroup(info->stage)); /* These enum values are expressly chosen to be equal to the subgroup * size that they require. */