intel/brw: Allow specifying a required subgroup size for fragment shaders.

On older hardware the "use_rep_send" compile parameter was being
implicitly used to request the compilation of the SIMD16 variant of
clear pixel shaders that require it due to hardware restrictions.

However, on Gfx12+ this flag is never set since replicated data
clears are no longer supported, yet BLORP still implicitly relies
on the SIMD16 variant being generated even though there's no way for
BLORP to explicitly request it.  This doesn't cause much of a problem
right now since brw_compile_fs() typically generates a SIMD16 kernel
unless the SIMD8 kernel spills or SIMD debugging flags are enabled,
but it won't work reliably on Xe3+ since we'll start using SIMD32 more
aggressively.

In order to avoid these issues, use the standard required subgroup
size (the subgroup_size field of shader_info) to signal that the
SIMD16 variant of the shader is needed by the caller.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32664>
This commit is contained in:
Francisco Jerez 2024-09-18 14:24:41 -07:00 committed by Marge Bot
parent a736757275
commit 80b2355b39
3 changed files with 20 additions and 12 deletions

View file

@ -1576,6 +1576,14 @@ brw_compile_fs(const struct brw_compiler *compiler,
brw_nir_populate_wm_prog_data(nir, compiler->devinfo, key, prog_data,
params->mue_map);
/* Either an unrestricted or a fixed SIMD16 subgroup size are
* allowed -- The latter is needed for fast clear and replicated
* data clear shaders.
*/
const unsigned reqd_dispatch_width = brw_required_dispatch_width(&nir->info);
assert(reqd_dispatch_width == SUBGROUP_SIZE_VARYING ||
reqd_dispatch_width == SUBGROUP_SIZE_REQUIRE_16);
std::unique_ptr<fs_visitor> v8, v16, v32, vmulti;
cfg_t *simd8_cfg = NULL, *simd16_cfg = NULL, *simd32_cfg = NULL,
*multi_cfg = NULL;
@ -1613,9 +1621,9 @@ brw_compile_fs(const struct brw_compiler *compiler,
" pixel shading.\n");
}
if (!has_spilled &&
(!v8 || v8->max_dispatch_width >= 16) &&
(INTEL_SIMD(FS, 16) || params->use_rep_send)) {
if ((!has_spilled && (!v8 || v8->max_dispatch_width >= 16) &&
INTEL_SIMD(FS, 16)) ||
reqd_dispatch_width == SUBGROUP_SIZE_REQUIRE_16) {
/* Try a SIMD16 compile */
v16 = std::make_unique<fs_visitor>(compiler, &params->base, key,
prog_data, nir, 16, 1,
@ -1645,9 +1653,9 @@ brw_compile_fs(const struct brw_compiler *compiler,
/* Currently, the compiler only supports SIMD32 on SNB+ */
if (!has_spilled &&
(!v8 || v8->max_dispatch_width >= 32) &&
(!v16 || v16->max_dispatch_width >= 32) && !params->use_rep_send &&
!simd16_failed &&
INTEL_SIMD(FS, 32)) {
(!v16 || v16->max_dispatch_width >= 32) &&
reqd_dispatch_width == SUBGROUP_SIZE_VARYING &&
!simd16_failed && INTEL_SIMD(FS, 32)) {
/* Try a SIMD32 compile */
v32 = std::make_unique<fs_visitor>(compiler, &params->base, key,
prog_data, nir, 32, 1,
@ -1680,7 +1688,8 @@ brw_compile_fs(const struct brw_compiler *compiler,
}
if (devinfo->ver >= 12 && !has_spilled &&
params->max_polygons >= 2 && !key->coarse_pixel) {
params->max_polygons >= 2 && !key->coarse_pixel &&
reqd_dispatch_width == SUBGROUP_SIZE_VARYING) {
fs_visitor *vbase = v8 ? v8.get() : v16 ? v16.get() : v32.get();
assert(vbase);
@ -1749,8 +1758,10 @@ brw_compile_fs(const struct brw_compiler *compiler,
}
}
/* When the caller requests a repclear shader, they want SIMD16-only */
if (params->use_rep_send)
/* When the caller compiles a repclear or fast clear shader, they
* want SIMD16-only.
*/
if (reqd_dispatch_width == SUBGROUP_SIZE_REQUIRE_16)
simd8_cfg = NULL;
brw_generator g(compiler, &params->base, &prog_data->base,

View file

@ -1999,8 +1999,6 @@ get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
case SUBGROUP_SIZE_REQUIRE_8:
case SUBGROUP_SIZE_REQUIRE_16:
case SUBGROUP_SIZE_REQUIRE_32:
assert(gl_shader_stage_uses_workgroup(info->stage) ||
(info->stage >= MESA_SHADER_RAYGEN && info->stage <= MESA_SHADER_CALLABLE));
/* These enum values are expressly chosen to be equal to the subgroup
* size that they require.
*/

View file

@ -31,7 +31,6 @@ unsigned
brw_required_dispatch_width(const struct shader_info *info)
{
if ((int)info->subgroup_size >= (int)SUBGROUP_SIZE_REQUIRE_8) {
assert(gl_shader_stage_uses_workgroup(info->stage));
/* These enum values are expressly chosen to be equal to the subgroup
* size that they require.
*/