mirror of https://gitlab.freedesktop.org/mesa/mesa.git
intel: switch to new subgroup size info
Reviewed-by: Iván Briano <ivan.briano@intel.com>
Acked-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37258>
parent 04d3b3bde5
commit 79d02047b8
9 changed files with 44 additions and 108 deletions
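
The change this commit makes, in miniature: the single subgroup_size enum on
shader_info, which mixed "how the size is chosen" with "which size is
required", is replaced by explicit api/min/max fields. A minimal sketch, not
the actual Mesa headers: the field names follow the hunks below, the REQUIRE_*
enum values equal the size they require (per the removed comments), and the
types and remaining enumerator values are guesses for illustration.

/* Old: one enum carried every case. */
enum gl_subgroup_size_old {
   SUBGROUP_SIZE_API_CONSTANT,    /* fixed, global API constant */
   SUBGROUP_SIZE_UNIFORM,         /* uniform within a draw */
   SUBGROUP_SIZE_VARYING,         /* fully varying */
   SUBGROUP_SIZE_FULL_SUBGROUPS,  /* full subgroups, size unspecified */
   SUBGROUP_SIZE_REQUIRE_8 = 8,   /* values equal the required size */
   SUBGROUP_SIZE_REQUIRE_16 = 16,
   SUBGROUP_SIZE_REQUIRE_32 = 32,
};

/* New: three integers. A required size is min == max; a varying size
 * is min < max; api_subgroup_size is the size the API reports, with 0
 * meaning "not fixed by the API". */
struct subgroup_size_info {
   uint8_t api_subgroup_size;
   uint8_t min_subgroup_size;
   uint8_t max_subgroup_size;
};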
@@ -36,8 +36,11 @@ blorp_compile_fs_brw(struct blorp_context *blorp, void *mem_ctx,
    brw_preprocess_nir(compiler, nir, &opts);
    nir_remove_dead_variables(nir, nir_var_shader_in, NULL);
    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
-   if (is_fast_clear || use_repclear)
-      nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_16;
+   if (is_fast_clear || use_repclear) {
+      nir->info.api_subgroup_size = 16;
+      nir->info.max_subgroup_size = 16;
+      nir->info.min_subgroup_size = 16;
+   }
 
    struct brw_wm_prog_key wm_key;
    memset(&wm_key, 0, sizeof(wm_key));

@@ -1576,8 +1576,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
    * data clear shaders.
    */
   const unsigned reqd_dispatch_width = brw_required_dispatch_width(&nir->info);
-  assert(reqd_dispatch_width == SUBGROUP_SIZE_VARYING ||
-         reqd_dispatch_width == SUBGROUP_SIZE_REQUIRE_16);
+  assert(reqd_dispatch_width == 0 || reqd_dispatch_width == 16);
 
   /* Limit identified when first variant is compiled, see
    * brw_shader::limit_dispatch_width().
@@ -1750,7 +1749,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
 
   } else {
      if ((!has_spilled && dispatch_width_limit >= 16 && INTEL_SIMD(FS, 16)) ||
-         reqd_dispatch_width == SUBGROUP_SIZE_REQUIRE_16) {
+         reqd_dispatch_width == 16) {
         /* Try a SIMD16 compile */
         brw_shader_params shader_params = base_shader_params;
         shader_params.dispatch_width = 16;
@@ -1783,7 +1782,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
      /* Currently, the compiler only supports SIMD32 on SNB+ */
      if (!has_spilled &&
          dispatch_width_limit >= 32 &&
-         reqd_dispatch_width == SUBGROUP_SIZE_VARYING &&
+         reqd_dispatch_width == 0 &&
          !simd16_failed && INTEL_SIMD(FS, 32) &&
          !prog_data->base.ray_queries) {
         /* Try a SIMD32 compile */
@@ -1818,7 +1817,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
 
   if (devinfo->ver >= 12 && !has_spilled &&
       max_polygons >= 2 && !key->coarse_pixel &&
-      reqd_dispatch_width == SUBGROUP_SIZE_VARYING) {
+      reqd_dispatch_width == 0) {
 
      if (devinfo->ver >= 20 && max_polygons >= 4 &&
         dispatch_width_limit >= 32 &&
@@ -1890,7 +1889,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
   /* When the caller compiles a repclear or fast clear shader, they
    * want SIMD16-only.
    */
-  if (reqd_dispatch_width == SUBGROUP_SIZE_REQUIRE_16)
+  if (reqd_dispatch_width == 16)
      v8.reset();
 
   brw_generator g(compiler, &params->base, &prog_data->base,

@@ -2426,12 +2426,11 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
 static unsigned
 get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
 {
-   switch (info->subgroup_size) {
-   case SUBGROUP_SIZE_API_CONSTANT:
-      /* We have to use the global constant size. */
-      return BRW_SUBGROUP_SIZE;
-
-   case SUBGROUP_SIZE_UNIFORM:
+   if (info->api_subgroup_size) {
+      /* We have to use the global/required constant size. */
+      assert(info->api_subgroup_size >= 8 && info->api_subgroup_size <= 32);
+      return info->api_subgroup_size;
+   } else if (info->api_subgroup_size_draw_uniform) {
       /* It has to be uniform across all invocations but can vary per stage
        * if we want. This gives us a bit more freedom.
        *
@@ -2441,8 +2440,7 @@ get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
        * to be uniform across invocations.
        */
       return max_subgroup_size;
-
-   case SUBGROUP_SIZE_VARYING:
+   } else {
       /* The subgroup size is allowed to be fully varying. For geometry
        * stages, we know it's always 8 which is max_subgroup_size so we can
        * return that. For compute, brw_nir_apply_key is called once per
@@ -2454,25 +2452,7 @@ get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
        * size.
        */
       return info->stage == MESA_SHADER_FRAGMENT ? 0 : max_subgroup_size;
-
-   case SUBGROUP_SIZE_REQUIRE_4:
-      UNREACHABLE("Unsupported subgroup size type");
-
-   case SUBGROUP_SIZE_REQUIRE_8:
-   case SUBGROUP_SIZE_REQUIRE_16:
-   case SUBGROUP_SIZE_REQUIRE_32:
-      /* These enum values are expressly chosen to be equal to the subgroup
-       * size that they require.
-       */
-      return info->subgroup_size;
-
-   case SUBGROUP_SIZE_FULL_SUBGROUPS:
-   case SUBGROUP_SIZE_REQUIRE_64:
-   case SUBGROUP_SIZE_REQUIRE_128:
-      break;
-   }
-
-   UNREACHABLE("Invalid subgroup size type");
+   }
 }
 
 unsigned
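
Worked examples of the rewritten get_subgroup_size() above, read directly off
the new if/else chain (the 16 is an arbitrary example value):

/*
 *   api_subgroup_size = 16                          -> 16
 *   api = 0, api_subgroup_size_draw_uniform = true  -> max_subgroup_size
 *   api = 0, draw_uniform = false, fragment shader  -> 0 (fully varying)
 *   api = 0, draw_uniform = false, geometry stage   -> max_subgroup_size
 */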

@@ -30,11 +30,8 @@
 unsigned
 brw_required_dispatch_width(const struct shader_info *info)
 {
-   if ((int)info->subgroup_size >= (int)SUBGROUP_SIZE_REQUIRE_8) {
-      /* These enum values are expressly chosen to be equal to the subgroup
-       * size that they require.
-       */
-      return (unsigned)info->subgroup_size;
+   if (info->min_subgroup_size == info->max_subgroup_size) {
+      return info->max_subgroup_size;
    } else {
       return 0;
    }
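
A hedged usage sketch of the new contract; check_required_width_contract is a
made-up test function, and the concrete sizes are arbitrary, but the field
names and the return values follow the hunk above:

static void
check_required_width_contract(nir_shader *nir)
{
   /* Pinned width: min == max, so the required width is recovered. */
   nir->info.min_subgroup_size = 16;
   nir->info.max_subgroup_size = 16;
   assert(brw_required_dispatch_width(&nir->info) == 16);

   /* Unconstrained: min < max, so 0 ("no requirement") comes back. */
   nir->info.min_subgroup_size = 8;
   nir->info.max_subgroup_size = 32;
   assert(brw_required_dispatch_width(&nir->info) == 0);
}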

@@ -1683,12 +1683,11 @@ elk_nir_apply_sampler_key(nir_shader *nir,
 static unsigned
 get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
 {
-   switch (info->subgroup_size) {
-   case SUBGROUP_SIZE_API_CONSTANT:
-      /* We have to use the global constant size. */
-      return ELK_SUBGROUP_SIZE;
-
-   case SUBGROUP_SIZE_UNIFORM:
+   if (info->api_subgroup_size) {
+      /* We have to use the global/required constant size. */
+      assert(info->api_subgroup_size >= 8 && info->api_subgroup_size <= 32);
+      return info->api_subgroup_size;
+   } else if (info->api_subgroup_size_draw_uniform) {
       /* It has to be uniform across all invocations but can vary per stage
        * if we want. This gives us a bit more freedom.
        *
@@ -1698,8 +1697,7 @@ get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
        * to be uniform across invocations.
        */
       return max_subgroup_size;
-
-   case SUBGROUP_SIZE_VARYING:
+   } else {
       /* The subgroup size is allowed to be fully varying. For geometry
        * stages, we know it's always 8 which is max_subgroup_size so we can
        * return that. For compute, elk_nir_apply_key is called once per
@@ -1711,27 +1709,7 @@ get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
        * size.
        */
       return info->stage == MESA_SHADER_FRAGMENT ? 0 : max_subgroup_size;
-
-   case SUBGROUP_SIZE_REQUIRE_4:
-      UNREACHABLE("Unsupported subgroup size type");
-
-   case SUBGROUP_SIZE_REQUIRE_8:
-   case SUBGROUP_SIZE_REQUIRE_16:
-   case SUBGROUP_SIZE_REQUIRE_32:
-      assert(mesa_shader_stage_uses_workgroup(info->stage) ||
-             (info->stage >= MESA_SHADER_RAYGEN && info->stage <= MESA_SHADER_CALLABLE));
-      /* These enum values are expressly chosen to be equal to the subgroup
-       * size that they require.
-       */
-      return info->subgroup_size;
-
-   case SUBGROUP_SIZE_FULL_SUBGROUPS:
-   case SUBGROUP_SIZE_REQUIRE_64:
-   case SUBGROUP_SIZE_REQUIRE_128:
-      break;
-   }
-
-   UNREACHABLE("Invalid subgroup size type");
+   }
 }
 
 unsigned

@@ -30,12 +30,8 @@
 unsigned
 elk_required_dispatch_width(const struct shader_info *info)
 {
-   if ((int)info->subgroup_size >= (int)SUBGROUP_SIZE_REQUIRE_8) {
-      assert(mesa_shader_stage_uses_workgroup(info->stage));
-      /* These enum values are expressly chosen to be equal to the subgroup
-       * size that they require.
-       */
-      return (unsigned)info->subgroup_size;
+   if (info->min_subgroup_size == info->max_subgroup_size) {
+      return info->max_subgroup_size;
    } else {
       return 0;
    }

@@ -652,29 +652,21 @@ anv_fixup_subgroup_size(struct anv_instance *instance, struct shader_info *info)
    */
   if (instance->assume_full_subgroups &&
       info->uses_wide_subgroup_intrinsics &&
-      info->subgroup_size == SUBGROUP_SIZE_API_CONSTANT &&
+      info->api_subgroup_size == BRW_SUBGROUP_SIZE &&
       local_size &&
-      local_size % BRW_SUBGROUP_SIZE == 0)
-      info->subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
-
-   /* If the client requests that we dispatch full subgroups but doesn't
-    * allow us to pick a subgroup size, we have to smash it to the API
-    * value of 32. Performance will likely be terrible in this case but
-    * there's nothing we can do about that. The client should have chosen
-    * a size.
-    */
-   if (info->subgroup_size == SUBGROUP_SIZE_FULL_SUBGROUPS)
-      info->subgroup_size =
-         instance->assume_full_subgroups != 0 ?
-         instance->assume_full_subgroups : BRW_SUBGROUP_SIZE;
+      local_size % BRW_SUBGROUP_SIZE == 0) {
+      info->max_subgroup_size = BRW_SUBGROUP_SIZE;
+      info->min_subgroup_size = BRW_SUBGROUP_SIZE;
+   }
 
   /* Cooperative matrix extension requires that all invocations in a subgroup
    * be active. As a result, when the application does not request a specific
    * subgroup size, we must use SIMD32.
    */
   if (info->stage == MESA_SHADER_COMPUTE && info->cs.has_cooperative_matrix &&
-      info->subgroup_size < SUBGROUP_SIZE_REQUIRE_8) {
-      info->subgroup_size = BRW_SUBGROUP_SIZE;
+      info->max_subgroup_size > info->min_subgroup_size) {
+      info->api_subgroup_size = info->max_subgroup_size;
+      info->min_subgroup_size = info->max_subgroup_size;
    }
 }
 
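Illustrative before/after for the cooperative-matrix fixup above, assuming a
compute shader whose size was still unconstrained (the concrete numbers are
examples, not from the patch):

/* before: min_subgroup_size = 8,  max_subgroup_size = 32, api = 0  */
/* after:  min_subgroup_size = 32, max_subgroup_size = 32, api = 32 */

That is, a still-varying size (max > min) is collapsed to the maximum, so that
every invocation in the subgroup is active as the extension requires.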

@@ -1244,7 +1236,7 @@ anv_shader_lower_nir(struct anv_device *device,
    if (nir->info.stage == MESA_SHADER_COMPUTE &&
        nir->info.cs.has_cooperative_matrix) {
       anv_fixup_subgroup_size(pdevice->instance, &nir->info);
-      NIR_PASS(_, nir, brw_nir_lower_cmat, nir->info.subgroup_size);
+      NIR_PASS(_, nir, brw_nir_lower_cmat, nir->info.api_subgroup_size);
       NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_function_temp, 16);
    }

@@ -360,10 +360,10 @@ anv_device_init_rt_shaders(struct anv_device *device)
    nir_shader *trampoline_nir =
       brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);
 
-   if (device->info->ver >= 20)
-      trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_16;
-   else
-      trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_8;
+   unsigned require_size = device->info->ver >= 20 ? 16 : 8;
+   trampoline_nir->info.api_subgroup_size = require_size;
+   trampoline_nir->info.max_subgroup_size = require_size;
+   trampoline_nir->info.min_subgroup_size = require_size;
 
    struct brw_cs_prog_data trampoline_prog_data = {
       .uses_btd_stack_ids = true,
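
With the hypothetical require_subgroup_size() helper sketched earlier, the
trampoline setup above would collapse to a single call (illustration only,
not how the patch writes it):

require_subgroup_size(trampoline_nir, device->info->ver >= 20 ? 16 : 8);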

@@ -1516,21 +1516,12 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
    */
   if (device->physical->instance->assume_full_subgroups &&
       stage.nir->info.uses_wide_subgroup_intrinsics &&
-      stage.nir->info.subgroup_size == SUBGROUP_SIZE_API_CONSTANT &&
+      stage.nir->info.api_subgroup_size == ELK_SUBGROUP_SIZE &&
       local_size &&
-      local_size % ELK_SUBGROUP_SIZE == 0)
-      stage.nir->info.subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
-
-   /* If the client requests that we dispatch full subgroups but doesn't
-    * allow us to pick a subgroup size, we have to smash it to the API
-    * value of 32. Performance will likely be terrible in this case but
-    * there's nothing we can do about that. The client should have chosen
-    * a size.
-    */
-   if (stage.nir->info.subgroup_size == SUBGROUP_SIZE_FULL_SUBGROUPS)
-      stage.nir->info.subgroup_size =
-         device->physical->instance->assume_full_subgroups != 0 ?
-         device->physical->instance->assume_full_subgroups : ELK_SUBGROUP_SIZE;
+      local_size % ELK_SUBGROUP_SIZE == 0) {
+      stage.nir->info.max_subgroup_size = ELK_SUBGROUP_SIZE;
+      stage.nir->info.min_subgroup_size = ELK_SUBGROUP_SIZE;
+   }
 
    stage.num_stats = 1;
 