diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 8fb3c3b4d75..1b7b243727e 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -535,6 +535,9 @@ public: const unsigned dispatch_width; /**< 8, 16 or 32 */ unsigned max_dispatch_width; + /* The API selected subgroup size, as computed by brw_nir_api_subgroup_size() from the shader and the dispatch width. */ + unsigned api_subgroup_size; /**< 0, 8, 16 or 32 */ + struct shader_stats shader_stats; brw::fs_builder bld; diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index a443bcbe2a5..470936549e9 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -5326,7 +5326,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr * FS), bound the invocation to the dispatch size. */ fs_reg bound_invocation; - if (bld.dispatch_width() < bld.shader->nir->info.subgroup_size) { + if (api_subgroup_size == 0 || /* 0 is bounded the same way as an API size larger than the dispatch width */ + bld.dispatch_width() < api_subgroup_size) { bound_invocation = bld.vgrf(BRW_REGISTER_TYPE_UD); bld.AND(bound_invocation, invocation, brw_imm_ud(dispatch_width - 1)); } else { diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index d62296676d0..79865657ea2 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -29,6 +29,7 @@ */ #include "brw_eu.h" #include "brw_fs.h" +#include "brw_nir.h" #include "compiler/glsl_types.h" using namespace brw; @@ -1362,9 +1363,14 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, performance_analysis(this), needs_register_pressure(needs_register_pressure), dispatch_width(dispatch_width), + api_subgroup_size(brw_nir_api_subgroup_size(shader, dispatch_width)), bld(fs_builder(this, dispatch_width).at_end()) { init(); + assert(api_subgroup_size == 0 || + api_subgroup_size == 8 || + api_subgroup_size == 16 || + api_subgroup_size == 32); } fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, @@ -1382,9 
+1388,14 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, performance_analysis(this), needs_register_pressure(needs_register_pressure), dispatch_width(8), + api_subgroup_size(brw_nir_api_subgroup_size(shader, dispatch_width)), bld(fs_builder(this, dispatch_width).at_end()) { init(); + assert(api_subgroup_size == 0 || + api_subgroup_size == 8 || + api_subgroup_size == 16 || + api_subgroup_size == 32); } void diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 6c9feafc478..8df2c0ef563 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -1689,6 +1689,13 @@ get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size) unreachable("Invalid subgroup size type"); } +unsigned +brw_nir_api_subgroup_size(const nir_shader *nir, + unsigned hw_subgroup_size) +{ + return get_subgroup_size(&nir->info, hw_subgroup_size); +} + void brw_nir_apply_key(nir_shader *nir, const struct brw_compiler *compiler, diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h index 269731a642a..423b5588787 100644 --- a/src/intel/compiler/brw_nir.h +++ b/src/intel/compiler/brw_nir.h @@ -160,6 +160,9 @@ void brw_nir_apply_key(nir_shader *nir, unsigned max_subgroup_size, bool is_scalar); +unsigned brw_nir_api_subgroup_size(const nir_shader *nir, + unsigned hw_subgroup_size); + enum brw_conditional_mod brw_cmod_for_nir_comparison(nir_op op); enum lsc_opcode lsc_aop_for_nir_intrinsic(const nir_intrinsic_instr *atomic); enum brw_reg_type brw_type_for_nir_type(const struct intel_device_info *devinfo,