diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index fb78e8d5a46..28555157059 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -17,12 +17,6 @@ #include "vl/vl_video_buffer.h" #include <sys/utsname.h> -#if LLVM_AVAILABLE -#include <llvm/Config/llvm-config.h> /* for LLVM_VERSION_MAJOR */ -#else -#define LLVM_VERSION_MAJOR 0 -#endif - /* The capabilities reported by the kernel has priority over the existing logic in si_get_video_param */ #define QUERYABLE_KERNEL (sscreen->info.is_amdgpu && \ @@ -1690,23 +1684,4 @@ void si_init_screen_get_functions(struct si_screen *sscreen) options->support_indirect_outputs = BITFIELD_BIT(MESA_SHADER_TESS_CTRL); options->varying_expression_max_cost = si_varying_expression_max_cost; options->varying_estimate_instr_cost = si_varying_estimate_instr_cost; - - nir_lower_subgroups_options *lower_subgroups_options = sscreen->nir_lower_subgroups_options; - lower_subgroups_options->subgroup_size = 64; - lower_subgroups_options->ballot_bit_size = 64; - lower_subgroups_options->ballot_components = 1; - lower_subgroups_options->lower_to_scalar = true; - lower_subgroups_options->lower_subgroup_masks = true; - lower_subgroups_options->lower_relative_shuffle = true; - lower_subgroups_options->lower_rotate_to_shuffle = !sscreen->use_aco; - lower_subgroups_options->lower_shuffle_to_32bit = true; - lower_subgroups_options->lower_vote_eq = true; - lower_subgroups_options->lower_vote_bool_eq = true; - lower_subgroups_options->lower_quad_broadcast_dynamic = true; - lower_subgroups_options->lower_quad_broadcast_dynamic_to_const = sscreen->info.gfx_level <= GFX7; - lower_subgroups_options->lower_shuffle_to_swizzle_amd = true; - lower_subgroups_options->lower_ballot_bit_count_to_mbcnt_amd = true; - lower_subgroups_options->lower_inverse_ballot = !sscreen->use_aco && LLVM_VERSION_MAJOR < 17; - lower_subgroups_options->lower_boolean_reduce = true; - lower_subgroups_options->lower_boolean_shuffle = true; } diff 
--git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 19149dc2c35..33b68d05edc 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -1066,7 +1066,6 @@ static void si_destroy_screen(struct pipe_screen *pscreen) sscreen->ws->destroy(sscreen->ws); FREE(sscreen->nir_options); - FREE(sscreen->nir_lower_subgroups_options); FREE(sscreen); } @@ -1235,7 +1234,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, sscreen->b.finalize_nir = si_finalize_nir; sscreen->nir_options = CALLOC_STRUCT(nir_shader_compiler_options); - sscreen->nir_lower_subgroups_options = CALLOC_STRUCT(nir_lower_subgroups_options); si_init_screen_get_functions(sscreen); si_init_screen_buffer_functions(sscreen); @@ -1272,7 +1270,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, si_init_gs_info(sscreen); if (!si_init_shader_cache(sscreen)) { FREE(sscreen->nir_options); - FREE(sscreen->nir_lower_subgroups_options); FREE(sscreen); return NULL; } @@ -1329,7 +1326,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY, NULL)) { si_destroy_shader_cache(sscreen); FREE(sscreen->nir_options); - FREE(sscreen->nir_lower_subgroups_options); FREE(sscreen); glsl_type_singleton_decref(); return NULL; @@ -1341,7 +1337,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY, NULL)) { si_destroy_shader_cache(sscreen); FREE(sscreen->nir_options); - FREE(sscreen->nir_lower_subgroups_options); FREE(sscreen); glsl_type_singleton_decref(); return NULL; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index a8d5fe80f82..abcc6236f6e 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -527,7 +527,6 @@ struct si_screen { struct radeon_info info; 
struct nir_shader_compiler_options *nir_options; - struct nir_lower_subgroups_options *nir_lower_subgroups_options; uint64_t debug_flags; char renderer_string[183]; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 9bac34f6ba3..30d85bc8e15 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -20,6 +20,12 @@ #include "util/ralloc.h" #include "util/u_upload_mgr.h" +#if LLVM_AVAILABLE +#include <llvm/Config/llvm-config.h> /* for LLVM_VERSION_MAJOR */ +#else +#define LLVM_VERSION_MAJOR 0 +#endif + static const char scratch_rsrc_dword0_symbol[] = "SCRATCH_RSRC_DWORD0"; static const char scratch_rsrc_dword1_symbol[] = "SCRATCH_RSRC_DWORD1"; @@ -1957,9 +1963,6 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir) NIR_PASS_V(nir, ac_nir_lower_ngg_gs, &options); } - /* may generate some subgroup op like ballot */ - NIR_PASS_V(nir, nir_lower_subgroups, sel->screen->nir_lower_subgroups_options); - /* may generate some vector output store */ NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL); } @@ -2480,6 +2483,30 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, progress = true; } + assert(shader->wave_size == 32 || shader->wave_size == 64); + + NIR_PASS(progress, nir, nir_lower_subgroups, + &(struct nir_lower_subgroups_options) { + .subgroup_size = shader->wave_size, + .ballot_bit_size = shader->wave_size, + .ballot_components = 1, + .lower_to_scalar = true, + .lower_subgroup_masks = true, + .lower_relative_shuffle = true, + .lower_rotate_to_shuffle = !sel->info.base.use_aco_amd, + .lower_shuffle_to_32bit = true, + .lower_vote_eq = true, + .lower_vote_bool_eq = true, + .lower_quad_broadcast_dynamic = true, + .lower_quad_broadcast_dynamic_to_const = sel->screen->info.gfx_level <= GFX7, + .lower_shuffle_to_swizzle_amd = true, + .lower_ballot_bit_count_to_mbcnt_amd = true, + .lower_inverse_ballot = !sel->info.base.use_aco_amd && LLVM_VERSION_MAJOR < 
17, + .lower_boolean_reduce = true, + .lower_boolean_shuffle = true, + }); + + NIR_PASS(progress, nir, nir_lower_pack); NIR_PASS(progress, nir, nir_lower_int64); NIR_PASS(progress, nir, nir_opt_idiv_const, 8); NIR_PASS(progress, nir, nir_lower_idiv, @@ -3595,6 +3622,7 @@ nir_shader *si_get_prev_stage_nir_shader(struct si_shader *shader, */ prev_shader->key.ge.opt.kill_outputs = 0; prev_shader->is_monolithic = true; + prev_shader->wave_size = shader->wave_size; si_init_shader_args(prev_shader, args); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 634c2189497..3fca476cceb 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -125,7 +125,6 @@ extern "C" { struct nir_shader; struct nir_instr; -struct nir_lower_subgroups_options; #define SI_NUM_INTERP 32 #define SI_MAX_ATTRIBS 16 diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 471e4555c95..3c234e59693 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -307,8 +307,6 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir) NIR_PASS_V(nir, ac_nir_lower_sin_cos); - NIR_PASS_V(nir, nir_lower_subgroups, sscreen->nir_lower_subgroups_options); - /* Lower load constants to scalar and then clean up the mess */ NIR_PASS_V(nir, nir_lower_load_const_to_scalar); NIR_PASS_V(nir, nir_lower_var_copies);