mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-10 08:10:14 +01:00
radeonsi: lower subgroup ops after wave size is known
We sometimes use wave32, so we should not use a static subgroup size of 64. Signed-off-by: Qiang Yu <yuq825@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30610>
This commit is contained in:
parent
31dfb04fd3
commit
0f937426cc
6 changed files with 31 additions and 37 deletions
|
|
@ -17,12 +17,6 @@
|
|||
#include "vl/vl_video_buffer.h"
|
||||
#include <sys/utsname.h>
|
||||
|
||||
#if LLVM_AVAILABLE
|
||||
#include <llvm/Config/llvm-config.h> /* for LLVM_VERSION_MAJOR */
|
||||
#else
|
||||
#define LLVM_VERSION_MAJOR 0
|
||||
#endif
|
||||
|
||||
/* The capabilities reported by the kernel has priority
|
||||
over the existing logic in si_get_video_param */
|
||||
#define QUERYABLE_KERNEL (sscreen->info.is_amdgpu && \
|
||||
|
|
@ -1690,23 +1684,4 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
|
|||
options->support_indirect_outputs = BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
|
||||
options->varying_expression_max_cost = si_varying_expression_max_cost;
|
||||
options->varying_estimate_instr_cost = si_varying_estimate_instr_cost;
|
||||
|
||||
nir_lower_subgroups_options *lower_subgroups_options = sscreen->nir_lower_subgroups_options;
|
||||
lower_subgroups_options->subgroup_size = 64;
|
||||
lower_subgroups_options->ballot_bit_size = 64;
|
||||
lower_subgroups_options->ballot_components = 1;
|
||||
lower_subgroups_options->lower_to_scalar = true;
|
||||
lower_subgroups_options->lower_subgroup_masks = true;
|
||||
lower_subgroups_options->lower_relative_shuffle = true;
|
||||
lower_subgroups_options->lower_rotate_to_shuffle = !sscreen->use_aco;
|
||||
lower_subgroups_options->lower_shuffle_to_32bit = true;
|
||||
lower_subgroups_options->lower_vote_eq = true;
|
||||
lower_subgroups_options->lower_vote_bool_eq = true;
|
||||
lower_subgroups_options->lower_quad_broadcast_dynamic = true;
|
||||
lower_subgroups_options->lower_quad_broadcast_dynamic_to_const = sscreen->info.gfx_level <= GFX7;
|
||||
lower_subgroups_options->lower_shuffle_to_swizzle_amd = true;
|
||||
lower_subgroups_options->lower_ballot_bit_count_to_mbcnt_amd = true;
|
||||
lower_subgroups_options->lower_inverse_ballot = !sscreen->use_aco && LLVM_VERSION_MAJOR < 17;
|
||||
lower_subgroups_options->lower_boolean_reduce = true;
|
||||
lower_subgroups_options->lower_boolean_shuffle = true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1066,7 +1066,6 @@ static void si_destroy_screen(struct pipe_screen *pscreen)
|
|||
|
||||
sscreen->ws->destroy(sscreen->ws);
|
||||
FREE(sscreen->nir_options);
|
||||
FREE(sscreen->nir_lower_subgroups_options);
|
||||
FREE(sscreen);
|
||||
}
|
||||
|
||||
|
|
@ -1235,7 +1234,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
|||
sscreen->b.finalize_nir = si_finalize_nir;
|
||||
|
||||
sscreen->nir_options = CALLOC_STRUCT(nir_shader_compiler_options);
|
||||
sscreen->nir_lower_subgroups_options = CALLOC_STRUCT(nir_lower_subgroups_options);
|
||||
|
||||
si_init_screen_get_functions(sscreen);
|
||||
si_init_screen_buffer_functions(sscreen);
|
||||
|
|
@ -1272,7 +1270,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
|||
si_init_gs_info(sscreen);
|
||||
if (!si_init_shader_cache(sscreen)) {
|
||||
FREE(sscreen->nir_options);
|
||||
FREE(sscreen->nir_lower_subgroups_options);
|
||||
FREE(sscreen);
|
||||
return NULL;
|
||||
}
|
||||
|
|
@ -1329,7 +1326,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
|||
UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY, NULL)) {
|
||||
si_destroy_shader_cache(sscreen);
|
||||
FREE(sscreen->nir_options);
|
||||
FREE(sscreen->nir_lower_subgroups_options);
|
||||
FREE(sscreen);
|
||||
glsl_type_singleton_decref();
|
||||
return NULL;
|
||||
|
|
@ -1341,7 +1337,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
|||
UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY, NULL)) {
|
||||
si_destroy_shader_cache(sscreen);
|
||||
FREE(sscreen->nir_options);
|
||||
FREE(sscreen->nir_lower_subgroups_options);
|
||||
FREE(sscreen);
|
||||
glsl_type_singleton_decref();
|
||||
return NULL;
|
||||
|
|
|
|||
|
|
@ -527,7 +527,6 @@ struct si_screen {
|
|||
|
||||
struct radeon_info info;
|
||||
struct nir_shader_compiler_options *nir_options;
|
||||
struct nir_lower_subgroups_options *nir_lower_subgroups_options;
|
||||
uint64_t debug_flags;
|
||||
char renderer_string[183];
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,12 @@
|
|||
#include "util/ralloc.h"
|
||||
#include "util/u_upload_mgr.h"
|
||||
|
||||
#if LLVM_AVAILABLE
|
||||
#include <llvm/Config/llvm-config.h> /* for LLVM_VERSION_MAJOR */
|
||||
#else
|
||||
#define LLVM_VERSION_MAJOR 0
|
||||
#endif
|
||||
|
||||
static const char scratch_rsrc_dword0_symbol[] = "SCRATCH_RSRC_DWORD0";
|
||||
|
||||
static const char scratch_rsrc_dword1_symbol[] = "SCRATCH_RSRC_DWORD1";
|
||||
|
|
@ -1957,9 +1963,6 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir)
|
|||
NIR_PASS_V(nir, ac_nir_lower_ngg_gs, &options);
|
||||
}
|
||||
|
||||
/* may generate some subgroup op like ballot */
|
||||
NIR_PASS_V(nir, nir_lower_subgroups, sel->screen->nir_lower_subgroups_options);
|
||||
|
||||
/* may generate some vector output store */
|
||||
NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
|
||||
}
|
||||
|
|
@ -2480,6 +2483,30 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader,
|
|||
progress = true;
|
||||
}
|
||||
|
||||
assert(shader->wave_size == 32 || shader->wave_size == 64);
|
||||
|
||||
NIR_PASS(progress, nir, nir_lower_subgroups,
|
||||
&(struct nir_lower_subgroups_options) {
|
||||
.subgroup_size = shader->wave_size,
|
||||
.ballot_bit_size = shader->wave_size,
|
||||
.ballot_components = 1,
|
||||
.lower_to_scalar = true,
|
||||
.lower_subgroup_masks = true,
|
||||
.lower_relative_shuffle = true,
|
||||
.lower_rotate_to_shuffle = !sel->info.base.use_aco_amd,
|
||||
.lower_shuffle_to_32bit = true,
|
||||
.lower_vote_eq = true,
|
||||
.lower_vote_bool_eq = true,
|
||||
.lower_quad_broadcast_dynamic = true,
|
||||
.lower_quad_broadcast_dynamic_to_const = sel->screen->info.gfx_level <= GFX7,
|
||||
.lower_shuffle_to_swizzle_amd = true,
|
||||
.lower_ballot_bit_count_to_mbcnt_amd = true,
|
||||
.lower_inverse_ballot = !sel->info.base.use_aco_amd && LLVM_VERSION_MAJOR < 17,
|
||||
.lower_boolean_reduce = true,
|
||||
.lower_boolean_shuffle = true,
|
||||
});
|
||||
|
||||
NIR_PASS(progress, nir, nir_lower_pack);
|
||||
NIR_PASS(progress, nir, nir_lower_int64);
|
||||
NIR_PASS(progress, nir, nir_opt_idiv_const, 8);
|
||||
NIR_PASS(progress, nir, nir_lower_idiv,
|
||||
|
|
@ -3595,6 +3622,7 @@ nir_shader *si_get_prev_stage_nir_shader(struct si_shader *shader,
|
|||
*/
|
||||
prev_shader->key.ge.opt.kill_outputs = 0;
|
||||
prev_shader->is_monolithic = true;
|
||||
prev_shader->wave_size = shader->wave_size;
|
||||
|
||||
si_init_shader_args(prev_shader, args);
|
||||
|
||||
|
|
|
|||
|
|
@ -125,7 +125,6 @@ extern "C" {
|
|||
|
||||
struct nir_shader;
|
||||
struct nir_instr;
|
||||
struct nir_lower_subgroups_options;
|
||||
|
||||
#define SI_NUM_INTERP 32
|
||||
#define SI_MAX_ATTRIBS 16
|
||||
|
|
|
|||
|
|
@ -307,8 +307,6 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
|
|||
|
||||
NIR_PASS_V(nir, ac_nir_lower_sin_cos);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_subgroups, sscreen->nir_lower_subgroups_options);
|
||||
|
||||
/* Lower load constants to scalar and then clean up the mess */
|
||||
NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
|
||||
NIR_PASS_V(nir, nir_lower_var_copies);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue