mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 20:08:06 +02:00
radeonsi: lower subgroup ops after wave size is known
We sometimes use wave 32, so we should not use a static subgroup size of 64. Signed-off-by: Qiang Yu <yuq825@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30610>
This commit is contained in:
parent
31dfb04fd3
commit
0f937426cc
6 changed files with 31 additions and 37 deletions
|
|
@ -17,12 +17,6 @@
|
||||||
#include "vl/vl_video_buffer.h"
|
#include "vl/vl_video_buffer.h"
|
||||||
#include <sys/utsname.h>
|
#include <sys/utsname.h>
|
||||||
|
|
||||||
#if LLVM_AVAILABLE
|
|
||||||
#include <llvm/Config/llvm-config.h> /* for LLVM_VERSION_MAJOR */
|
|
||||||
#else
|
|
||||||
#define LLVM_VERSION_MAJOR 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* The capabilities reported by the kernel has priority
|
/* The capabilities reported by the kernel has priority
|
||||||
over the existing logic in si_get_video_param */
|
over the existing logic in si_get_video_param */
|
||||||
#define QUERYABLE_KERNEL (sscreen->info.is_amdgpu && \
|
#define QUERYABLE_KERNEL (sscreen->info.is_amdgpu && \
|
||||||
|
|
@ -1690,23 +1684,4 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
|
||||||
options->support_indirect_outputs = BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
|
options->support_indirect_outputs = BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
|
||||||
options->varying_expression_max_cost = si_varying_expression_max_cost;
|
options->varying_expression_max_cost = si_varying_expression_max_cost;
|
||||||
options->varying_estimate_instr_cost = si_varying_estimate_instr_cost;
|
options->varying_estimate_instr_cost = si_varying_estimate_instr_cost;
|
||||||
|
|
||||||
nir_lower_subgroups_options *lower_subgroups_options = sscreen->nir_lower_subgroups_options;
|
|
||||||
lower_subgroups_options->subgroup_size = 64;
|
|
||||||
lower_subgroups_options->ballot_bit_size = 64;
|
|
||||||
lower_subgroups_options->ballot_components = 1;
|
|
||||||
lower_subgroups_options->lower_to_scalar = true;
|
|
||||||
lower_subgroups_options->lower_subgroup_masks = true;
|
|
||||||
lower_subgroups_options->lower_relative_shuffle = true;
|
|
||||||
lower_subgroups_options->lower_rotate_to_shuffle = !sscreen->use_aco;
|
|
||||||
lower_subgroups_options->lower_shuffle_to_32bit = true;
|
|
||||||
lower_subgroups_options->lower_vote_eq = true;
|
|
||||||
lower_subgroups_options->lower_vote_bool_eq = true;
|
|
||||||
lower_subgroups_options->lower_quad_broadcast_dynamic = true;
|
|
||||||
lower_subgroups_options->lower_quad_broadcast_dynamic_to_const = sscreen->info.gfx_level <= GFX7;
|
|
||||||
lower_subgroups_options->lower_shuffle_to_swizzle_amd = true;
|
|
||||||
lower_subgroups_options->lower_ballot_bit_count_to_mbcnt_amd = true;
|
|
||||||
lower_subgroups_options->lower_inverse_ballot = !sscreen->use_aco && LLVM_VERSION_MAJOR < 17;
|
|
||||||
lower_subgroups_options->lower_boolean_reduce = true;
|
|
||||||
lower_subgroups_options->lower_boolean_shuffle = true;
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1066,7 +1066,6 @@ static void si_destroy_screen(struct pipe_screen *pscreen)
|
||||||
|
|
||||||
sscreen->ws->destroy(sscreen->ws);
|
sscreen->ws->destroy(sscreen->ws);
|
||||||
FREE(sscreen->nir_options);
|
FREE(sscreen->nir_options);
|
||||||
FREE(sscreen->nir_lower_subgroups_options);
|
|
||||||
FREE(sscreen);
|
FREE(sscreen);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1235,7 +1234,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
||||||
sscreen->b.finalize_nir = si_finalize_nir;
|
sscreen->b.finalize_nir = si_finalize_nir;
|
||||||
|
|
||||||
sscreen->nir_options = CALLOC_STRUCT(nir_shader_compiler_options);
|
sscreen->nir_options = CALLOC_STRUCT(nir_shader_compiler_options);
|
||||||
sscreen->nir_lower_subgroups_options = CALLOC_STRUCT(nir_lower_subgroups_options);
|
|
||||||
|
|
||||||
si_init_screen_get_functions(sscreen);
|
si_init_screen_get_functions(sscreen);
|
||||||
si_init_screen_buffer_functions(sscreen);
|
si_init_screen_buffer_functions(sscreen);
|
||||||
|
|
@ -1272,7 +1270,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
||||||
si_init_gs_info(sscreen);
|
si_init_gs_info(sscreen);
|
||||||
if (!si_init_shader_cache(sscreen)) {
|
if (!si_init_shader_cache(sscreen)) {
|
||||||
FREE(sscreen->nir_options);
|
FREE(sscreen->nir_options);
|
||||||
FREE(sscreen->nir_lower_subgroups_options);
|
|
||||||
FREE(sscreen);
|
FREE(sscreen);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
@ -1329,7 +1326,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
||||||
UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY, NULL)) {
|
UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY, NULL)) {
|
||||||
si_destroy_shader_cache(sscreen);
|
si_destroy_shader_cache(sscreen);
|
||||||
FREE(sscreen->nir_options);
|
FREE(sscreen->nir_options);
|
||||||
FREE(sscreen->nir_lower_subgroups_options);
|
|
||||||
FREE(sscreen);
|
FREE(sscreen);
|
||||||
glsl_type_singleton_decref();
|
glsl_type_singleton_decref();
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
@ -1341,7 +1337,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
||||||
UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY, NULL)) {
|
UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY, NULL)) {
|
||||||
si_destroy_shader_cache(sscreen);
|
si_destroy_shader_cache(sscreen);
|
||||||
FREE(sscreen->nir_options);
|
FREE(sscreen->nir_options);
|
||||||
FREE(sscreen->nir_lower_subgroups_options);
|
|
||||||
FREE(sscreen);
|
FREE(sscreen);
|
||||||
glsl_type_singleton_decref();
|
glsl_type_singleton_decref();
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
|
||||||
|
|
@ -527,7 +527,6 @@ struct si_screen {
|
||||||
|
|
||||||
struct radeon_info info;
|
struct radeon_info info;
|
||||||
struct nir_shader_compiler_options *nir_options;
|
struct nir_shader_compiler_options *nir_options;
|
||||||
struct nir_lower_subgroups_options *nir_lower_subgroups_options;
|
|
||||||
uint64_t debug_flags;
|
uint64_t debug_flags;
|
||||||
char renderer_string[183];
|
char renderer_string[183];
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,12 @@
|
||||||
#include "util/ralloc.h"
|
#include "util/ralloc.h"
|
||||||
#include "util/u_upload_mgr.h"
|
#include "util/u_upload_mgr.h"
|
||||||
|
|
||||||
|
#if LLVM_AVAILABLE
|
||||||
|
#include <llvm/Config/llvm-config.h> /* for LLVM_VERSION_MAJOR */
|
||||||
|
#else
|
||||||
|
#define LLVM_VERSION_MAJOR 0
|
||||||
|
#endif
|
||||||
|
|
||||||
static const char scratch_rsrc_dword0_symbol[] = "SCRATCH_RSRC_DWORD0";
|
static const char scratch_rsrc_dword0_symbol[] = "SCRATCH_RSRC_DWORD0";
|
||||||
|
|
||||||
static const char scratch_rsrc_dword1_symbol[] = "SCRATCH_RSRC_DWORD1";
|
static const char scratch_rsrc_dword1_symbol[] = "SCRATCH_RSRC_DWORD1";
|
||||||
|
|
@ -1957,9 +1963,6 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir)
|
||||||
NIR_PASS_V(nir, ac_nir_lower_ngg_gs, &options);
|
NIR_PASS_V(nir, ac_nir_lower_ngg_gs, &options);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* may generate some subgroup op like ballot */
|
|
||||||
NIR_PASS_V(nir, nir_lower_subgroups, sel->screen->nir_lower_subgroups_options);
|
|
||||||
|
|
||||||
/* may generate some vector output store */
|
/* may generate some vector output store */
|
||||||
NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
|
NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
|
||||||
}
|
}
|
||||||
|
|
@ -2480,6 +2483,30 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader,
|
||||||
progress = true;
|
progress = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
assert(shader->wave_size == 32 || shader->wave_size == 64);
|
||||||
|
|
||||||
|
NIR_PASS(progress, nir, nir_lower_subgroups,
|
||||||
|
&(struct nir_lower_subgroups_options) {
|
||||||
|
.subgroup_size = shader->wave_size,
|
||||||
|
.ballot_bit_size = shader->wave_size,
|
||||||
|
.ballot_components = 1,
|
||||||
|
.lower_to_scalar = true,
|
||||||
|
.lower_subgroup_masks = true,
|
||||||
|
.lower_relative_shuffle = true,
|
||||||
|
.lower_rotate_to_shuffle = !sel->info.base.use_aco_amd,
|
||||||
|
.lower_shuffle_to_32bit = true,
|
||||||
|
.lower_vote_eq = true,
|
||||||
|
.lower_vote_bool_eq = true,
|
||||||
|
.lower_quad_broadcast_dynamic = true,
|
||||||
|
.lower_quad_broadcast_dynamic_to_const = sel->screen->info.gfx_level <= GFX7,
|
||||||
|
.lower_shuffle_to_swizzle_amd = true,
|
||||||
|
.lower_ballot_bit_count_to_mbcnt_amd = true,
|
||||||
|
.lower_inverse_ballot = !sel->info.base.use_aco_amd && LLVM_VERSION_MAJOR < 17,
|
||||||
|
.lower_boolean_reduce = true,
|
||||||
|
.lower_boolean_shuffle = true,
|
||||||
|
});
|
||||||
|
|
||||||
|
NIR_PASS(progress, nir, nir_lower_pack);
|
||||||
NIR_PASS(progress, nir, nir_lower_int64);
|
NIR_PASS(progress, nir, nir_lower_int64);
|
||||||
NIR_PASS(progress, nir, nir_opt_idiv_const, 8);
|
NIR_PASS(progress, nir, nir_opt_idiv_const, 8);
|
||||||
NIR_PASS(progress, nir, nir_lower_idiv,
|
NIR_PASS(progress, nir, nir_lower_idiv,
|
||||||
|
|
@ -3595,6 +3622,7 @@ nir_shader *si_get_prev_stage_nir_shader(struct si_shader *shader,
|
||||||
*/
|
*/
|
||||||
prev_shader->key.ge.opt.kill_outputs = 0;
|
prev_shader->key.ge.opt.kill_outputs = 0;
|
||||||
prev_shader->is_monolithic = true;
|
prev_shader->is_monolithic = true;
|
||||||
|
prev_shader->wave_size = shader->wave_size;
|
||||||
|
|
||||||
si_init_shader_args(prev_shader, args);
|
si_init_shader_args(prev_shader, args);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -125,7 +125,6 @@ extern "C" {
|
||||||
|
|
||||||
struct nir_shader;
|
struct nir_shader;
|
||||||
struct nir_instr;
|
struct nir_instr;
|
||||||
struct nir_lower_subgroups_options;
|
|
||||||
|
|
||||||
#define SI_NUM_INTERP 32
|
#define SI_NUM_INTERP 32
|
||||||
#define SI_MAX_ATTRIBS 16
|
#define SI_MAX_ATTRIBS 16
|
||||||
|
|
|
||||||
|
|
@ -307,8 +307,6 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
|
||||||
|
|
||||||
NIR_PASS_V(nir, ac_nir_lower_sin_cos);
|
NIR_PASS_V(nir, ac_nir_lower_sin_cos);
|
||||||
|
|
||||||
NIR_PASS_V(nir, nir_lower_subgroups, sscreen->nir_lower_subgroups_options);
|
|
||||||
|
|
||||||
/* Lower load constants to scalar and then clean up the mess */
|
/* Lower load constants to scalar and then clean up the mess */
|
||||||
NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
|
NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
|
||||||
NIR_PASS_V(nir, nir_lower_var_copies);
|
NIR_PASS_V(nir, nir_lower_var_copies);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue