r600/sfn: implement finalize_nir

Move shader key independent lowering and some optimizations
to finalize_nir.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19708>
This commit is contained in:
Gert Wollny 2022-11-09 18:48:21 +01:00 committed by Marge Bot
parent ec1cdc13d5
commit a81c50a214
3 changed files with 70 additions and 50 deletions

View file

@ -684,6 +684,9 @@ static struct pipe_resource *r600_resource_create(struct pipe_screen *screen,
return r600_resource_create_common(screen, templ);
}
char *
r600_finalize_nir(struct pipe_screen *screen, void *shader);
struct pipe_screen *r600_screen_create(struct radeon_winsys *ws,
const struct pipe_screen_config *config)
{
@ -700,6 +703,9 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws,
rscreen->b.b.get_shader_param = r600_get_shader_param;
rscreen->b.b.resource_create = r600_resource_create;
if (is_nir_enabled(&rscreen->b))
rscreen->b.b.finalize_nir = r600_finalize_nir;
if (!r600_common_screen_init(&rscreen->b, ws)) {
FREE(rscreen);
return NULL;

View file

@ -727,11 +727,49 @@ public:
~MallocPoolRelease() { r600::release_pool(); }
};
extern "C" char *
r600_finalize_nir(pipe_screen *screen, void *shader)
{
r600_screen *rs = (r600_screen *)screen;
nir_shader *nir = (nir_shader *)shader;
NIR_PASS_V(nir, nir_lower_regs_to_ssa);
nir_lower_idiv_options idiv_options = {0};
NIR_PASS_V(nir, nir_lower_idiv, &idiv_options);
NIR_PASS_V(nir, r600_nir_lower_trigen, rs->b.gfx_level);
NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
NIR_PASS_V(nir, nir_lower_undef_to_zero);
struct nir_lower_tex_options lower_tex_options = {0};
lower_tex_options.lower_txp = ~0u;
lower_tex_options.lower_txf_offset = true;
lower_tex_options.lower_invalid_implicit_lod = true;
lower_tex_options.lower_tg4_offsets = true;
NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
NIR_PASS_V(nir, r600_nir_lower_txl_txf_array_or_cube);
NIR_PASS_V(nir, r600_nir_lower_cube_to_2darray);
NIR_PASS_V(nir, r600_nir_lower_pack_unpack_2x16);
NIR_PASS_V(nir, r600_lower_shared_io);
NIR_PASS_V(nir, r600_nir_lower_atomics);
while (optimize_once(nir))
;
return NULL;
}
int
r600_shader_from_nir(struct r600_context *rctx,
struct r600_pipe_shader *pipeshader,
r600_shader_key *key)
{
MallocPoolRelease pool_release;
struct r600_pipe_shader_selector *sel = pipeshader->selector;
@ -748,78 +786,51 @@ r600_shader_from_nir(struct r600_context *rctx,
fprintf(stderr, "END PRE-OPT-NIR--------------------------------------\n\n");
}
auto sh = nir_shader_clone(sel->nir, sel->nir);
r600::sort_uniforms(sel->nir);
/* Cayman seems very crashy about accessing images that don't exists or are
* accessed out of range, this lowering seems to help (but it can also be
* another problem */
NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
nir_lower_idiv_options idiv_options = {0};
idiv_options.allow_fp16 = true;
NIR_PASS_V(sel->nir, nir_lower_idiv, &idiv_options);
NIR_PASS_V(sel->nir, r600_nir_lower_trigen, rctx->b.gfx_level);
NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar, false);
NIR_PASS_V(sel->nir, nir_lower_undef_to_zero);
if (lower_64bit)
NIR_PASS_V(sel->nir, nir_lower_int64);
while (optimize_once(sel->nir))
while (optimize_once(sh))
;
NIR_PASS_V(sel->nir, r600_lower_shared_io);
NIR_PASS_V(sel->nir, r600_nir_lower_atomics);
struct nir_lower_tex_options lower_tex_options = {0};
lower_tex_options.lower_txp = ~0u;
lower_tex_options.lower_txf_offset = true;
lower_tex_options.lower_invalid_implicit_lod = true;
lower_tex_options.lower_tg4_offsets = true;
if (sh->info.stage == MESA_SHADER_VERTEX)
NIR_PASS_V(sh, r600_vectorize_vs_inputs);
NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
NIR_PASS_V(sel->nir, r600_nir_lower_txl_txf_array_or_cube);
NIR_PASS_V(sel->nir, r600_nir_lower_cube_to_2darray);
NIR_PASS_V(sel->nir, r600_nir_lower_pack_unpack_2x16);
if (sel->nir->info.stage == MESA_SHADER_VERTEX)
NIR_PASS_V(sel->nir, r600_vectorize_vs_inputs);
if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
NIR_PASS_V(sel->nir, nir_lower_fragcoord_wtrans);
NIR_PASS_V(sel->nir, r600_lower_fs_out_to_vector);
if (sh->info.stage == MESA_SHADER_FRAGMENT) {
NIR_PASS_V(sh, nir_lower_fragcoord_wtrans);
NIR_PASS_V(sh, r600_lower_fs_out_to_vector);
}
nir_variable_mode io_modes = nir_var_uniform | nir_var_shader_in | nir_var_shader_out;
NIR_PASS_V(sel->nir, nir_opt_combine_stores, nir_var_shader_out);
NIR_PASS_V(sel->nir,
NIR_PASS_V(sh, nir_opt_combine_stores, nir_var_shader_out);
NIR_PASS_V(sh,
nir_lower_io,
io_modes,
r600_glsl_type_size,
nir_lower_io_lower_64bit_to_32);
if (sel->nir->info.stage == MESA_SHADER_FRAGMENT)
NIR_PASS_V(sel->nir, r600_lower_fs_pos_input);
if (sh->info.stage == MESA_SHADER_FRAGMENT)
NIR_PASS_V(sh, r600_lower_fs_pos_input);
/**/
if (lower_64bit)
NIR_PASS_V(sel->nir, nir_lower_indirect_derefs, nir_var_function_temp, 10);
NIR_PASS_V(sh, nir_lower_indirect_derefs, nir_var_function_temp, 10);
NIR_PASS_V(sel->nir, nir_opt_constant_folding);
NIR_PASS_V(sel->nir, nir_io_add_const_offset_to_base, io_modes);
NIR_PASS_V(sh, nir_opt_constant_folding);
NIR_PASS_V(sh, nir_io_add_const_offset_to_base, io_modes);
NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar, false);
NIR_PASS_V(sh, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
NIR_PASS_V(sh, nir_lower_phis_to_scalar, false);
if (lower_64bit)
NIR_PASS_V(sel->nir, r600::r600_nir_split_64bit_io);
NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar, false);
NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
NIR_PASS_V(sel->nir, nir_copy_prop);
NIR_PASS_V(sel->nir, nir_opt_dce);
NIR_PASS_V(sh, r600::r600_nir_split_64bit_io);
NIR_PASS_V(sh, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
NIR_PASS_V(sh, nir_lower_phis_to_scalar, false);
NIR_PASS_V(sh, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
NIR_PASS_V(sh, nir_copy_prop);
NIR_PASS_V(sh, nir_opt_dce);
auto sh = nir_shader_clone(sel->nir, sel->nir);
if (r600_is_last_vertex_stage(sh, *key))
r600_lower_clipvertex_to_clipdist(sh, sel->so);

View file

@ -132,6 +132,9 @@ r600_shader_from_nir(struct r600_context *rctx,
struct r600_pipe_shader *pipeshader,
union r600_shader_key *key);
char *
r600_finalize_nir(struct pipe_screen *screen, void *shader);
#ifdef __cplusplus
}
#endif