radeonsi: move/rewrite PS color input gathering for shader variants

This removes the duplicated gathering of PS color inputs from 3 places
for shader variants and adds it in the one place where it belongs:
before the late optimization and late lowering passes, which is where
we want it for the radeonsi linker.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32910>
This commit is contained in:
Marek Olšák 2024-12-29 16:11:45 -05:00 committed by Marge Bot
parent 1a2f6cad57
commit 9e3033e071
5 changed files with 51 additions and 70 deletions

View file

@ -101,7 +101,7 @@ static void si_create_compute_state_async(void *job, void *gdata, int thread_ind
compiler = &sscreen->compiler[thread_index];
assert(program->ir_type == PIPE_SHADER_IR_NIR);
si_nir_scan_shader(sscreen, sel->nir, &sel->info);
si_nir_scan_shader(sscreen, sel->nir, &sel->info, false);
if (!sel->nir->info.use_aco_amd && !*compiler)
*compiler = si_create_llvm_compiler(sscreen);

View file

@ -36,9 +36,11 @@ static void si_fix_resource_usage(struct si_screen *sscreen, struct si_shader *s
/* Get the number of all interpolated inputs */
unsigned si_get_ps_num_interp(struct si_shader *ps)
{
unsigned num_colors = !!(ps->info.ps_colors_read & 0x0f) + !!(ps->info.ps_colors_read & 0xf0);
unsigned num_interp =
ps->info.num_ps_inputs + (ps->key.ps.part.prolog.color_two_side ? num_colors : 0);
unsigned num_interp = ps->info.num_ps_inputs;
/* Back colors are added by the PS prolog when needed. */
if (!ps->is_monolithic && ps->key.ps.part.prolog.color_two_side)
num_interp += !!(ps->info.ps_colors_read & 0x0f) + !!(ps->info.ps_colors_read & 0xf0);
assert(num_interp <= 32);
return MIN2(num_interp, 32);
@ -2234,7 +2236,7 @@ si_init_gs_output_info(struct si_shader_info *info, struct si_gs_output_info *ou
* better code or lower undesirable representations (like derefs). Lowering passes that prevent
* linking optimizations or destroy shader_info shouldn't be run here.
*/
static bool run_pre_link_optimization_passes(struct si_nir_shader_ctx *ctx, bool *opts_not_run)
static bool run_pre_link_optimization_passes(struct si_nir_shader_ctx *ctx)
{
struct si_shader *shader = ctx->shader;
struct si_shader_selector *sel = shader->selector;
@ -2299,6 +2301,12 @@ static bool run_pre_link_optimization_passes(struct si_nir_shader_ctx *ctx, bool
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
/* This uses the prolog/epilog keys, so only monolithic shaders can call this. */
if (shader->is_monolithic) {
/* This lowers load_color intrinsics to COLn/BFCn input loads and two-side color
* selection.
*/
if (sel->info.colors_read)
NIR_PASS(progress, nir, si_nir_lower_ps_color_input, &shader->key, &sel->info);
/* This eliminates system values and unused shader output components. */
ac_nir_lower_ps_early_options early_options = {
.force_center_interp_no_msaa = key->ps.part.prolog.force_persp_center_interp ||
@ -2343,37 +2351,6 @@ static bool run_pre_link_optimization_passes(struct si_nir_shader_ctx *ctx, bool
/* This adds discard. */
if (key->ps.part.prolog.poly_stipple)
NIR_PASS(progress, nir, si_nir_emit_polygon_stipple);
if (progress) {
si_nir_opts(sel->screen, nir, *opts_not_run);
*opts_not_run = false;
progress = false;
}
/* Uniform inlining can eliminate PS inputs, and colormask can remove PS outputs,
* which can also cause the elimination of PS inputs. Remove holes after removed PS inputs
* by renumbering them. This can only happen with monolithic PS.
*/
NIR_PASS_V(nir, nir_recompute_io_bases, nir_var_shader_in);
/* Two-side color selection and interpolation: Get the latest shader info because
* uniform inlining and colormask can fully eliminate color inputs.
*/
struct si_shader_info info;
si_nir_scan_shader(sel->screen, nir, &info);
/* We need to set this early for lowering nir_intrinsic_load_point_coord_maybe_flipped,
* which can only occur with monolithic PS.
*/
shader->info.num_ps_inputs = info.num_inputs;
shader->info.ps_colors_read = info.colors_read;
/* This lowers load_color intrinsics to COLn/BFCn input loads and two-side color selection.
* If uniform inlining eliminated color inputs, it will just be dead code that will be
* eliminated later.
*/
if (info.colors_read)
NIR_PASS(progress, nir, si_nir_lower_ps_color_input, &shader->key, &info);
} else {
ac_nir_lower_ps_early_options early_options = {
.optimize_frag_coord = true,
@ -2712,7 +2689,7 @@ static void get_nir_shaders(struct si_shader *shader, struct si_linked_shaders *
for (unsigned i = 0; i < SI_NUM_LINKED_SHADERS; i++) {
if (linked->shader[i].nir) {
progress[i] = run_pre_link_optimization_passes(&linked->shader[i], &opts_not_run[i]);
progress[i] = run_pre_link_optimization_passes(&linked->shader[i]);
}
}
@ -2731,6 +2708,31 @@ static void get_nir_shaders(struct si_shader *shader, struct si_linked_shaders *
}
}
if (shader->selector->stage == MESA_SHADER_FRAGMENT) {
if (progress[1]) {
si_nir_opts(shader->selector->screen, linked->consumer.nir, opts_not_run[1]);
opts_not_run[1] = false;
progress[1] = false;
}
/* Remove holes after removed PS inputs by renumbering them. Holes can only occur with
* monolithic PS.
*/
if (shader->is_monolithic)
NIR_PASS_V(linked->consumer.nir, nir_recompute_io_bases, nir_var_shader_in);
struct si_shader_info info;
si_nir_scan_shader(shader->selector->screen, linked->consumer.nir, &info,
shader->is_monolithic);
shader->info.num_ps_inputs = info.num_inputs;
shader->info.ps_colors_read = info.colors_read;
/* A non-monolithic PS doesn't know if back colors are enabled, so copy 2 more. */
unsigned max_interp = MIN2(info.num_inputs + 2, SI_NUM_INTERP);
memcpy(shader->info.ps_inputs, info.input, max_interp * sizeof(info.input[0]));
}
for (unsigned i = 0; i < SI_NUM_LINKED_SHADERS; i++) {
if (linked->shader[i].nir) {
run_late_optimization_and_lowering_passes(&linked->shader[i], progress[i],
@ -2738,14 +2740,11 @@ static void get_nir_shaders(struct si_shader *shader, struct si_linked_shaders *
}
}
if (linked->producer.nir)
si_update_shader_binary_info(shader, linked->producer.nir);
/* TODO: gather this where other shader_info is gathered */
for (unsigned i = 0; i < SI_NUM_LINKED_SHADERS; i++) {
if (linked->shader[i].nir) {
struct si_shader_info info;
si_nir_scan_shader(shader->selector->screen, linked->shader[i].nir, &info);
si_nir_scan_shader(shader->selector->screen, linked->shader[i].nir, &info, true);
shader->info.uses_vmem_load_other |= info.uses_vmem_load_other;
shader->info.uses_vmem_sampler_or_bvh |= info.uses_vmem_sampler_or_bvh;
@ -2753,22 +2752,6 @@ static void get_nir_shaders(struct si_shader *shader, struct si_linked_shaders *
}
}
void si_update_shader_binary_info(struct si_shader *shader, nir_shader *nir)
{
struct si_shader_info info;
si_nir_scan_shader(shader->selector->screen, nir, &info);
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
/* Since uniform inlining can remove PS inputs, set the latest info about PS inputs here. */
shader->info.num_ps_inputs = info.num_inputs;
shader->info.ps_colors_read = info.colors_read;
/* A non-monolithic PS doesn't know if back colors are enabled, so copy 2 more. */
unsigned max_interp = MIN2(info.num_inputs + 2, SI_NUM_INTERP);
memcpy(shader->info.ps_inputs, info.input, max_interp * sizeof(info.input[0]));
}
}
/* Generate code for the hardware VS shader stage to go with a geometry shader */
static struct si_shader *
si_nir_generate_gs_copy_shader(struct si_screen *sscreen,
@ -3062,8 +3045,6 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
shader->info.vs_output_ps_input_cntl[i] = SI_PS_INPUT_CNTL_UNUSED;
shader->info.vs_output_ps_input_cntl[VARYING_SLOT_COL0] = SI_PS_INPUT_CNTL_UNUSED_COLOR0;
si_update_shader_binary_info(shader, nir);
/* uses_instanceid may be set by si_nir_lower_vs_inputs(). */
shader->info.uses_instanceid |= sel->info.uses_instanceid;
shader->info.private_mem_vgprs = DIV_ROUND_UP(nir->scratch_size, 4);

View file

@ -1051,7 +1051,6 @@ struct si_shader_part {
/* si_shader.c */
struct ac_rtld_binary;
void si_update_shader_binary_info(struct si_shader *shader, struct nir_shader *nir);
bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
struct si_shader *shader, struct util_debug_callback *debug);
bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
@ -1078,7 +1077,7 @@ unsigned si_get_shader_binary_size(struct si_screen *screen, struct si_shader *s
/* si_shader_info.c */
void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir,
struct si_shader_info *info);
struct si_shader_info *info, bool colors_lowered);
/* si_shader_nir.c */
void si_lower_mediump_io(struct nir_shader *nir);

View file

@ -59,7 +59,7 @@ static const nir_src *get_texture_src(nir_tex_instr *instr, nir_tex_src_type typ
}
static void scan_io_usage(const nir_shader *nir, struct si_shader_info *info,
nir_intrinsic_instr *intr, bool is_input)
nir_intrinsic_instr *intr, bool is_input, bool colors_lowered)
{
unsigned interp = INTERP_MODE_FLAT; /* load_input uses flat shading */
@ -129,8 +129,9 @@ static void scan_io_usage(const nir_shader *nir, struct si_shader_info *info,
/* Gather color PS inputs. We can only get here after lowering colors in monolithic
* shaders. This must match what we do for nir_intrinsic_load_color0/1.
*/
if (semantic == VARYING_SLOT_COL0 || semantic == VARYING_SLOT_COL1 ||
semantic == VARYING_SLOT_BFC0 || semantic == VARYING_SLOT_BFC1) {
if (!colors_lowered &&
(semantic == VARYING_SLOT_COL0 || semantic == VARYING_SLOT_COL1 ||
semantic == VARYING_SLOT_BFC0 || semantic == VARYING_SLOT_BFC1)) {
unsigned index = semantic == VARYING_SLOT_COL1 || semantic == VARYING_SLOT_BFC1;
info->colors_read |= mask << (index * 4);
return;
@ -311,7 +312,7 @@ static bool is_bindless_handle_indirect(nir_instr *src)
/* TODO: convert to nir_shader_instructions_pass */
static void scan_instruction(const struct nir_shader *nir, struct si_shader_info *info,
nir_instr *instr)
nir_instr *instr, bool colors_lowered)
{
if (instr->type == nir_instr_type_tex) {
nir_tex_instr *tex = nir_instr_as_tex(instr);
@ -502,13 +503,13 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
case nir_intrinsic_load_per_vertex_input:
case nir_intrinsic_load_input_vertex:
case nir_intrinsic_load_interpolated_input:
scan_io_usage(nir, info, intr, true);
scan_io_usage(nir, info, intr, true, colors_lowered);
break;
case nir_intrinsic_load_output:
case nir_intrinsic_load_per_vertex_output:
case nir_intrinsic_store_output:
case nir_intrinsic_store_per_vertex_output:
scan_io_usage(nir, info, intr, false);
scan_io_usage(nir, info, intr, false, colors_lowered);
break;
case nir_intrinsic_load_deref:
case nir_intrinsic_store_deref:
@ -529,7 +530,7 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
}
void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir,
struct si_shader_info *info)
struct si_shader_info *info, bool colors_lowered)
{
bool force_use_aco = false;
if (sscreen->force_shader_use_aco) {
@ -656,7 +657,7 @@ void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir,
nir_function_impl *impl = nir_shader_get_entrypoint((nir_shader*)nir);
nir_foreach_block (block, impl) {
nir_foreach_instr (instr, block)
scan_instruction(nir, info, instr);
scan_instruction(nir, info, instr, colors_lowered);
}
if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL ||

View file

@ -3614,7 +3614,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
sel->nir = (nir_shader*)state->ir.nir;
}
si_nir_scan_shader(sscreen, sel->nir, &sel->info);
si_nir_scan_shader(sscreen, sel->nir, &sel->info, false);
sel->stage = sel->nir->info.stage;
sel->const_and_shader_buf_descriptors_index =