ac/nir: return the GS copy shader from ac_nir_lower_legacy_gs

This way we won't have to pass output info between the two functions.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35352>
This commit is contained in:
Marek Olšák 2025-05-31 11:14:30 -04:00 committed by Marge Bot
parent 98f3fc494e
commit 2c64cdc047
8 changed files with 96 additions and 100 deletions

View file

@ -269,20 +269,6 @@ typedef struct ac_nir_gs_output_info {
nir_alu_type (*types_16bit_hi)[4];
} ac_nir_gs_output_info;
nir_shader *
ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
enum amd_gfx_level gfx_level,
uint32_t export_clipdist_mask,
bool write_pos_to_clipvertex,
bool pack_clip_cull_distances,
const uint8_t *param_offsets,
bool has_param_exports,
bool disable_streamout,
bool kill_pointsize,
bool kill_layer,
bool force_vrs,
ac_nir_gs_output_info *output_info);
bool
ac_nir_lower_legacy_vs(nir_shader *nir,
enum amd_gfx_level gfx_level,
@ -297,11 +283,29 @@ ac_nir_lower_legacy_vs(nir_shader *nir,
bool kill_layer,
bool force_vrs);
/* Options shared by ac_nir_lower_legacy_gs and ac_nir_create_gs_copy_shader.
 * Both functions take a pointer to this struct.
 */
typedef struct {
/* Forwarded to ac_nir_gs_shader_query by ac_nir_lower_legacy_gs. */
bool has_gen_prim_query;
bool has_pipeline_stats_query;
/* GS output info; read by both the GS lowering and the GS copy shader. */
ac_nir_gs_output_info *output_info;
/* The remaining fields are consumed by ac_nir_create_gs_copy_shader. */
/* Forwarded to ac_nir_export_position. */
enum amd_gfx_level gfx_level;
uint32_t export_clipdist_mask;
bool write_pos_to_clipvertex;
bool pack_clip_cull_distances;
/* Passed to ac_nir_export_parameters; only used when has_param_exports is set. */
const uint8_t *param_offsets;
/* Enables the parameter exports; its negation is also passed to ac_nir_export_position. */
bool has_param_exports;
/* When set, the stream ID is not loaded from the streamout config. */
bool disable_streamout;
/* When set, VARYING_BIT_PSIZ is removed from the exported outputs. */
bool kill_pointsize;
/* When set, VARYING_BIT_LAYER is removed from the exported outputs. */
bool kill_layer;
/* Forwarded to ac_nir_export_position. */
bool force_vrs;
} ac_nir_lower_legacy_gs_options;
/* Build the GS copy shader for gs_nir: a new MESA_SHADER_VERTEX shader named
 * "gs_copy", configured by the given options. Returns the new shader.
 */
nir_shader *
ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_options *options);
bool
ac_nir_lower_legacy_gs(nir_shader *nir,
bool has_gen_prim_query,
bool has_pipeline_stats_query,
ac_nir_gs_output_info *output_info);
ac_nir_lower_legacy_gs(nir_shader *nir, ac_nir_lower_legacy_gs_options *options,
nir_shader **gs_copy_shader);
/* This is a pre-link pass. It should only eliminate code and do lowering that mostly doesn't
* generate AMD-specific intrinsics.

View file

@ -11,21 +11,11 @@
#include "nir_xfb_info.h"
nir_shader *
ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
enum amd_gfx_level gfx_level,
uint32_t export_clipdist_mask,
bool write_pos_to_clipvertex,
bool pack_clip_cull_distances,
const uint8_t *param_offsets,
bool has_param_exports,
bool disable_streamout,
bool kill_pointsize,
bool kill_layer,
bool force_vrs,
ac_nir_gs_output_info *output_info)
ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_options *options)
{
nir_builder b = nir_builder_init_simple_shader(
MESA_SHADER_VERTEX, gs_nir->options, "gs_copy");
ac_nir_gs_output_info *output_info = options->output_info;
b.shader->info.outputs_written = gs_nir->info.outputs_written;
b.shader->info.outputs_written_16bit = gs_nir->info.outputs_written_16bit;
@ -34,7 +24,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
nir_xfb_info *info = ac_nir_get_sorted_xfb_info(gs_nir);
nir_def *stream_id = NULL;
if (!disable_streamout && info)
if (!options->disable_streamout && info)
stream_id = nir_ubfe_imm(&b, nir_load_streamout_config_amd(&b), 24, 2);
nir_def *vtx_offset = nir_imul_imm(&b, nir_load_vertex_id_zero_base(&b), 4);
@ -112,17 +102,18 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
if (stream == 0) {
uint64_t export_outputs = b.shader->info.outputs_written | VARYING_BIT_POS;
if (kill_pointsize)
if (options->kill_pointsize)
export_outputs &= ~VARYING_BIT_PSIZ;
if (kill_layer)
if (options->kill_layer)
export_outputs &= ~VARYING_BIT_LAYER;
ac_nir_export_position(&b, gfx_level, export_clipdist_mask, false, write_pos_to_clipvertex,
pack_clip_cull_distances, !has_param_exports, force_vrs, export_outputs,
ac_nir_export_position(&b, options->gfx_level, options->export_clipdist_mask, false,
options->write_pos_to_clipvertex, options->pack_clip_cull_distances,
!options->has_param_exports, options->force_vrs, export_outputs,
&out, NULL);
if (has_param_exports) {
ac_nir_export_parameters(&b, param_offsets,
if (options->has_param_exports) {
ac_nir_export_parameters(&b, options->param_offsets,
b.shader->info.outputs_written,
b.shader->info.outputs_written_16bit,
&out);

View file

@ -230,13 +230,11 @@ lower_legacy_gs_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, void *sta
}
bool
ac_nir_lower_legacy_gs(nir_shader *nir,
bool has_gen_prim_query,
bool has_pipeline_stats_query,
ac_nir_gs_output_info *output_info)
ac_nir_lower_legacy_gs(nir_shader *nir, ac_nir_lower_legacy_gs_options *options,
nir_shader **gs_copy_shader)
{
lower_legacy_gs_state s = {
.info = output_info,
.info = options->output_info,
};
unsigned num_vertices_per_primitive = 0;
@ -265,9 +263,9 @@ ac_nir_lower_legacy_gs(nir_shader *nir,
/* Emit shader query for mixed use of legacy and NGG GS */
bool progress = ac_nir_gs_shader_query(b,
has_gen_prim_query,
has_pipeline_stats_query,
has_pipeline_stats_query,
options->has_gen_prim_query,
options->has_pipeline_stats_query,
options->has_pipeline_stats_query,
num_vertices_per_primitive,
64,
s.vertex_count,
@ -286,5 +284,6 @@ ac_nir_lower_legacy_gs(nir_shader *nir,
nir_progress(progress, impl, nir_metadata_none);
*gs_copy_shader = ac_nir_create_gs_copy_shader(nir, options);
return true;
}

View file

@ -454,7 +454,17 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
.sysval_mask = stage->info.gs.output_usage_mask,
.varying_mask = stage->info.gs.output_usage_mask,
};
NIR_PASS(_, stage->nir, ac_nir_lower_legacy_gs, false, false, &gs_out_info);
ac_nir_lower_legacy_gs_options options = {
.has_gen_prim_query = false,
.has_pipeline_stats_query = false,
.output_info = &gs_out_info,
.gfx_level = pdev->info.gfx_level,
.export_clipdist_mask = stage->info.outinfo.clip_dist_mask | stage->info.outinfo.cull_dist_mask,
.param_offsets = stage->info.outinfo.vs_output_param_offset,
.has_param_exports = stage->info.outinfo.param_exports,
.force_vrs = stage->info.force_vrs_per_vertex,
};
NIR_PASS(_, stage->nir, ac_nir_lower_legacy_gs, &options, &stage->gs_copy_shader);
}
} else if (stage->stage == MESA_SHADER_FRAGMENT) {
ac_nir_lower_ps_late_options late_options = {

View file

@ -2265,16 +2265,7 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache
struct radv_instance *instance = radv_physical_device_instance(pdev);
const struct radv_shader_info *gs_info = &gs_stage->info;
ac_nir_gs_output_info output_info = {
.streams = gs_info->gs.output_streams,
.sysval_mask = gs_info->gs.output_usage_mask,
.varying_mask = gs_info->gs.output_usage_mask,
};
nir_shader *nir = ac_nir_create_gs_copy_shader(
gs_stage->nir, pdev->info.gfx_level, gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask, false,
false, gs_info->outinfo.vs_output_param_offset, gs_info->outinfo.param_exports, false, false, false,
gs_info->force_vrs_per_vertex, &output_info);
nir_shader *nir = gs_stage->gs_copy_shader;
nir->info.internal = true;
nir_validate_shader(nir, "after ac_nir_create_gs_copy_shader");

View file

@ -272,6 +272,7 @@ struct radv_shader_stage {
unsigned char shader_sha1[20];
nir_shader *nir;
nir_shader *gs_copy_shader;
nir_shader *internal_nir; /* meta shaders */
struct radv_shader_info info;

View file

@ -1606,9 +1606,44 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx *
}
progress = true;
} else if (nir->info.stage == MESA_SHADER_GEOMETRY && !key->ge.as_ngg) {
STATIC_ASSERT(sizeof(ctx->temp_info.vs_output_param_offset[0]) == 1);
memset(ctx->temp_info.vs_output_param_offset, AC_EXP_PARAM_DEFAULT_VAL_0000,
sizeof(ctx->temp_info.vs_output_param_offset));
for (unsigned i = 0; i < sel->info.num_outputs; i++) {
unsigned semantic = sel->info.output_semantic[i];
/* Skip if no channel writes to stream 0. */
if (!nir_slot_is_varying(semantic, MESA_SHADER_FRAGMENT) ||
(sel->info.output_streams[i] & 0x03 && /* whether component 0 writes to non-zero stream */
sel->info.output_streams[i] & 0x0c && /* whether component 1 writes to non-zero stream */
sel->info.output_streams[i] & 0x30 && /* whether component 2 writes to non-zero stream */
sel->info.output_streams[i] & 0xc0)) /* whether component 3 writes to non-zero stream */
continue;
ctx->temp_info.vs_output_param_offset[semantic] = shader->info.nr_param_exports++;
}
si_init_gs_output_info(&sel->info, &ctx->temp_info);
NIR_PASS_V(nir, ac_nir_lower_legacy_gs, false, sel->screen->use_ngg,
&ctx->temp_info.gs_out_info);
unsigned clip_cull_mask =
(sel->info.clipdist_mask & ~shader->key.ge.opt.kill_clip_distances) | sel->info.culldist_mask;
ac_nir_lower_legacy_gs_options options = {
.has_gen_prim_query = false,
.has_pipeline_stats_query = sel->screen->use_ngg,
.output_info = &ctx->temp_info.gs_out_info,
.gfx_level = sel->screen->info.gfx_level,
.export_clipdist_mask = clip_cull_mask,
.param_offsets = ctx->temp_info.vs_output_param_offset,
.has_param_exports = shader->info.nr_param_exports,
.disable_streamout = !shader->info.num_streamout_vec4s,
.kill_pointsize = key->ge.opt.kill_pointsize,
.kill_layer = key->ge.opt.kill_layer,
.force_vrs = sel->screen->options.vrs2x2,
};
NIR_PASS(_, nir, ac_nir_lower_legacy_gs, &options, &ctx->gs_copy_shader);
progress = true;
} else if (nir->info.stage == MESA_SHADER_FRAGMENT && shader->is_monolithic) {
ac_nir_lower_ps_late_options late_options = {
@ -1865,14 +1900,11 @@ static struct si_shader *
si_nir_generate_gs_copy_shader(struct si_screen *sscreen,
struct ac_llvm_compiler *compiler,
struct si_shader *gs_shader,
struct si_temp_shader_variant_info *temp_info,
nir_shader *gs_nir,
nir_shader *gs_nir, nir_shader *gs_copy_shader,
struct util_debug_callback *debug)
{
struct si_shader *shader;
struct si_shader_selector *gs_selector = gs_shader->selector;
struct si_shader_info *gsinfo = &gs_selector->info;
union si_shader_key *gskey = &gs_shader->key;
shader = CALLOC_STRUCT(si_shader);
if (!shader)
@ -1886,43 +1918,10 @@ si_nir_generate_gs_copy_shader(struct si_screen *sscreen,
shader->is_gs_copy_shader = true;
shader->wave_size = si_determine_wave_size(sscreen, shader);
shader->info.num_streamout_vec4s = gs_shader->info.num_streamout_vec4s;
shader->info.nr_pos_exports = si_get_nr_pos_exports(gs_selector, &gs_shader->key);
shader->info.nr_param_exports = gs_shader->info.nr_param_exports;
STATIC_ASSERT(sizeof(temp_info->vs_output_param_offset[0]) == 1);
memset(temp_info->vs_output_param_offset, AC_EXP_PARAM_DEFAULT_VAL_0000,
sizeof(temp_info->vs_output_param_offset));
for (unsigned i = 0; i < gsinfo->num_outputs; i++) {
unsigned semantic = gsinfo->output_semantic[i];
/* Skip if no channel writes to stream 0. */
if (!nir_slot_is_varying(semantic, MESA_SHADER_FRAGMENT) ||
(gsinfo->output_streams[i] & 0x03 && /* whether component 0 writes to non-zero stream */
gsinfo->output_streams[i] & 0x0c && /* whether component 1 writes to non-zero stream */
gsinfo->output_streams[i] & 0x30 && /* whether component 2 writes to non-zero stream */
gsinfo->output_streams[i] & 0xc0)) /* whether component 3 writes to non-zero stream */
continue;
temp_info->vs_output_param_offset[semantic] = shader->info.nr_param_exports++;
}
shader->info.nr_pos_exports = si_get_nr_pos_exports(gs_selector, gskey);
unsigned clip_cull_mask =
(gsinfo->clipdist_mask & ~gskey->ge.opt.kill_clip_distances) | gsinfo->culldist_mask;
nir_shader *nir =
ac_nir_create_gs_copy_shader(gs_nir,
sscreen->info.gfx_level,
clip_cull_mask,
false, false,
temp_info->vs_output_param_offset,
shader->info.nr_param_exports,
!gs_shader->info.num_streamout_vec4s,
gskey->ge.opt.kill_pointsize,
gskey->ge.opt.kill_layer,
sscreen->options.vrs2x2,
&temp_info->gs_out_info);
nir_shader *nir = gs_copy_shader;
struct si_linked_shaders linked;
memset(&linked, 0, sizeof(linked));
linked.consumer.nir = nir;
@ -2049,8 +2048,8 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
/* The GS copy shader is compiled next. */
if (nir->info.stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
shader->gs_copy_shader =
si_nir_generate_gs_copy_shader(sscreen, compiler, shader, &linked.consumer.temp_info,
nir, debug);
si_nir_generate_gs_copy_shader(sscreen, compiler, shader, nir,
linked.consumer.gs_copy_shader, debug);
if (!shader->gs_copy_shader) {
fprintf(stderr, "radeonsi: can't create GS copy shader\n");
ret = false;

View file

@ -64,6 +64,7 @@ struct si_nir_shader_ctx {
struct si_shader_args args;
struct si_temp_shader_variant_info temp_info;
nir_shader *nir;
nir_shader *gs_copy_shader;
bool free_nir;
};