ac/nir: don't pass radeon_info to NGG lowering

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40042>
This commit is contained in:
Rhys Perry 2026-02-26 15:24:17 +00:00 committed by Marge Bot
parent 36feec61c8
commit 5a8a7dbb22
6 changed files with 52 additions and 61 deletions

View file

@ -179,7 +179,7 @@ bool
ac_nir_lower_indirect_derefs(nir_shader *shader);
typedef struct {
const struct radeon_info *hw_info;
const struct ac_cu_info *cu_info;
unsigned max_workgroup_size;
unsigned wave_size;

View file

@ -27,6 +27,7 @@ typedef struct
typedef struct
{
enum amd_gfx_level gfx_level;
const ac_nir_lower_ngg_options *options;
nir_variable *position_value_var;
@ -122,7 +123,7 @@ ngg_nogs_init_vertex_indices_vars(nir_builder *b, nir_function_impl *impl, lower
nir_def *vtx;
if (s->options->hw_info->gfx_level >= GFX12) {
if (s->gfx_level >= GFX12) {
vtx = nir_ubfe_imm(b, nir_load_packed_passthrough_primitive_amd(b), 9 * v, 8);
} else if (s->options->passthrough) {
vtx = nir_ubfe_imm(b, nir_load_packed_passthrough_primitive_amd(b), 10 * v, 9);
@ -138,7 +139,7 @@ ngg_nogs_init_vertex_indices_vars(nir_builder *b, nir_function_impl *impl, lower
static nir_def *
emit_ngg_nogs_prim_exp_arg(nir_builder *b, lower_ngg_nogs_state *s)
{
if (s->options->hw_info->gfx_level >= GFX12 || s->options->passthrough) {
if (s->gfx_level >= GFX12 || s->options->passthrough) {
return nir_load_packed_passthrough_primitive_amd(b);
} else {
nir_def *vtx_idx[3] = {0};
@ -147,7 +148,7 @@ emit_ngg_nogs_prim_exp_arg(nir_builder *b, lower_ngg_nogs_state *s)
vtx_idx[v] = nir_load_var(b, s->gs_vtx_indices_vars[v]);
return ac_nir_pack_ngg_prim_exp_arg(b, s->options->num_vertices_per_primitive, vtx_idx, NULL,
s->options->hw_info->gfx_level);
s->gfx_level);
}
}
@ -202,7 +203,7 @@ emit_ngg_nogs_prim_export(nir_builder *b, lower_ngg_nogs_state *s, nir_def *arg)
.memory_semantics = NIR_MEMORY_ACQ_REL,
.memory_modes = nir_var_mem_shared);
unsigned edge_flag_bits = ac_get_all_edge_flag_bits(s->options->hw_info->gfx_level);
unsigned edge_flag_bits = ac_get_all_edge_flag_bits(s->gfx_level);
nir_def *mask = nir_imm_intN_t(b, ~edge_flag_bits, 32);
for (int i = 0; i < s->options->num_vertices_per_primitive; i++) {
@ -211,7 +212,7 @@ emit_ngg_nogs_prim_export(nir_builder *b, lower_ngg_nogs_state *s, nir_def *arg)
/* Edge flags share LDS with XFB. */
nir_def *edge = ac_nir_load_shared_xfb(b, addr, &s->out, VARYING_SLOT_EDGE, 0);
if (s->options->hw_info->gfx_level >= GFX12)
if (s->gfx_level >= GFX12)
mask = nir_ior(b, mask, nir_ishl_imm(b, edge, 8 + i * 9));
else
mask = nir_ior(b, mask, nir_ishl_imm(b, edge, 9 + i * 10));
@ -225,7 +226,7 @@ emit_ngg_nogs_prim_export(nir_builder *b, lower_ngg_nogs_state *s, nir_def *arg)
* GPUs without an attribute ring.
* Because this uses the export space, do it together with the primitive export.
*/
if (!s->options->hw_info->cu_info.has_attr_ring && s->options->export_primitive_id_per_prim) {
if (!s->options->cu_info->has_attr_ring && s->options->export_primitive_id_per_prim) {
const uint8_t offset = s->options->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
nir_def *prim_id = nir_load_primitive_id(b);
nir_def *undef = nir_undef(b, 1, 32);
@ -278,7 +279,7 @@ emit_ngg_nogs_prim_id_store_shared(nir_builder *b, lower_ngg_nogs_state *s)
static void
emit_ngg_nogs_prim_id_store_per_prim_to_attr_ring(nir_builder *b, lower_ngg_nogs_state *s)
{
assert(s->options->hw_info->cu_info.has_attr_ring);
assert(s->options->cu_info->has_attr_ring);
nir_def *is_gs_thread = nir_load_var(b, s->gs_exported_var);
nir_def *highest_gs_thread = nir_ufind_msb(b, nir_ballot(b, 1, s->options->wave_size, is_gs_thread));
@ -549,9 +550,8 @@ compact_vertices_after_culling(nir_builder *b,
nir_store_var(b, s->gs_vtx_indices_vars[v], exporter_vtx_indices[v], 0x1);
}
nir_def *prim_exp_arg =
ac_nir_pack_ngg_prim_exp_arg(b, s->options->num_vertices_per_primitive,
exporter_vtx_indices, NULL, s->options->hw_info->gfx_level);
nir_def *prim_exp_arg = ac_nir_pack_ngg_prim_exp_arg(
b, s->options->num_vertices_per_primitive, exporter_vtx_indices, NULL, s->gfx_level);
nir_store_var(b, s->prim_exp_arg_var, prim_exp_arg, 0x1u);
}
nir_pop_if(b, if_gs_accepted);
@ -1233,9 +1233,9 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c
nir_if *if_wave_0 = nir_push_if(b, nir_ieq_imm(b, nir_load_subgroup_id(b), 0));
{
ac_nir_ngg_alloc_vertices_and_primitives(
b, num_live_vertices_in_workgroup, num_exported_prims,
s->options->hw_info->cu_info.has_ngg_fully_culled_bug);
ac_nir_ngg_alloc_vertices_and_primitives(b, num_live_vertices_in_workgroup,
num_exported_prims,
s->options->cu_info->has_ngg_fully_culled_bug);
}
nir_pop_if(b, if_wave_0);
@ -1363,11 +1363,10 @@ ngg_nogs_build_streamout(nir_builder *b, lower_ngg_nogs_state *s)
nir_def *buffer_offsets[4] = {0};
nir_def *so_buffer[4] = {0};
nir_def *tid_in_tg = nir_load_local_invocation_index(b);
ac_nir_ngg_build_streamout_buffer_info(b, info, s->options->hw_info->gfx_level, s->options->has_xfb_prim_query,
s->options->use_gfx12_xfb_intrinsic, nir_imm_int(b, 0), tid_in_tg,
gen_prim_per_stream,
so_buffer, buffer_offsets,
emit_prim_per_stream);
ac_nir_ngg_build_streamout_buffer_info(b, info, s->gfx_level, s->options->has_xfb_prim_query,
s->options->use_gfx12_xfb_intrinsic, nir_imm_int(b, 0),
tid_in_tg, gen_prim_per_stream, so_buffer, buffer_offsets,
emit_prim_per_stream);
/* Write out primitive data */
nir_if *if_emit = nir_push_if(b, nir_ilt(b, tid_in_tg, emit_prim_per_stream[0]));
@ -1500,7 +1499,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
nir_variable *gs_exported_var = nir_local_variable_create(impl, glsl_bool_type(), "gs_exported");
const bool wait_attr_ring =
options->has_param_exports && options->hw_info->cu_info.has_attr_ring_wait_bug;
options->has_param_exports && options->cu_info->has_attr_ring_wait_bug;
bool streamout_enabled = shader->xfb_info && !options->disable_streamout;
bool has_user_edgeflags =
options->use_edgeflags && (shader->info.outputs_written & VARYING_BIT_EDGE);
@ -1521,6 +1520,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
!(wait_attr_ring && options->export_primitive_id_per_prim);
lower_ngg_nogs_state state = {
.gfx_level = options->cu_info->gfx_level,
.options = options,
.early_prim_export = early_prim_export,
.streamout_enabled = streamout_enabled,
@ -1549,7 +1549,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
if (options->export_primitive_id_per_prim) {
/* The HW preloads the primitive ID to VGPRs of GS threads for VS, but not for TES. */
assert(shader->info.stage == MESA_SHADER_VERTEX);
assert(options->hw_info->gfx_level >= GFX10_3);
assert(state.gfx_level >= GFX10_3);
}
nir_builder builder = nir_builder_create(impl);
@ -1588,7 +1588,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
if (!options->can_cull) {
/* Newer chips can use PRIMGEN_PASSTHRU_NO_MSG to skip gs_alloc_req for NGG passthrough. */
if (!(options->passthrough && options->hw_info->cu_info.has_ngg_passthru_no_msg)) {
if (!(options->passthrough && options->cu_info->has_ngg_passthru_no_msg)) {
/* Allocate export space on wave 0 - confirm to the HW that we want to use all possible space */
nir_if *if_wave_0 = nir_push_if(b, nir_ieq_imm(b, nir_load_subgroup_id(b), 0));
{
@ -1635,7 +1635,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
/* Wait for GS threads to store primitive ID in LDS. */
nir_barrier(b, .execution_scope = SCOPE_WORKGROUP, .memory_scope = SCOPE_WORKGROUP,
.memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_shared);
} else if (options->export_primitive_id_per_prim && options->hw_info->cu_info.has_attr_ring) {
} else if (options->export_primitive_id_per_prim && options->cu_info->has_attr_ring) {
emit_ngg_nogs_prim_id_store_per_prim_to_attr_ring(b, &state);
}
@ -1646,7 +1646,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
* scheduling.
*/
nir_def *num_es_threads = NULL;
if (options->hw_info->cu_info.has_attr_ring && options->can_cull) {
if (options->cu_info->has_attr_ring && options->can_cull) {
nir_def *es_accepted_mask =
nir_ballot(b, 1, options->wave_size, nir_load_var(b, es_accepted_var));
num_es_threads = nir_bit_count(b, es_accepted_mask);
@ -1721,15 +1721,11 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
b->cursor = nir_after_cf_list(&if_es_thread->then_list);
}
ac_nir_export_position(b, options->hw_info->gfx_level,
options->export_clipdist_mask,
options->can_cull,
options->write_pos_to_clipvertex,
!options->has_param_exports,
options->force_vrs,
export_outputs, &state.out, NULL);
ac_nir_export_position(b, state.gfx_level, options->export_clipdist_mask, options->can_cull,
options->write_pos_to_clipvertex, !options->has_param_exports,
options->force_vrs, export_outputs, &state.out, NULL);
if (options->has_param_exports && !options->hw_info->cu_info.has_attr_ring) {
if (options->has_param_exports && !options->cu_info->has_attr_ring) {
ac_nir_export_parameters(b, options->vs_output_param_offset,
b->shader->info.outputs_written,
b->shader->info.outputs_written_16bit,
@ -1739,7 +1735,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
if (if_pos_exports)
nir_pop_if(b, if_pos_exports);
if (options->has_param_exports && options->hw_info->cu_info.has_attr_ring) {
if (options->has_param_exports && options->cu_info->has_attr_ring) {
if (!pos_exports_in_cf) {
b->cursor = nir_after_cf_node(&if_es_thread->cf_node);
ac_nir_create_output_phis(b, b->shader->info.outputs_written, b->shader->info.outputs_written_16bit, &state.out);

View file

@ -301,8 +301,8 @@ ngg_gs_process_out_primitive(nir_builder *b,
nir_isub(b, vtx_indices[2], is_odd), vtx_indices[2]);
}
return ac_nir_pack_ngg_prim_exp_arg(b, s->num_vertices_per_primitive, vtx_indices,
is_null_prim, s->options->hw_info->gfx_level);
return ac_nir_pack_ngg_prim_exp_arg(b, s->num_vertices_per_primitive, vtx_indices, is_null_prim,
s->options->cu_info->gfx_level);
}
static void
@ -402,15 +402,12 @@ ngg_gs_emit_output(nir_builder *b, nir_def *max_num_out_vtx, nir_def *max_num_ou
nir_if *if_export_vertex = nir_push_if(b, if_process_vertex->condition.ssa);
{
ac_nir_export_position(b, s->options->hw_info->gfx_level,
s->options->export_clipdist_mask,
s->options->can_cull,
s->options->write_pos_to_clipvertex,
!s->options->has_param_exports,
s->options->force_vrs,
ac_nir_export_position(b, s->options->cu_info->gfx_level, s->options->export_clipdist_mask,
s->options->can_cull, s->options->write_pos_to_clipvertex,
!s->options->has_param_exports, s->options->force_vrs,
b->shader->info.outputs_written | VARYING_BIT_POS, &s->out, NULL);
if (s->options->has_param_exports && !s->options->hw_info->cu_info.has_attr_ring)
if (s->options->has_param_exports && !s->options->cu_info->has_attr_ring)
ac_nir_export_parameters(b, s->options->vs_output_param_offset,
b->shader->info.outputs_written,
b->shader->info.outputs_written_16bit,
@ -418,8 +415,8 @@ ngg_gs_emit_output(nir_builder *b, nir_def *max_num_out_vtx, nir_def *max_num_ou
}
nir_pop_if(b, if_export_vertex);
if (s->options->has_param_exports && s->options->hw_info->cu_info.has_attr_ring) {
if (s->options->hw_info->cu_info.has_attr_ring_wait_bug)
if (s->options->has_param_exports && s->options->cu_info->has_attr_ring) {
if (s->options->cu_info->has_attr_ring_wait_bug)
b->cursor = nir_after_cf_node_and_phis(&if_export_primitive->cf_node);
nir_def *vertices_in_wave = nir_bit_count(b, nir_ballot(b, 1, s->options->wave_size, if_process_vertex->condition.ssa));
@ -429,7 +426,7 @@ ngg_gs_emit_output(nir_builder *b, nir_def *max_num_out_vtx, nir_def *max_num_ou
b->shader->info.outputs_written_16bit,
&s->out, vertices_in_wave);
if (s->options->hw_info->cu_info.has_attr_ring_wait_bug) {
if (s->options->cu_info->has_attr_ring_wait_bug) {
/* Wait for attribute ring stores to finish. */
nir_barrier(b, .execution_scope = SCOPE_SUBGROUP,
.memory_scope = SCOPE_DEVICE,
@ -706,9 +703,10 @@ ngg_gs_build_streamout(nir_builder *b, lower_ngg_gs_state *s)
nir_def *so_buffer[4] = {0};
nir_def *buffer_info_scratch_base =
nir_iadd_imm_nuw(b, s->lds_addr_gs_out_vtx, num_streams * scratch_stride + scratch_base_off);
ac_nir_ngg_build_streamout_buffer_info(b, info, s->options->hw_info->gfx_level, s->options->has_xfb_prim_query,
s->options->use_gfx12_xfb_intrinsic, buffer_info_scratch_base, tid_in_tg,
gen_prim, so_buffer, buffer_offsets, emit_prim);
ac_nir_ngg_build_streamout_buffer_info(
b, info, s->options->cu_info->gfx_level, s->options->has_xfb_prim_query,
s->options->use_gfx12_xfb_intrinsic, buffer_info_scratch_base, tid_in_tg, gen_prim, so_buffer,
buffer_offsets, emit_prim);
u_foreach_bit(stream, info->streams_written) {
nir_def *can_emit = nir_ilt(b, export_seq[stream], emit_prim[stream]);
@ -763,8 +761,7 @@ ngg_gs_finale(nir_builder *b, lower_ngg_gs_state *s)
ac_nir_ngg_alloc_vertices_and_primitives(
b, max_vtxcnt, max_prmcnt,
b->shader->info.gs.vertices_out == 0 &&
s->options->hw_info->cu_info.has_ngg_fully_culled_bug);
b->shader->info.gs.vertices_out == 0 && s->options->cu_info->has_ngg_fully_culled_bug);
}
nir_pop_if(b, if_wave_0);
}
@ -813,9 +810,8 @@ ngg_gs_finale(nir_builder *b, lower_ngg_gs_state *s)
/* Allocate export space. We currently don't compact primitives, just use the maximum number. */
nir_if *if_wave_0 = nir_push_if(b, nir_ieq_imm(b, nir_load_subgroup_id(b), 0));
{
ac_nir_ngg_alloc_vertices_and_primitives(
b, workgroup_num_vertices, max_prmcnt,
s->options->hw_info->cu_info.has_ngg_fully_culled_bug);
ac_nir_ngg_alloc_vertices_and_primitives(b, workgroup_num_vertices, max_prmcnt,
s->options->cu_info->has_ngg_fully_culled_bug);
}
nir_pop_if(b, if_wave_0);

View file

@ -1351,10 +1351,9 @@ ac_nir_lower_ngg_mesh(nir_shader *shader, const ac_nir_lower_ngg_options *option
unsigned max_vertices = shader->info.mesh.max_vertices_out;
unsigned max_primitives = shader->info.mesh.max_primitives_out;
ms_out_mem_layout layout =
ms_calculate_output_layout(&options->hw_info->cu_info, shader->info.shared_size,
per_vertex_outputs, per_primitive_outputs, cross_invocation_access,
max_vertices, max_primitives, vertices_per_prim);
ms_out_mem_layout layout = ms_calculate_output_layout(
options->cu_info, shader->info.shared_size, per_vertex_outputs, per_primitive_outputs,
cross_invocation_access, max_vertices, max_primitives, vertices_per_prim);
shader->info.shared_size = layout.lds.total_size;
*out_needs_scratch_ring = layout.scratch_ring.vtx_attr.mask || layout.scratch_ring.prm_attr.mask;
@ -1370,7 +1369,7 @@ ac_nir_lower_ngg_mesh(nir_shader *shader, const ac_nir_lower_ngg_options *option
shader->info.workgroup_size[1] *
shader->info.workgroup_size[2];
bool fast_launch_2 = options->hw_info->cu_info.mesh_fast_launch_2;
bool fast_launch_2 = options->cu_info->mesh_fast_launch_2;
unsigned hw_workgroup_size = options->max_workgroup_size;
lower_ngg_ms_state state = {
@ -1385,7 +1384,7 @@ ac_nir_lower_ngg_mesh(nir_shader *shader, const ac_nir_lower_ngg_options *option
.insert_layer_output =
options->multiview && !(shader->info.outputs_written & VARYING_BIT_LAYER),
.uses_cull_flags = uses_cull,
.cu_info = &options->hw_info->cu_info,
.cu_info = options->cu_info,
.vert_multirow_export = fast_launch_2 && max_vertices > hw_workgroup_size,
.prim_multirow_export = fast_launch_2 && max_primitives > hw_workgroup_size,
.vs_output_param_offset = options->vs_output_param_offset,

View file

@ -963,7 +963,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
}
ac_nir_lower_ngg_options options = {0};
options.hw_info = &pdev->info;
options.cu_info = &pdev->info.cu_info;
options.max_workgroup_size = info->workgroup_size;
options.wave_size = info->wave_size;
options.export_clipdist_mask = info->outinfo.clip_dist_mask | info->outinfo.cull_dist_mask;

View file

@ -405,7 +405,7 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir,
assert(key->ge.as_ngg);
ac_nir_lower_ngg_options options = {
.hw_info = info,
.cu_info = &info->cu_info,
.max_workgroup_size = si_get_max_workgroup_size(shader),
.wave_size = shader->wave_size,
.export_clipdist_mask = shader->info.clipdist_mask | shader->info.culldist_mask,