ac/nir: Emit edge flag instructions conditionally.

Edge flags are not needed by RADV (which now passes use_edgeflags = false to ac_nir_lower_ngg_nogs) but will be needed by RadeonSI.

Fossil DB results on Sienna Cichlid (with NGGC on):
Totals from 56917 (44.24% of 128647) affected shaders:
VGPRs: 1982664 -> 1975936 (-0.34%); split: -0.43%, +0.09%
CodeSize: 152790880 -> 149510316 (-2.15%); split: -2.15%, +0.00%
MaxWaves: 1617984 -> 1621900 (+0.24%)
Instrs: 29272825 -> 28907038 (-1.25%); split: -1.26%, +0.01%
Latency: 128744182 -> 127565678 (-0.92%); split: -1.14%, +0.22%
InvThroughput: 20125915 -> 19805168 (-1.59%); split: -1.63%, +0.03%
VClause: 521312 -> 519804 (-0.29%); split: -0.77%, +0.48%
SClause: 688861 -> 688897 (+0.01%); split: -0.04%, +0.05%
Copies: 3205421 -> 3177799 (-0.86%); split: -1.68%, +0.82%
Branches: 1181457 -> 1183147 (+0.14%); split: -0.03%, +0.17%
PreVGPRs: 1626681 -> 1595406 (-1.92%)

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12998>
This commit is contained in:
Timur Kristóf 2021-09-17 21:36:21 +02:00
parent cb05c85abd
commit a7f2faea46
3 changed files with 11 additions and 5 deletions

View file

@ -111,6 +111,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader,
bool consider_passthrough,
bool export_prim_id,
bool provoking_vtx_last,
bool use_edgeflags,
uint32_t instance_rate_inputs);
void

View file

@ -51,6 +51,7 @@ typedef struct
bool passthrough;
bool export_prim_id;
bool early_prim_export;
bool use_edgeflags;
unsigned wave_size;
unsigned max_num_waves;
unsigned num_vertices_per_primitives;
@ -294,9 +295,10 @@ pervertex_lds_addr(nir_builder *b, nir_ssa_def *vertex_idx, unsigned per_vtx_byt
static nir_ssa_def *
emit_pack_ngg_prim_exp_arg(nir_builder *b, unsigned num_vertices_per_primitives,
nir_ssa_def *vertex_indices[3], nir_ssa_def *is_null_prim)
nir_ssa_def *vertex_indices[3], nir_ssa_def *is_null_prim,
bool use_edgeflags)
{
nir_ssa_def *arg = b->shader->info.stage == MESA_SHADER_VERTEX
nir_ssa_def *arg = use_edgeflags
? nir_build_load_initial_edgeflags_amd(b)
: nir_imm_int(b, 0);
@ -339,7 +341,7 @@ emit_ngg_nogs_prim_exp_arg(nir_builder *b, lower_ngg_nogs_state *st)
? ngg_input_primitive_vertex_index(b, 2)
: nir_imm_zero(b, 1, 32);
return emit_pack_ngg_prim_exp_arg(b, st->num_vertices_per_primitives, vtx_idx, NULL);
return emit_pack_ngg_prim_exp_arg(b, st->num_vertices_per_primitives, vtx_idx, NULL, st->use_edgeflags);
}
}
@ -741,7 +743,7 @@ compact_vertices_after_culling(nir_builder *b,
exporter_vtx_indices[v] = nir_u2u32(b, exporter_vtx_idx);
}
nir_ssa_def *prim_exp_arg = emit_pack_ngg_prim_exp_arg(b, 3, exporter_vtx_indices, NULL);
nir_ssa_def *prim_exp_arg = emit_pack_ngg_prim_exp_arg(b, 3, exporter_vtx_indices, NULL, nogs_state->use_edgeflags);
nir_store_var(b, prim_exp_arg_var, prim_exp_arg, 0x1u);
}
nir_pop_if(b, if_gs_accepted);
@ -1256,6 +1258,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader,
bool consider_passthrough,
bool export_prim_id,
bool provoking_vtx_last,
bool use_edgeflags,
uint32_t instance_rate_inputs)
{
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
@ -1276,6 +1279,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader,
.passthrough = passthrough,
.export_prim_id = export_prim_id,
.early_prim_export = exec_list_is_singular(&impl->body),
.use_edgeflags = use_edgeflags,
.num_vertices_per_primitives = num_vertices_per_primitives,
.provoking_vtx_idx = provoking_vtx_last ? (num_vertices_per_primitives - 1) : 0,
.position_value_var = position_value_var,
@ -1705,7 +1709,7 @@ ngg_gs_export_primitives(nir_builder *b, nir_ssa_def *max_num_out_prims, nir_ssa
}
}
nir_ssa_def *arg = emit_pack_ngg_prim_exp_arg(b, s->num_vertices_per_primitive, vtx_indices, is_null_prim);
nir_ssa_def *arg = emit_pack_ngg_prim_exp_arg(b, s->num_vertices_per_primitive, vtx_indices, is_null_prim, false);
nir_build_export_primitive_amd(b, arg);
nir_pop_if(b, if_prim_export_thread);
}

View file

@ -982,6 +982,7 @@ void radv_lower_ngg(struct radv_device *device, struct nir_shader *nir,
key->vs_common_out.as_ngg_passthrough,
key->vs_common_out.export_prim_id,
key->vs.provoking_vtx_last,
false,
key->vs.instance_rate_inputs);
info->has_ngg_culling = out_conf.can_cull;