mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-01 11:50:09 +01:00
ac/nir/ngg: Prepare deferred shader part before adding culling code.
The previous concept was to emit the non-deferred shader part first, including the culling code, and then modify the non-deferred part accordingly. This caused some issues because it was really impossible to tell which sysvals the deferred part needs after DCE, so we had to run an additional cleanup pass afterwards.

The new concept is to prepare the deferred part first by applying reusable variables (from the non-deferred part) and run DCE. This opens the possibility to accurately gather info about what the deferred part needs. This idea is further expanded in the next commits.

Fossil DB stats on Navi 21:
Totals from 17 (0.02% of 79377) affected shaders:
Instrs: 18063 -> 18064 (+0.01%)
CodeSize: 93368 -> 93372 (+0.00%)
Latency: 49889 -> 49899 (+0.02%); split: -0.01%, +0.03%
SALU: 2416 -> 2417 (+0.04%)

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22073>
This commit is contained in:
parent
e9e58fa412
commit
e4c91c01e3
1 changed files with 49 additions and 28 deletions
|
|
@ -934,29 +934,22 @@ save_reusable_variables(nir_builder *b, lower_ngg_nogs_state *s)
|
|||
}
|
||||
|
||||
/**
|
||||
* Reuses suitable variables from the top part of the shader,
|
||||
* by deleting their stores from the bottom part.
|
||||
* Reuses suitable variables from the non-deferred (top) part of the shader,
|
||||
* by deleting their stores from the deferred (bottom) part.
|
||||
*/
|
||||
static void
|
||||
apply_reusable_variables(nir_builder *b, lower_ngg_nogs_state *s)
|
||||
apply_reusable_variables(nir_function_impl *impl, lower_ngg_nogs_state *s)
|
||||
{
|
||||
if (!u_vector_length(&s->reusable_nondeferred_variables)) {
|
||||
u_vector_finish(&s->reusable_nondeferred_variables);
|
||||
return;
|
||||
}
|
||||
|
||||
nir_foreach_block_reverse_safe(block, b->impl) {
|
||||
nir_foreach_block_reverse_safe(block, impl) {
|
||||
nir_foreach_instr_reverse_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
|
||||
/* When we found any of these intrinsics, it means
|
||||
* we reached the top part and we must stop.
|
||||
*/
|
||||
if (intrin->intrinsic == nir_intrinsic_sendmsg_amd)
|
||||
goto done;
|
||||
|
||||
if (intrin->intrinsic != nir_intrinsic_store_deref)
|
||||
continue;
|
||||
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
||||
|
|
@ -972,7 +965,6 @@ apply_reusable_variables(nir_builder *b, lower_ngg_nogs_state *s)
|
|||
}
|
||||
}
|
||||
|
||||
done:
|
||||
u_vector_finish(&s->reusable_nondeferred_variables);
|
||||
}
|
||||
|
||||
|
|
@ -1056,6 +1048,35 @@ ngg_nogs_get_culling_pervertex_lds_size(gl_shader_stage stage,
|
|||
return (lds_es_arg_0 + num_repacked * 4u) | 4u;
|
||||
}
|
||||
|
||||
/**
 * Builds the deferred shader part ahead of the culling code.
 *
 * A clone of original_extracted_cf is inserted at the end of impl; the
 * list passed in is only cloned here, so the caller keeps it for later use.
 * The reusable variables and the repacked position outputs are then applied
 * to that clone, and nir_opt_undef / nir_copy_prop / nir_opt_dce /
 * nir_opt_dead_cf are repeated until none of them reports progress.
 * Finally everything between nir_before_impl(impl) and nir_after_impl(impl)
 * is extracted again into a new list.
 *
 * Returns the newly extracted list. It is allocated with rzalloc() using
 * shader as the ralloc parent; the caller visible in this change releases
 * it with ralloc_free() after reinserting it.
 */
static nir_cf_list *
|
||||
prepare_shader_for_culling(nir_shader *shader, nir_function_impl *impl,
|
||||
nir_cf_list *original_extracted_cf, lower_ngg_nogs_state *s)
|
||||
{
|
||||
/* Reinsert a clone of the original shader code. */
|
||||
struct hash_table *orig_remap_table = _mesa_pointer_hash_table_create(NULL);
|
||||
nir_cf_list_clone_and_reinsert(original_extracted_cf, &impl->cf_node, nir_after_impl(impl), orig_remap_table);
|
||||
_mesa_hash_table_destroy(orig_remap_table, NULL);
|
||||
|
||||
/* Apply reusable variables. */
|
||||
apply_reusable_variables(impl, s);
|
||||
apply_repacked_pos_outputs(shader, s);
|
||||
|
||||
/* Cleanup. This is done so that we can accurately gather info from the deferred part. */
|
||||
bool progress;
|
||||
do {
|
||||
progress = false;
|
||||
NIR_PASS(progress, shader, nir_opt_undef);
|
||||
NIR_PASS(progress, shader, nir_copy_prop);
|
||||
NIR_PASS(progress, shader, nir_opt_dce);
|
||||
NIR_PASS(progress, shader, nir_opt_dead_cf);
|
||||
} while (progress);
|
||||
|
||||
/* Extract the shader code again. This will be reinserted as the deferred shader part. */
|
||||
nir_cf_list *prepared_extracted = rzalloc(shader, nir_cf_list);
|
||||
nir_cf_extract(prepared_extracted, nir_before_impl(impl), nir_after_impl(impl));
|
||||
return prepared_extracted;
|
||||
}
|
||||
|
||||
static void
|
||||
add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_cf, lower_ngg_nogs_state *s)
|
||||
{
|
||||
|
|
@ -1113,10 +1134,8 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c
|
|||
*/
|
||||
nir_store_var(b, s->position_value_var, nir_imm_vec4(b, 0.0f, 0.0f, 0.0f, 1.0f), 0xfu);
|
||||
|
||||
/* Now reinsert a clone of the shader code */
|
||||
struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL);
|
||||
nir_cf_list_clone_and_reinsert(original_extracted_cf, &if_es_thread->cf_node, b->cursor, remap_table);
|
||||
_mesa_hash_table_destroy(remap_table, NULL);
|
||||
/* Now reinsert the shader code. */
|
||||
nir_cf_reinsert(original_extracted_cf, b->cursor);
|
||||
b->cursor = nir_after_cf_list(&if_es_thread->then_list);
|
||||
|
||||
/* Remember the current thread's shader arguments */
|
||||
|
|
@ -1651,9 +1670,16 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
|
|||
save_reusable_variables(b, &state);
|
||||
}
|
||||
|
||||
nir_cf_list extracted;
|
||||
nir_cf_extract(&extracted, nir_before_impl(impl),
|
||||
nir_cf_list *extracted = rzalloc(shader, nir_cf_list);
|
||||
nir_cf_extract(extracted, nir_before_impl(impl),
|
||||
nir_after_impl(impl));
|
||||
nir_cf_list *non_deferred_cf = NULL;
|
||||
|
||||
if (options->can_cull) {
|
||||
non_deferred_cf = extracted;
|
||||
extracted = prepare_shader_for_culling(shader, impl, extracted, &state);
|
||||
}
|
||||
|
||||
b->cursor = nir_before_impl(impl);
|
||||
|
||||
ngg_nogs_init_vertex_indices_vars(b, impl, &state);
|
||||
|
|
@ -1687,7 +1713,9 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
|
|||
else
|
||||
nir_store_var(b, prim_exp_arg_var, emit_ngg_nogs_prim_exp_arg(b, &state), 0x1u);
|
||||
} else {
|
||||
add_deferred_attribute_culling(b, &extracted, &state);
|
||||
add_deferred_attribute_culling(b, non_deferred_cf, &state);
|
||||
|
||||
ralloc_free(non_deferred_cf);
|
||||
b->cursor = nir_after_impl(impl);
|
||||
|
||||
if (state.early_prim_export)
|
||||
|
|
@ -1736,7 +1764,8 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
|
|||
nir_if *if_es_thread = nir_push_if(b, es_thread);
|
||||
{
|
||||
/* Run the actual shader */
|
||||
nir_cf_reinsert(&extracted, b->cursor);
|
||||
nir_cf_reinsert(extracted, b->cursor);
|
||||
ralloc_free(extracted);
|
||||
b->cursor = nir_after_cf_list(&if_es_thread->then_list);
|
||||
|
||||
if (options->export_primitive_id)
|
||||
|
|
@ -1744,14 +1773,6 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
|
|||
}
|
||||
nir_pop_if(b, if_es_thread);
|
||||
|
||||
if (options->can_cull) {
|
||||
/* Replace uniforms. */
|
||||
apply_reusable_variables(b, &state);
|
||||
|
||||
/* Reuse the position value calculated in the non-deferred shader part. */
|
||||
apply_repacked_pos_outputs(shader, &state);
|
||||
}
|
||||
|
||||
/* Gather outputs data and types */
|
||||
ngg_nogs_gather_outputs(b, &if_es_thread->then_list, &state);
|
||||
b->cursor = nir_after_cf_list(&if_es_thread->then_list);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue