mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-22 04:10:40 +01:00
ac/nir: add an option to pack clip/cull distance components to remove holes
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35351>
This commit is contained in:
parent
6cd813810e
commit
39a9dce5fc
11 changed files with 31 additions and 7 deletions
|
|
@ -167,6 +167,12 @@ typedef struct {
|
|||
unsigned wave_size;
|
||||
uint8_t clip_cull_dist_mask;
|
||||
bool write_pos_to_clipvertex;
|
||||
/* Remove clip/cull distance components that are missing in clip_cull_dist_mask, improving
|
||||
* throughput by up to 50% (3 pos exports -> 2 pos exports). To remove no-op components
|
||||
* (those known to be >= 0) completely, the caller must not set them in clip_cull_dist_mask.
|
||||
* should be determined by nir_opt_clip_cull_const before this.
|
||||
*/
|
||||
bool pack_clip_cull_distances;
|
||||
const uint8_t *vs_output_param_offset; /* GFX11+ */
|
||||
bool has_param_exports;
|
||||
bool can_cull;
|
||||
|
|
@ -258,6 +264,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
|
|||
enum amd_gfx_level gfx_level,
|
||||
uint32_t clip_cull_mask,
|
||||
bool write_pos_to_clipvertex,
|
||||
bool pack_clip_cull_distances,
|
||||
const uint8_t *param_offsets,
|
||||
bool has_param_exports,
|
||||
bool disable_streamout,
|
||||
|
|
@ -271,6 +278,7 @@ ac_nir_lower_legacy_vs(nir_shader *nir,
|
|||
enum amd_gfx_level gfx_level,
|
||||
uint32_t clip_cull_mask,
|
||||
bool write_pos_to_clipvertex,
|
||||
bool pack_clip_cull_distances,
|
||||
const uint8_t *param_offsets,
|
||||
bool has_param_exports,
|
||||
bool export_primitive_id,
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
|
|||
enum amd_gfx_level gfx_level,
|
||||
uint32_t clip_cull_mask,
|
||||
bool write_pos_to_clipvertex,
|
||||
bool pack_clip_cull_distances,
|
||||
const uint8_t *param_offsets,
|
||||
bool has_param_exports,
|
||||
bool disable_streamout,
|
||||
|
|
@ -119,7 +120,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
|
|||
if (kill_layer)
|
||||
export_outputs &= ~VARYING_BIT_LAYER;
|
||||
|
||||
ac_nir_export_position(&b, gfx_level, clip_cull_mask, write_pos_to_clipvertex,
|
||||
ac_nir_export_position(&b, gfx_level, clip_cull_mask, write_pos_to_clipvertex, pack_clip_cull_distances,
|
||||
!has_param_exports, force_vrs, export_outputs, &out, NULL);
|
||||
|
||||
if (has_param_exports) {
|
||||
|
|
|
|||
|
|
@ -111,6 +111,7 @@ ac_nir_export_position(nir_builder *b,
|
|||
enum amd_gfx_level gfx_level,
|
||||
uint32_t clip_cull_mask,
|
||||
bool write_pos_to_clipvertex,
|
||||
bool pack_clip_cull_distances,
|
||||
bool no_param_export,
|
||||
bool force_vrs,
|
||||
uint64_t outputs_written,
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ ac_nir_lower_legacy_vs(nir_shader *nir,
|
|||
enum amd_gfx_level gfx_level,
|
||||
uint32_t clip_cull_mask,
|
||||
bool write_pos_to_clipvertex,
|
||||
bool pack_clip_cull_distances,
|
||||
const uint8_t *param_offsets,
|
||||
bool has_param_exports,
|
||||
bool export_primitive_id,
|
||||
|
|
@ -77,7 +78,7 @@ ac_nir_lower_legacy_vs(nir_shader *nir,
|
|||
if (kill_layer)
|
||||
export_outputs &= ~VARYING_BIT_LAYER;
|
||||
|
||||
ac_nir_export_position(&b, gfx_level, clip_cull_mask, write_pos_to_clipvertex,
|
||||
ac_nir_export_position(&b, gfx_level, clip_cull_mask, write_pos_to_clipvertex, pack_clip_cull_distances,
|
||||
!has_param_exports, force_vrs, export_outputs, &out, NULL);
|
||||
|
||||
if (has_param_exports) {
|
||||
|
|
|
|||
|
|
@ -1788,6 +1788,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
|
|||
ac_nir_export_position(b, options->hw_info->gfx_level,
|
||||
options->clip_cull_dist_mask,
|
||||
options->write_pos_to_clipvertex,
|
||||
options->pack_clip_cull_distances,
|
||||
!options->has_param_exports,
|
||||
options->force_vrs,
|
||||
export_outputs, &state.out, NULL);
|
||||
|
|
|
|||
|
|
@ -500,6 +500,7 @@ ngg_gs_emit_output(nir_builder *b, nir_def *max_num_out_vtx, nir_def *max_num_ou
|
|||
ac_nir_export_position(b, s->options->hw_info->gfx_level,
|
||||
s->options->clip_cull_dist_mask,
|
||||
s->options->write_pos_to_clipvertex,
|
||||
s->options->pack_clip_cull_distances,
|
||||
!s->options->has_param_exports,
|
||||
s->options->force_vrs,
|
||||
export_outputs, &s->out, NULL);
|
||||
|
|
|
|||
|
|
@ -887,7 +887,7 @@ emit_ms_vertex(nir_builder *b, nir_def *index, nir_def *row, bool exports, bool
|
|||
ms_emit_arrayed_outputs(b, index, per_vertex_outputs, s);
|
||||
|
||||
if (exports) {
|
||||
ac_nir_export_position(b, s->hw_info->gfx_level, s->clipdist_enable_mask, false,
|
||||
ac_nir_export_position(b, s->hw_info->gfx_level, s->clipdist_enable_mask, false, false,
|
||||
!s->has_param_exports, false,
|
||||
s->per_vertex_outputs | VARYING_BIT_POS, &s->out, row);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -229,6 +229,7 @@ ac_nir_export_position(nir_builder *b,
|
|||
enum amd_gfx_level gfx_level,
|
||||
uint32_t clip_cull_mask,
|
||||
bool write_pos_to_clipvertex,
|
||||
bool pack_clip_cull_distances,
|
||||
bool no_param_export,
|
||||
bool force_vrs,
|
||||
uint64_t outputs_written,
|
||||
|
|
@ -280,6 +281,16 @@ ac_nir_export_position(nir_builder *b,
|
|||
}
|
||||
}
|
||||
|
||||
/* If clip/cull distances are sparsely populated or some components are >= 0, pack them. */
|
||||
if (pack_clip_cull_distances) {
|
||||
unsigned num = 0;
|
||||
|
||||
u_foreach_bit(i, clip_cull_mask) {
|
||||
clip_dist[num++] = clip_dist[i];
|
||||
}
|
||||
clip_cull_mask = BITFIELD_MASK(num);
|
||||
}
|
||||
|
||||
if (outputs_written & VARYING_BIT_POS) {
|
||||
/* GFX10 (Navi1x) skip POS0 exports if EXEC=0 and DONE=0, causing a hang.
|
||||
* Setting valid_mask=1 prevents it and has no other effect.
|
||||
|
|
|
|||
|
|
@ -444,7 +444,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
|||
} else if (is_last_vgt_stage) {
|
||||
if (stage->stage != MESA_SHADER_GEOMETRY) {
|
||||
NIR_PASS(_, stage->nir, ac_nir_lower_legacy_vs, gfx_level,
|
||||
stage->info.outinfo.clip_dist_mask | stage->info.outinfo.cull_dist_mask, false,
|
||||
stage->info.outinfo.clip_dist_mask | stage->info.outinfo.cull_dist_mask, false, false,
|
||||
stage->info.outinfo.vs_output_param_offset, stage->info.outinfo.param_exports,
|
||||
stage->info.outinfo.export_prim_id, false, false, false, stage->info.force_vrs_per_vertex);
|
||||
|
||||
|
|
|
|||
|
|
@ -2272,7 +2272,7 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache
|
|||
};
|
||||
nir_shader *nir = ac_nir_create_gs_copy_shader(
|
||||
gs_stage->nir, pdev->info.gfx_level, gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask, false,
|
||||
gs_info->outinfo.vs_output_param_offset, gs_info->outinfo.param_exports, false, false, false,
|
||||
false, gs_info->outinfo.vs_output_param_offset, gs_info->outinfo.param_exports, false, false, false,
|
||||
gs_info->force_vrs_per_vertex, &output_info);
|
||||
|
||||
nir->info.internal = true;
|
||||
|
|
|
|||
|
|
@ -1601,7 +1601,7 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx *
|
|||
NIR_PASS_V(nir, ac_nir_lower_legacy_vs,
|
||||
sel->screen->info.gfx_level,
|
||||
clip_cull_mask,
|
||||
false,
|
||||
false, false,
|
||||
ctx->temp_info.vs_output_param_offset,
|
||||
shader->info.nr_param_exports,
|
||||
shader->key.ge.mono.u.vs_export_prim_id,
|
||||
|
|
@ -1919,7 +1919,7 @@ si_nir_generate_gs_copy_shader(struct si_screen *sscreen,
|
|||
ac_nir_create_gs_copy_shader(gs_nir,
|
||||
sscreen->info.gfx_level,
|
||||
clip_cull_mask,
|
||||
false,
|
||||
false, false,
|
||||
temp_info->vs_output_param_offset,
|
||||
shader->info.nr_param_exports,
|
||||
!gs_shader->info.num_streamout_vec4s,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue