mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 02:28:10 +02:00
ac/nir/ngg: Pass radeon_info to mesh shader lowering.
Same idea as the VS/TES and GS lowering: Make shader compilation decisions based on the features of the current GPU instead of ad-hoc deciding according to GFX level. Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33218>
This commit is contained in:
parent
b8204c8df9
commit
f7305f776e
3 changed files with 33 additions and 32 deletions
|
|
@ -183,16 +183,16 @@ ac_nir_lower_ngg_gs(nir_shader *shader, const ac_nir_lower_ngg_options *options)
|
|||
|
||||
void
|
||||
ac_nir_lower_ngg_mesh(nir_shader *shader,
|
||||
enum amd_gfx_level gfx_level,
|
||||
uint32_t clipdist_enable_mask,
|
||||
const uint8_t *vs_output_param_offset,
|
||||
bool has_param_exports,
|
||||
bool *out_needs_scratch_ring,
|
||||
unsigned wave_size,
|
||||
unsigned workgroup_size,
|
||||
bool multiview,
|
||||
bool has_query,
|
||||
bool fast_launch_2);
|
||||
const struct radeon_info *hw_info,
|
||||
uint32_t clipdist_enable_mask,
|
||||
const uint8_t *vs_output_param_offset,
|
||||
bool has_param_exports,
|
||||
bool *out_needs_scratch_ring,
|
||||
unsigned wave_size,
|
||||
unsigned workgroup_size,
|
||||
bool multiview,
|
||||
bool has_query,
|
||||
bool fast_launch_2);
|
||||
|
||||
void
|
||||
ac_nir_lower_task_outputs_to_mem(nir_shader *shader,
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include "ac_nir.h"
|
||||
#include "ac_nir_helpers.h"
|
||||
#include "ac_gpu_info.h"
|
||||
|
||||
#include "nir_builder.h"
|
||||
|
||||
|
|
@ -89,7 +90,7 @@ typedef struct
|
|||
|
||||
typedef struct
|
||||
{
|
||||
enum amd_gfx_level gfx_level;
|
||||
const struct radeon_info *hw_info;
|
||||
bool fast_launch_2;
|
||||
bool vert_multirow_export;
|
||||
bool prim_multirow_export;
|
||||
|
|
@ -799,7 +800,7 @@ ms_prim_exp_arg_ch1(nir_builder *b, nir_def *invocation_index, nir_def *num_vtx,
|
|||
indices[i] = nir_umin(b, indices[i], max_vtx_idx);
|
||||
}
|
||||
|
||||
return ac_nir_pack_ngg_prim_exp_arg(b, s->vertices_per_prim, indices, cull_flag, s->gfx_level);
|
||||
return ac_nir_pack_ngg_prim_exp_arg(b, s->vertices_per_prim, indices, cull_flag, s->hw_info->gfx_level);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
|
|
@ -826,7 +827,7 @@ ms_prim_exp_arg_ch2(nir_builder *b, uint64_t outputs_mask, lower_ngg_ms_state *s
|
|||
|
||||
if (outputs_mask & VARYING_BIT_LAYER) {
|
||||
nir_def *layer =
|
||||
nir_ishl_imm(b, s->out.outputs[VARYING_SLOT_LAYER][0], s->gfx_level >= GFX11 ? 0 : 17);
|
||||
nir_ishl_imm(b, s->out.outputs[VARYING_SLOT_LAYER][0], s->hw_info->gfx_level >= GFX11 ? 0 : 17);
|
||||
prim_exp_arg_ch2 = nir_ior(b, prim_exp_arg_ch2, layer);
|
||||
}
|
||||
|
||||
|
|
@ -890,7 +891,7 @@ emit_ms_vertex(nir_builder *b, nir_def *index, nir_def *row, bool exports, bool
|
|||
ms_emit_arrayed_outputs(b, index, per_vertex_outputs, s);
|
||||
|
||||
if (exports) {
|
||||
ac_nir_export_position(b, s->gfx_level, s->clipdist_enable_mask,
|
||||
ac_nir_export_position(b, s->hw_info->gfx_level, s->clipdist_enable_mask,
|
||||
!s->has_param_exports, false, true,
|
||||
s->per_vertex_outputs | VARYING_BIT_POS, &s->out, row);
|
||||
}
|
||||
|
|
@ -899,12 +900,12 @@ emit_ms_vertex(nir_builder *b, nir_def *index, nir_def *row, bool exports, bool
|
|||
/* Export generic attributes on GFX10.3
|
||||
* (On GFX11 they are already stored in the attribute ring.)
|
||||
*/
|
||||
if (s->has_param_exports && s->gfx_level == GFX10_3) {
|
||||
if (s->has_param_exports && s->hw_info->gfx_level == GFX10_3) {
|
||||
ac_nir_export_parameters(b, s->vs_output_param_offset, per_vertex_outputs, 0, &s->out);
|
||||
}
|
||||
|
||||
/* GFX11+: also store special outputs to the attribute ring so PS can load them. */
|
||||
if (s->gfx_level >= GFX11 && (per_vertex_outputs & MS_VERT_ARG_EXP_MASK))
|
||||
if (s->hw_info->gfx_level >= GFX11 && (per_vertex_outputs & MS_VERT_ARG_EXP_MASK))
|
||||
ms_emit_attribute_ring_output_stores(b, per_vertex_outputs & MS_VERT_ARG_EXP_MASK, index, s);
|
||||
}
|
||||
}
|
||||
|
|
@ -937,12 +938,12 @@ emit_ms_primitive(nir_builder *b, nir_def *index, nir_def *row, bool exports, bo
|
|||
/* Export generic attributes on GFX10.3
|
||||
* (On GFX11 they are already stored in the attribute ring.)
|
||||
*/
|
||||
if (s->has_param_exports && s->gfx_level == GFX10_3) {
|
||||
if (s->has_param_exports && s->hw_info->gfx_level == GFX10_3) {
|
||||
ac_nir_export_parameters(b, s->vs_output_param_offset, per_primitive_outputs, 0, &s->out);
|
||||
}
|
||||
|
||||
/* GFX11+: also store special outputs to the attribute ring so PS can load them. */
|
||||
if (s->gfx_level >= GFX11)
|
||||
if (s->hw_info->gfx_level >= GFX11)
|
||||
ms_emit_attribute_ring_output_stores(b, per_primitive_outputs & MS_PRIM_ARG_EXP_MASK, index, s);
|
||||
}
|
||||
}
|
||||
|
|
@ -1045,7 +1046,7 @@ emit_ms_finale(nir_builder *b, lower_ngg_ms_state *s)
|
|||
(per_vertex_outputs & MS_VERT_ARG_EXP_MASK) ||
|
||||
(per_primitive_outputs & MS_PRIM_ARG_EXP_MASK);
|
||||
|
||||
const bool wait_attr_ring = must_wait_attr_ring(s->gfx_level, has_special_param_exports);
|
||||
const bool wait_attr_ring = must_wait_attr_ring(s->hw_info->gfx_level, has_special_param_exports);
|
||||
|
||||
/* Export vertices. */
|
||||
if ((per_vertex_outputs & ~VARYING_BIT_POS) || !wait_attr_ring) {
|
||||
|
|
@ -1350,16 +1351,16 @@ ms_calculate_output_layout(enum amd_gfx_level gfx_level, unsigned api_shared_siz
|
|||
|
||||
void
|
||||
ac_nir_lower_ngg_mesh(nir_shader *shader,
|
||||
enum amd_gfx_level gfx_level,
|
||||
uint32_t clipdist_enable_mask,
|
||||
const uint8_t *vs_output_param_offset,
|
||||
bool has_param_exports,
|
||||
bool *out_needs_scratch_ring,
|
||||
unsigned wave_size,
|
||||
unsigned hw_workgroup_size,
|
||||
bool multiview,
|
||||
bool has_query,
|
||||
bool fast_launch_2)
|
||||
const struct radeon_info *hw_info,
|
||||
uint32_t clipdist_enable_mask,
|
||||
const uint8_t *vs_output_param_offset,
|
||||
bool has_param_exports,
|
||||
bool *out_needs_scratch_ring,
|
||||
unsigned wave_size,
|
||||
unsigned hw_workgroup_size,
|
||||
bool multiview,
|
||||
bool has_query,
|
||||
bool fast_launch_2)
|
||||
{
|
||||
unsigned vertices_per_prim =
|
||||
mesa_vertices_per_prim(shader->info.mesh.primitive_type);
|
||||
|
|
@ -1379,7 +1380,7 @@ ac_nir_lower_ngg_mesh(nir_shader *shader,
|
|||
unsigned max_primitives = shader->info.mesh.max_primitives_out;
|
||||
|
||||
ms_out_mem_layout layout = ms_calculate_output_layout(
|
||||
gfx_level, shader->info.shared_size, per_vertex_outputs, per_primitive_outputs,
|
||||
hw_info->gfx_level, shader->info.shared_size, per_vertex_outputs, per_primitive_outputs,
|
||||
cross_invocation_access, max_vertices, max_primitives, vertices_per_prim);
|
||||
|
||||
shader->info.shared_size = layout.lds.total_size;
|
||||
|
|
@ -1406,7 +1407,7 @@ ac_nir_lower_ngg_mesh(nir_shader *shader,
|
|||
.hw_workgroup_size = hw_workgroup_size,
|
||||
.insert_layer_output = multiview && !(shader->info.outputs_written & VARYING_BIT_LAYER),
|
||||
.uses_cull_flags = uses_cull,
|
||||
.gfx_level = gfx_level,
|
||||
.hw_info = hw_info,
|
||||
.fast_launch_2 = fast_launch_2,
|
||||
.vert_multirow_export = fast_launch_2 && max_vertices > hw_workgroup_size,
|
||||
.prim_multirow_export = fast_launch_2 && max_primitives > hw_workgroup_size,
|
||||
|
|
|
|||
|
|
@ -802,7 +802,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
|
|||
unsigned hw_workgroup_size = ALIGN(info->workgroup_size, info->wave_size);
|
||||
|
||||
bool scratch_ring = false;
|
||||
NIR_PASS_V(nir, ac_nir_lower_ngg_mesh, pdev->info.gfx_level, options.clip_cull_dist_mask,
|
||||
NIR_PASS_V(nir, ac_nir_lower_ngg_mesh, &pdev->info, options.clip_cull_dist_mask,
|
||||
options.vs_output_param_offset, options.has_param_exports, &scratch_ring, info->wave_size,
|
||||
hw_workgroup_size, gfx_state->has_multiview_view_index, info->ms.has_query, pdev->mesh_fast_launch_2);
|
||||
ngg_stage->info.ms.needs_ms_scratch_ring = scratch_ring;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue