diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h index a532db6e289..068e6be1576 100644 --- a/src/amd/common/ac_nir.h +++ b/src/amd/common/ac_nir.h @@ -115,6 +115,7 @@ ac_nir_lower_indirect_derefs(nir_shader *shader, void ac_nir_lower_ngg_nogs(nir_shader *shader, + enum radeon_family family, unsigned max_num_es_vertices, unsigned num_vertices_per_primitive, unsigned max_workgroup_size, diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c index 9c971975ca2..7aa7544999d 100644 --- a/src/amd/common/ac_nir_lower_ngg.c +++ b/src/amd/common/ac_nir_lower_ngg.c @@ -1359,6 +1359,7 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c void ac_nir_lower_ngg_nogs(nir_shader *shader, + enum radeon_family family, unsigned max_num_es_vertices, unsigned num_vertices_per_primitives, unsigned max_workgroup_size, @@ -1422,14 +1423,17 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, ngg_nogs_init_vertex_indices_vars(b, impl, &state); if (!can_cull) { - /* Allocate export space on wave 0 - confirm to the HW that we want to use all possible space */ - nir_if *if_wave_0 = nir_push_if(b, nir_ieq(b, nir_load_subgroup_id(b), nir_imm_int(b, 0))); - { - nir_ssa_def *vtx_cnt = nir_load_workgroup_num_input_vertices_amd(b); - nir_ssa_def *prim_cnt = nir_load_workgroup_num_input_primitives_amd(b); - nir_alloc_vertices_and_primitives_amd(b, vtx_cnt, prim_cnt); + /* Newer chips can use PRIMGEN_PASSTHRU_NO_MSG to skip gs_alloc_req for NGG passthrough. */ + if (!(passthrough && family >= CHIP_NAVI23)) { + /* Allocate export space on wave 0 - confirm to the HW that we want to use all possible space */ + nir_if *if_wave_0 = nir_push_if(b, nir_ieq(b, nir_load_subgroup_id(b), nir_imm_int(b, 0))); + { + nir_ssa_def *vtx_cnt = nir_load_workgroup_num_input_vertices_amd(b); + nir_ssa_def *prim_cnt = nir_load_workgroup_num_input_primitives_amd(b); + nir_alloc_vertices_and_primitives_amd(b, vtx_cnt, prim_cnt); + } + nir_pop_if(b, if_wave_0); } - nir_pop_if(b, if_wave_0); /* Take care of early primitive export, otherwise just pack the primitive export argument */ if (state.early_prim_export) diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index a542af035ba..c7de99d9376 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -1278,8 +1278,10 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx) /* TODO: primitive culling */ - ac_build_sendmsg_gs_alloc_req(&ctx->ac, get_wave_id_in_tg(ctx), ngg_get_vtx_cnt(ctx), - ngg_get_prim_cnt(ctx)); + /* Newer chips can use PRIMGEN_PASSTHRU_NO_MSG to skip gs_alloc_req for NGG passthrough. */ + if (!(ctx->shader_info->is_ngg_passthrough && ctx->ac.family >= CHIP_NAVI23)) + ac_build_sendmsg_gs_alloc_req(&ctx->ac, get_wave_id_in_tg(ctx), ngg_get_vtx_cnt(ctx), + ngg_get_prim_cnt(ctx)); /* TODO: streamout queries */ /* Export primitive data to the index buffer. diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 31504441a73..df50a56047e 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -6524,8 +6524,11 @@ radv_pipeline_emit_vgt_shader_config(struct radeon_cmdbuf *ctx_cs, stages |= S_028B54_PRIMGEN_EN(1); if (pipeline->streamout_shader) stages |= S_028B54_NGG_WAVE_ID_EN(1); - if (radv_pipeline_has_ngg_passthrough(pipeline)) + if (radv_pipeline_has_ngg_passthrough(pipeline)) { stages |= S_028B54_PRIMGEN_PASSTHRU_EN(1); + if (pdevice->rad_info.family >= CHIP_NAVI23) + stages |= S_028B54_PRIMGEN_PASSTHRU_NO_MSG(1); + } } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) { stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); } diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index fe22ae372a7..0a299b94607 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -1280,7 +1280,9 @@ void radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_ export_prim_id = info->tes.outinfo.export_prim_id; } - NIR_PASS_V(nir, ac_nir_lower_ngg_nogs, max_vtx_in, num_vertices_per_prim, + NIR_PASS_V(nir, ac_nir_lower_ngg_nogs, + device->physical_device->rad_info.family, + max_vtx_in, num_vertices_per_prim, info->workgroup_size, info->wave_size, info->has_ngg_culling, info->has_ngg_early_prim_export, info->is_ngg_passthrough, export_prim_id, pl_key->vs.provoking_vtx_last, false, pl_key->primitives_generated_query,