From df3de4acbbf63a37ce0376c4f9ae2e421b4c4895 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 27 Apr 2026 14:47:31 +0200 Subject: [PATCH] ac,radv,radeonsi: replace mesh_fast_launch_2 by gfx_level checks Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/common/ac_gpu_info.c | 5 ----- src/amd/common/ac_gpu_info.h | 4 ---- src/amd/common/nir/ac_nir_lower_ngg_mesh.c | 4 ++-- src/amd/vulkan/radv_cmd_buffer.c | 12 ++++++------ src/amd/vulkan/radv_device.c | 1 - src/amd/vulkan/radv_dgc.c | 8 ++++---- src/amd/vulkan/radv_pipeline_graphics.c | 2 +- src/amd/vulkan/radv_shader.c | 8 ++++---- src/amd/vulkan/radv_shader.h | 1 - src/amd/vulkan/radv_shader_args.c | 4 ++-- src/amd/vulkan/radv_shader_info.c | 2 +- src/gallium/drivers/radeonsi/si_mesh_shader.c | 6 +++--- src/gallium/drivers/radeonsi/si_shader.c | 4 ++-- src/gallium/drivers/radeonsi/si_shader_args.c | 2 +- src/gallium/drivers/radeonsi/si_state_shaders.cpp | 6 +++--- 15 files changed, 29 insertions(+), 40 deletions(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 9eb1e6ba19e..e4f86cd2d6c 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -342,8 +342,6 @@ ac_fill_compiler_info(struct radeon_info *info, const struct drm_amdgpu_info_dev out->has_attr_ring = info->gfx_level >= GFX11; - out->mesh_fast_launch_2 = info->mesh_fast_launch_2; - /* When distributed tessellation is unsupported, switch between SEs * at a higher frequency to manually balance the workload between SEs. */ @@ -1098,8 +1096,6 @@ void ac_fill_feature_info(struct radeon_info *info, const struct drm_amdgpu_info info->has_image_opcodes = debug_get_bool_option("AMD_IMAGE_OPCODES", info->has_graphics || info->family < CHIP_GFX940); - info->mesh_fast_launch_2 = info->gfx_level >= GFX11; - /* WARNING: Register shadowing decreases performance by up to 50% on GFX11 with current FW. */ info->has_kernelq_reg_shadowing = device_info->ids_flags & AMDGPU_IDS_FLAGS_PREEMPTION && info->gfx_level < GFX11 && @@ -1892,7 +1888,6 @@ void ac_print_gpu_info(FILE *f, const struct radeon_info *info, int fd) fprintf(f, " has_set_sh_pairs = %i\n", info->has_set_sh_pairs); fprintf(f, " has_set_sh_pairs_packed = %i\n", info->has_set_sh_pairs_packed); fprintf(f, " has_set_uconfig_pairs = %i\n", info->has_set_uconfig_pairs); - fprintf(f, " mesh_fast_launch_2 = %i\n", info->mesh_fast_launch_2); if (info->gfx_level < GFX12) { fprintf(f, "Display features:\n"); diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 3d27e8ef860..a998006d1e7 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -172,7 +172,6 @@ struct ac_compiler_info { uint32_t conformant_trunc_coord : 1; uint32_t has_attr_ring : 1; - uint32_t mesh_fast_launch_2 : 1; /* GFX6-7: limit TCS workgroup to 16 patches for better performance. */ uint32_t smaller_tcs_workgroups : 1; @@ -294,9 +293,6 @@ struct radeon_info { * the LLVM version doesn't work with multiparts shaders. */ - /* Support GS_FAST_LAUNCH(2) for mesh shaders. */ - bool mesh_fast_launch_2; - /* Display features. */ /* There are 2 display DCC codepaths, because display expects unaligned DCC. */ /* Disable RB and pipe alignment to skip the retile blit. (1 RB chips only) */ diff --git a/src/amd/common/nir/ac_nir_lower_ngg_mesh.c b/src/amd/common/nir/ac_nir_lower_ngg_mesh.c index c3001c4f9b6..152274937f4 100644 --- a/src/amd/common/nir/ac_nir_lower_ngg_mesh.c +++ b/src/amd/common/nir/ac_nir_lower_ngg_mesh.c @@ -1004,7 +1004,7 @@ emit_ms_finale(nir_builder *b, lower_ngg_ms_state *s) ms_prim_gen_query(b, invocation_index, num_prm, s); nir_def *row_start = NULL; - if (s->ac->mesh_fast_launch_2) + if (s->ac->gfx_level >= GFX11) row_start = s->hw_workgroup_size <= s->wave_size ? nir_imm_int(b, 0) : nir_load_subgroup_id(b); /* Load vertex/primitive attributes from shared memory and @@ -1369,7 +1369,7 @@ ac_nir_lower_ngg_mesh(nir_shader *shader, const ac_nir_lower_ngg_options *option shader->info.workgroup_size[1] * shader->info.workgroup_size[2]; - bool fast_launch_2 = options->compiler_info->mesh_fast_launch_2; + bool fast_launch_2 = options->compiler_info->gfx_level >= GFX11; unsigned hw_workgroup_size = options->max_workgroup_size; lower_ngg_ms_state state = { diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index a92dade5d58..d8dd3116e58 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -3456,7 +3456,7 @@ radv_emit_mesh_shader(struct radv_cmd_buffer *cmd_buffer) radeon_set_uconfig_reg_idx(&pdev->info, R_030908_VGT_PRIMITIVE_TYPE, 1, V_008958_DI_PT_POINTLIST); radeon_end(); - if (pdev->info.mesh_fast_launch_2) + if (pdev->info.gfx_level >= GFX11) radv_gfx11_emit_meshlet(cmd_buffer, ms); radv_emit_vgt_gs_out(cmd_buffer, gs_out); @@ -3771,7 +3771,7 @@ radv_emit_vgt_shader_config_gfx6(struct radv_cmd_buffer *cmd_buffer, const struc stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1); } else if (key->mesh) { assert(!key->ngg_passthrough); - unsigned gs_fast_launch = pdev->info.mesh_fast_launch_2 ? 2 : 1; + unsigned gs_fast_launch = pdev->info.gfx_level >= GFX11 ? 2 : 1; stages |= S_028B54_GS_EN(1) | S_028B54_GS_FAST_LAUNCH(gs_fast_launch) | S_028B54_NGG_WAVE_ID_EN(key->ngg_wave_id_en); } else if (key->ngg) { @@ -8715,7 +8715,7 @@ radv_bind_pre_rast_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER; if (pdev->info.gfx_level >= GFX11 && pdev->info.gfx_level < GFX12) { - /* GFX11-11.5 need GDS OA for streamout. */ + /* GFX11-11.7 need GDS OA for streamout. */ cmd_buffer->queue_state.gds_oa_needed = true; } } @@ -10924,7 +10924,7 @@ radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint3 uint32_t draw_id_enable = !!cmd_buffer->state.uses_drawid; uint32_t draw_id_reg = !draw_id_enable ? 0 : (base_reg + (xyz_dim_enable ? 12 : 0) - SI_SH_REG_OFFSET) >> 2; - uint32_t mode1_enable = !pdev->info.mesh_fast_launch_2; + uint32_t mode1_enable = pdev->info.gfx_level < GFX11; radeon_begin(cs); radeon_emit(PKT3(PKT3_DISPATCH_MESH_INDIRECT_MULTI, 7, predicating) | PKT3_RESET_FILTER_CAM_S(1)); @@ -11015,7 +11015,7 @@ radv_cs_emit_dispatch_taskmesh_gfx_packet(const struct radv_device *device, cons uint32_t xyz_dim_en = mesh_shader->info.cs.uses_grid_size; uint32_t xyz_dim_reg = !xyz_dim_en ? 0 : (cmd_state->vtx_base_sgpr - SI_SH_REG_OFFSET) >> 2; - uint32_t mode1_en = !pdev->info.mesh_fast_launch_2; + uint32_t mode1_en = pdev->info.gfx_level < GFX11; uint32_t linear_dispatch_en = cmd_state->shaders[MESA_SHADER_TASK]->info.cs.linear_taskmesh_dispatch; const bool sqtt_en = !!device->sqtt.bo; @@ -11354,7 +11354,7 @@ radv_emit_direct_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t x radv_emit_userdata_mesh(cmd_buffer, x, y, z); - if (pdev->info.mesh_fast_launch_2) { + if (pdev->info.gfx_level >= GFX11) { if (!view_mask) { radv_cs_emit_mesh_dispatch_packet(cmd_buffer, x, y, z); } else { diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 068e4765171..0e43317b7e7 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1146,7 +1146,6 @@ radv_device_init_compiler_info(struct radv_device *device) .family = pdev->info.family, .address32_hi = pdev->info.address32_hi, .rbplus_allowed = pdev->info.rbplus_allowed, - .mesh_fast_launch_2 = pdev->info.mesh_fast_launch_2, .has_cs_regalloc_hang_bug = pdev->info.has_cs_regalloc_hang_bug, .lds_size_per_workgroup = pdev->info.lds_size_per_workgroup, }, diff --git a/src/amd/vulkan/radv_dgc.c b/src/amd/vulkan/radv_dgc.c index 1c743dfd874..303dd9b1330 100644 --- a/src/amd/vulkan/radv_dgc.c +++ b/src/amd/vulkan/radv_dgc.c @@ -384,7 +384,7 @@ radv_get_sequence_size_graphics(const struct radv_indirect_command_layout *layou *ace_cmd_size += 6 * 4; } else { /* userdata writes + instance count + non-indexed draw */ - *cmd_size += (6 + 2 + (pdev->info.mesh_fast_launch_2 ? 5 : 3)) * 4; + *cmd_size += (6 + 2 + (pdev->info.gfx_level >= GFX11 ? 5 : 3)) * 4; } } else { /* userdata writes + instance count + non-indexed draw */ @@ -2272,7 +2272,7 @@ dgc_emit_dispatch_taskmesh_gfx(struct dgc_cmdbuf *cs, nir_def *sequence_id) nir_def *ring_entry_reg = load_param16(b, mesh_ring_entry_sgpr); nir_def *xyz_dim_enable = nir_bcsel(b, has_grid_size, nir_imm_int(b, S_4D1_XYZ_DIM_ENABLE(1)), nir_imm_int(b, 0)); - nir_def *mode1_enable = nir_imm_int(b, S_4D1_MODE1_ENABLE(!pdev->info.mesh_fast_launch_2)); + nir_def *mode1_enable = nir_imm_int(b, S_4D1_MODE1_ENABLE((pdev->info.gfx_level < GFX11))); nir_def *linear_dispatch_en = nir_bcsel(b, has_linear_dispatch_en, nir_imm_int(b, S_4D1_LINEAR_DISPATCH_ENABLE(1)), nir_imm_int(b, 0)); nir_def *sqtt_enable = nir_imm_int(b, device->sqtt.bo ? S_4D1_THREAD_TRACE_MARKER_ENABLE(1) : 0); @@ -2323,7 +2323,7 @@ dgc_emit_draw_mesh_tasks_gfx(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_de dgc_emit_userdata_mesh(cs, x, y, z, sequence_id); dgc_emit_instance_count(cs, nir_imm_int(b, 1)); - if (pdev->info.mesh_fast_launch_2) { + if (pdev->info.gfx_level >= GFX11) { dgc_emit_dispatch_mesh_direct(cs, x, y, z); } else { nir_def *vertex_count = nir_imul(b, x, nir_imul(b, y, z)); @@ -2391,7 +2391,7 @@ dgc_emit_draw_mesh_tasks_with_count_gfx(struct dgc_cmdbuf *cs, nir_def *stream_a if (pdev->info.gfx_level >= GFX11) { dgc_cs_emit(nir_ior_imm(b, nir_ior_imm(b, nir_ior(b, draw_index_enable, xyz_dim_enable), - S_4C2_MODE1_ENABLE(!pdev->info.mesh_fast_launch_2)), + S_4C2_MODE1_ENABLE((pdev->info.gfx_level < GFX11))), S_4C2_THREAD_TRACE_MARKER_ENABLE(sqtt_en))); } else { dgc_cs_emit(nir_ior_imm(b, draw_index_enable, S_4C2_THREAD_TRACE_MARKER_ENABLE(sqtt_en))); diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index 73b29439551..a707af40ae7 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -2438,7 +2438,7 @@ radv_graphics_shaders_compile(const struct radv_compiler_info *compiler_info, st active_nir_stages |= mesa_to_vk_shader_stage(i); } - if (!compiler_info->hw.mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir && + if (compiler_info->ac->gfx_level < GFX11 && stages[MESA_SHADER_MESH].nir && BITSET_TEST(stages[MESA_SHADER_MESH].nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_ID)) { nir_shader *mesh = stages[MESA_SHADER_MESH].nir; nir_shader *task = stages[MESA_SHADER_TASK].nir; diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 196de6e5298..ec3544da737 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -657,7 +657,7 @@ radv_shader_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct bool lower_local_invocation_index = false; if (nir->info.stage == MESA_SHADER_COMPUTE || nir->info.stage == MESA_SHADER_TASK || - (nir->info.stage == MESA_SHADER_MESH && compiler_info->hw.mesh_fast_launch_2)) { + (nir->info.stage == MESA_SHADER_MESH && compiler_info->ac->gfx_level >= GFX11)) { lower_local_invocation_index = nir->info.derivative_group == DERIVATIVE_GROUP_QUADS || (((nir->info.workgroup_size[0] == 1) + (nir->info.workgroup_size[1] == 1) + (nir->info.workgroup_size[2] == 1)) == 2); @@ -667,7 +667,7 @@ radv_shader_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct /* Mesh shaders run as NGG which can implement local_invocation_index from * the wave ID in merged_wave_info, but they don't have local_invocation_ids on GFX10.3. */ - .lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH && !compiler_info->hw.mesh_fast_launch_2, + .lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH && compiler_info->ac->gfx_level < GFX11, .lower_local_invocation_index = lower_local_invocation_index, }; NIR_PASS(_, nir, nir_lower_compute_system_values, &csv_options); @@ -1881,7 +1881,7 @@ radv_precompute_registers_hw_ms(struct radv_device *device, struct radv_shader * radv_precompute_registers_hw_ngg(device, shader); - regs->vgt_gs_max_vert_out = pdev->info.mesh_fast_launch_2 ? info->ngg_info.max_out_verts : info->workgroup_size; + regs->vgt_gs_max_vert_out = pdev->info.gfx_level >= GFX11 ? info->ngg_info.max_out_verts : info->workgroup_size; regs->ngg.ms.spi_shader_gs_meshlet_dim = S_00B2B0_MESHLET_NUM_THREAD_X(info->cs.block_size[0] - 1) | S_00B2B0_MESHLET_NUM_THREAD_Y(info->cs.block_size[1] - 1) | @@ -2398,7 +2398,7 @@ radv_postprocess_binary_config(const struct radv_compiler_info *compiler_info, s UNREACHABLE("Unexpected ES shader stage"); } - if (stage == MESA_SHADER_MESH && compiler_info->hw.mesh_fast_launch_2) { + if (stage == MESA_SHADER_MESH && compiler_info->ac->gfx_level >= GFX11) { /* Only VGPR0 is used for X/Y/Z local invocation ID */ gs_vgpr_comp_cnt = 0; } else if (gfx_level >= GFX12) { diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index afc3ab4e650..1d6b800faad 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -518,7 +518,6 @@ struct radv_compiler_info { uint32_t family; uint32_t address32_hi; bool rbplus_allowed; - bool mesh_fast_launch_2; bool has_cs_regalloc_hang_bug; uint32_t lds_size_per_workgroup; } hw; diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index c5202d2ac75..2c2b394b3a6 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -322,7 +322,7 @@ declare_ms_input_sgprs(struct radv_shader_args_state *state, const struct radv_s static void declare_ms_input_vgprs(const struct radv_compiler_info *compiler_info, struct radv_shader_args_state *state) { - if (compiler_info->hw.mesh_fast_launch_2) { + if (compiler_info->ac->gfx_level >= GFX11) { RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.local_invocation_ids_packed); } else { RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.vertex_id); @@ -829,7 +829,7 @@ declare_shader_args(const struct radv_compiler_info *compiler_info, struct radv_ RADV_ADD_UD_ARG(state, 1, AC_ARG_VALUE, ngg_query_buf_va, AC_UD_NGG_QUERY_BUF_VA); } - if (previous_stage != MESA_SHADER_MESH || !compiler_info->hw.mesh_fast_launch_2) { + if (previous_stage != MESA_SHADER_MESH || compiler_info->ac->gfx_level < GFX11) { if (gfx_level >= GFX12) { RADV_ADD_ARRAY_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.gs_vtx_offset, 0); RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.gs_prim_id); diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index 6bcfcd3ccda..913d4f5971f 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -764,7 +764,7 @@ calc_mesh_workgroup_size(const struct radv_compiler_info *compiler_info, const n { unsigned api_workgroup_size = ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX); - if (compiler_info->hw.mesh_fast_launch_2) { + if (compiler_info->ac->gfx_level >= GFX11) { /* Use multi-row export. It is also necessary to use the API workgroup size for non-emulated queries. */ info->workgroup_size = api_workgroup_size; } else { diff --git a/src/gallium/drivers/radeonsi/si_mesh_shader.c b/src/gallium/drivers/radeonsi/si_mesh_shader.c index 24d9b5fc417..e17a87ab401 100644 --- a/src/gallium/drivers/radeonsi/si_mesh_shader.c +++ b/src/gallium/drivers/radeonsi/si_mesh_shader.c @@ -332,7 +332,7 @@ static void si_emit_draw_mesh_tasks_gfx_packets(struct si_context *sctx, radeon_emit(S_4D0_RING_ENTRY_REG(ring_entry_loc) | S_4D0_XYZ_DIM_REG(grid_size_loc)); if (sctx->gfx_level >= GFX11) radeon_emit(S_4D1_XYZ_DIM_ENABLE(uses_grid_size) | - S_4D1_MODE1_ENABLE(!sctx->screen->info.mesh_fast_launch_2) | + S_4D1_MODE1_ENABLE((sctx->screen->info.gfx_level < GFX11)) | S_4D1_LINEAR_DISPATCH_ENABLE(linear_taskmesh_dispatch)); else radeon_emit(0); @@ -420,7 +420,7 @@ static void si_emit_draw_mesh_shader_only_packets(struct si_context *sctx, radeon_emit(S_4C2_DRAW_INDEX_ENABLE(uses_draw_id) | S_4C2_COUNT_INDIRECT_ENABLE(!!count_va) | S_4C2_XYZ_DIM_ENABLE(uses_grid_size) | - S_4C2_MODE1_ENABLE(!sctx->screen->info.mesh_fast_launch_2)); + S_4C2_MODE1_ENABLE((sctx->screen->info.gfx_level < GFX11))); else radeon_emit(S_4C2_DRAW_INDEX_ENABLE(uses_draw_id) | S_4C2_COUNT_INDIRECT_ENABLE(!!count_va)); @@ -442,7 +442,7 @@ static void si_emit_draw_mesh_shader_only_packets(struct si_context *sctx, si_emit_buffered_gfx_sh_regs_for_mesh(sctx); radeon_begin_again(cs); - if (sctx->screen->info.mesh_fast_launch_2) { + if (sctx->screen->info.gfx_level >= GFX11) { radeon_emit(PKT3(PKT3_DISPATCH_MESH_DIRECT, 3, sctx->render_cond_enabled)); radeon_emit(info->grid[0]); radeon_emit(info->grid[1]); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index d0a324fcec3..c2449693c35 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -165,7 +165,7 @@ unsigned si_get_max_workgroup_size(const struct si_shader *shader) /* Without multi-row export, we need at least number of output vertex/primitive * threads in workgroup for export (one vertex/primitive per thread). */ - if (stage == MESA_SHADER_MESH && !sscreen->info.mesh_fast_launch_2) { + if (stage == MESA_SHADER_MESH && sscreen->info.gfx_level < GFX11) { max_work_group_size = MAX3(max_work_group_size, shader->selector->info.base.mesh.max_vertices_out, shader->selector->info.base.mesh.max_primitives_out); @@ -739,7 +739,7 @@ static void si_preprocess_nir(struct si_nir_shader_ctx *ctx) } } - if (nir->info.stage == MESA_SHADER_MESH && !sel->screen->info.mesh_fast_launch_2) { + if (nir->info.stage == MESA_SHADER_MESH && sel->screen->info.gfx_level < GFX11) { NIR_PASS(progress, nir, nir_lower_compute_system_values, &(nir_lower_compute_system_values_options){ /* Mesh shaders run as NGG which can implement local_invocation_index from diff --git a/src/gallium/drivers/radeonsi/si_shader_args.c b/src/gallium/drivers/radeonsi/si_shader_args.c index 4d0b8a0e670..067dffc0e79 100644 --- a/src/gallium/drivers/radeonsi/si_shader_args.c +++ b/src/gallium/drivers/radeonsi/si_shader_args.c @@ -635,7 +635,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args, ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->mesh_scratch_ring_addr); /* VGPRs */ - if (sel->screen->info.mesh_fast_launch_2) { + if (sel->screen->info.gfx_level >= GFX11) { ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.local_invocation_ids_packed); } else { unsigned unused_args = sel->screen->info.gfx_level >= GFX12 ? 3 : 5; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 6ecf77d1e5a..ec92229100c 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -1569,7 +1569,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader shader->ngg.vgt_gs_max_vert_out = gs_sel->info.base.gs.vertices_out; shader->ngg.ge_ngg_subgrp_cntl = S_028B4C_PRIM_AMP_FACTOR(gs_sel->info.base.gs.vertices_out); } else if (gs_stage == MESA_SHADER_MESH) { - shader->ngg.vgt_gs_max_vert_out = sscreen->info.mesh_fast_launch_2 ? + shader->ngg.vgt_gs_max_vert_out = sscreen->info.gfx_level >= GFX11 ? gs_info->base.mesh.max_vertices_out : si_get_max_workgroup_size(shader); shader->ngg.ge_ngg_subgrp_cntl = gs_info->base.mesh.max_primitives_out; } else { @@ -1730,7 +1730,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader if (gs_stage == MESA_SHADER_MESH) { shader->ngg.vgt_shader_stages_en = S_028B54_GS_EN(1) | - S_028B54_GS_FAST_LAUNCH(sscreen->info.mesh_fast_launch_2 ? 2 : 1); + S_028B54_GS_FAST_LAUNCH(sscreen->info.gfx_level >= GFX11 ? 2 : 1); } else { shader->ngg.vgt_shader_stages_en = S_028B54_ES_EN(es_stage == MESA_SHADER_TESS_EVAL ? @@ -1747,7 +1747,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader S_028B54_MAX_PRIMGRP_IN_WAVE(2); } - if (gs_stage == MESA_SHADER_MESH && sscreen->info.mesh_fast_launch_2) { + if (gs_stage == MESA_SHADER_MESH && sscreen->info.gfx_level >= GFX11) { unsigned workgroup_threads = gs_info->base.workgroup_size[0] * gs_info->base.workgroup_size[1] *