mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 02:28:10 +02:00
ac,radv,radeonsi: replace mesh_fast_launch_2 by gfx_level checks
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41204>
This commit is contained in:
parent
94ae99f16f
commit
df3de4acbb
15 changed files with 29 additions and 40 deletions
|
|
@ -342,8 +342,6 @@ ac_fill_compiler_info(struct radeon_info *info, const struct drm_amdgpu_info_dev
|
|||
|
||||
out->has_attr_ring = info->gfx_level >= GFX11;
|
||||
|
||||
out->mesh_fast_launch_2 = info->mesh_fast_launch_2;
|
||||
|
||||
/* When distributed tessellation is unsupported, switch between SEs
|
||||
* at a higher frequency to manually balance the workload between SEs.
|
||||
*/
|
||||
|
|
@ -1098,8 +1096,6 @@ void ac_fill_feature_info(struct radeon_info *info, const struct drm_amdgpu_info
|
|||
info->has_image_opcodes = debug_get_bool_option("AMD_IMAGE_OPCODES",
|
||||
info->has_graphics || info->family < CHIP_GFX940);
|
||||
|
||||
info->mesh_fast_launch_2 = info->gfx_level >= GFX11;
|
||||
|
||||
/* WARNING: Register shadowing decreases performance by up to 50% on GFX11 with current FW. */
|
||||
info->has_kernelq_reg_shadowing = device_info->ids_flags & AMDGPU_IDS_FLAGS_PREEMPTION &&
|
||||
info->gfx_level < GFX11 &&
|
||||
|
|
@ -1892,7 +1888,6 @@ void ac_print_gpu_info(FILE *f, const struct radeon_info *info, int fd)
|
|||
fprintf(f, " has_set_sh_pairs = %i\n", info->has_set_sh_pairs);
|
||||
fprintf(f, " has_set_sh_pairs_packed = %i\n", info->has_set_sh_pairs_packed);
|
||||
fprintf(f, " has_set_uconfig_pairs = %i\n", info->has_set_uconfig_pairs);
|
||||
fprintf(f, " mesh_fast_launch_2 = %i\n", info->mesh_fast_launch_2);
|
||||
|
||||
if (info->gfx_level < GFX12) {
|
||||
fprintf(f, "Display features:\n");
|
||||
|
|
|
|||
|
|
@ -172,7 +172,6 @@ struct ac_compiler_info {
|
|||
uint32_t conformant_trunc_coord : 1;
|
||||
|
||||
uint32_t has_attr_ring : 1;
|
||||
uint32_t mesh_fast_launch_2 : 1;
|
||||
|
||||
/* GFX6-7: limit TCS workgroup to 16 patches for better performance. */
|
||||
uint32_t smaller_tcs_workgroups : 1;
|
||||
|
|
@ -294,9 +293,6 @@ struct radeon_info {
|
|||
* the LLVM version doesn't work with multiparts shaders.
|
||||
*/
|
||||
|
||||
/* Support GS_FAST_LAUNCH(2) for mesh shaders. */
|
||||
bool mesh_fast_launch_2;
|
||||
|
||||
/* Display features. */
|
||||
/* There are 2 display DCC codepaths, because display expects unaligned DCC. */
|
||||
/* Disable RB and pipe alignment to skip the retile blit. (1 RB chips only) */
|
||||
|
|
|
|||
|
|
@ -1004,7 +1004,7 @@ emit_ms_finale(nir_builder *b, lower_ngg_ms_state *s)
|
|||
ms_prim_gen_query(b, invocation_index, num_prm, s);
|
||||
|
||||
nir_def *row_start = NULL;
|
||||
if (s->ac->mesh_fast_launch_2)
|
||||
if (s->ac->gfx_level >= GFX11)
|
||||
row_start = s->hw_workgroup_size <= s->wave_size ? nir_imm_int(b, 0) : nir_load_subgroup_id(b);
|
||||
|
||||
/* Load vertex/primitive attributes from shared memory and
|
||||
|
|
@ -1369,7 +1369,7 @@ ac_nir_lower_ngg_mesh(nir_shader *shader, const ac_nir_lower_ngg_options *option
|
|||
shader->info.workgroup_size[1] *
|
||||
shader->info.workgroup_size[2];
|
||||
|
||||
bool fast_launch_2 = options->compiler_info->mesh_fast_launch_2;
|
||||
bool fast_launch_2 = options->compiler_info->gfx_level >= GFX11;
|
||||
|
||||
unsigned hw_workgroup_size = options->max_workgroup_size;
|
||||
lower_ngg_ms_state state = {
|
||||
|
|
|
|||
|
|
@ -3456,7 +3456,7 @@ radv_emit_mesh_shader(struct radv_cmd_buffer *cmd_buffer)
|
|||
radeon_set_uconfig_reg_idx(&pdev->info, R_030908_VGT_PRIMITIVE_TYPE, 1, V_008958_DI_PT_POINTLIST);
|
||||
radeon_end();
|
||||
|
||||
if (pdev->info.mesh_fast_launch_2)
|
||||
if (pdev->info.gfx_level >= GFX11)
|
||||
radv_gfx11_emit_meshlet(cmd_buffer, ms);
|
||||
|
||||
radv_emit_vgt_gs_out(cmd_buffer, gs_out);
|
||||
|
|
@ -3771,7 +3771,7 @@ radv_emit_vgt_shader_config_gfx6(struct radv_cmd_buffer *cmd_buffer, const struc
|
|||
stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1);
|
||||
} else if (key->mesh) {
|
||||
assert(!key->ngg_passthrough);
|
||||
unsigned gs_fast_launch = pdev->info.mesh_fast_launch_2 ? 2 : 1;
|
||||
unsigned gs_fast_launch = pdev->info.gfx_level >= GFX11 ? 2 : 1;
|
||||
stages |=
|
||||
S_028B54_GS_EN(1) | S_028B54_GS_FAST_LAUNCH(gs_fast_launch) | S_028B54_NGG_WAVE_ID_EN(key->ngg_wave_id_en);
|
||||
} else if (key->ngg) {
|
||||
|
|
@ -8715,7 +8715,7 @@ radv_bind_pre_rast_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_
|
|||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
|
||||
|
||||
if (pdev->info.gfx_level >= GFX11 && pdev->info.gfx_level < GFX12) {
|
||||
/* GFX11-11.5 need GDS OA for streamout. */
|
||||
/* GFX11-11.7 need GDS OA for streamout. */
|
||||
cmd_buffer->queue_state.gds_oa_needed = true;
|
||||
}
|
||||
}
|
||||
|
|
@ -10924,7 +10924,7 @@ radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint3
|
|||
uint32_t draw_id_enable = !!cmd_buffer->state.uses_drawid;
|
||||
uint32_t draw_id_reg = !draw_id_enable ? 0 : (base_reg + (xyz_dim_enable ? 12 : 0) - SI_SH_REG_OFFSET) >> 2;
|
||||
|
||||
uint32_t mode1_enable = !pdev->info.mesh_fast_launch_2;
|
||||
uint32_t mode1_enable = pdev->info.gfx_level < GFX11;
|
||||
|
||||
radeon_begin(cs);
|
||||
radeon_emit(PKT3(PKT3_DISPATCH_MESH_INDIRECT_MULTI, 7, predicating) | PKT3_RESET_FILTER_CAM_S(1));
|
||||
|
|
@ -11015,7 +11015,7 @@ radv_cs_emit_dispatch_taskmesh_gfx_packet(const struct radv_device *device, cons
|
|||
|
||||
uint32_t xyz_dim_en = mesh_shader->info.cs.uses_grid_size;
|
||||
uint32_t xyz_dim_reg = !xyz_dim_en ? 0 : (cmd_state->vtx_base_sgpr - SI_SH_REG_OFFSET) >> 2;
|
||||
uint32_t mode1_en = !pdev->info.mesh_fast_launch_2;
|
||||
uint32_t mode1_en = pdev->info.gfx_level < GFX11;
|
||||
uint32_t linear_dispatch_en = cmd_state->shaders[MESA_SHADER_TASK]->info.cs.linear_taskmesh_dispatch;
|
||||
const bool sqtt_en = !!device->sqtt.bo;
|
||||
|
||||
|
|
@ -11354,7 +11354,7 @@ radv_emit_direct_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t x
|
|||
|
||||
radv_emit_userdata_mesh(cmd_buffer, x, y, z);
|
||||
|
||||
if (pdev->info.mesh_fast_launch_2) {
|
||||
if (pdev->info.gfx_level >= GFX11) {
|
||||
if (!view_mask) {
|
||||
radv_cs_emit_mesh_dispatch_packet(cmd_buffer, x, y, z);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -1146,7 +1146,6 @@ radv_device_init_compiler_info(struct radv_device *device)
|
|||
.family = pdev->info.family,
|
||||
.address32_hi = pdev->info.address32_hi,
|
||||
.rbplus_allowed = pdev->info.rbplus_allowed,
|
||||
.mesh_fast_launch_2 = pdev->info.mesh_fast_launch_2,
|
||||
.has_cs_regalloc_hang_bug = pdev->info.has_cs_regalloc_hang_bug,
|
||||
.lds_size_per_workgroup = pdev->info.lds_size_per_workgroup,
|
||||
},
|
||||
|
|
|
|||
|
|
@ -384,7 +384,7 @@ radv_get_sequence_size_graphics(const struct radv_indirect_command_layout *layou
|
|||
*ace_cmd_size += 6 * 4;
|
||||
} else {
|
||||
/* userdata writes + instance count + non-indexed draw */
|
||||
*cmd_size += (6 + 2 + (pdev->info.mesh_fast_launch_2 ? 5 : 3)) * 4;
|
||||
*cmd_size += (6 + 2 + (pdev->info.gfx_level >= GFX11 ? 5 : 3)) * 4;
|
||||
}
|
||||
} else {
|
||||
/* userdata writes + instance count + non-indexed draw */
|
||||
|
|
@ -2272,7 +2272,7 @@ dgc_emit_dispatch_taskmesh_gfx(struct dgc_cmdbuf *cs, nir_def *sequence_id)
|
|||
nir_def *ring_entry_reg = load_param16(b, mesh_ring_entry_sgpr);
|
||||
|
||||
nir_def *xyz_dim_enable = nir_bcsel(b, has_grid_size, nir_imm_int(b, S_4D1_XYZ_DIM_ENABLE(1)), nir_imm_int(b, 0));
|
||||
nir_def *mode1_enable = nir_imm_int(b, S_4D1_MODE1_ENABLE(!pdev->info.mesh_fast_launch_2));
|
||||
nir_def *mode1_enable = nir_imm_int(b, S_4D1_MODE1_ENABLE((pdev->info.gfx_level < GFX11)));
|
||||
nir_def *linear_dispatch_en =
|
||||
nir_bcsel(b, has_linear_dispatch_en, nir_imm_int(b, S_4D1_LINEAR_DISPATCH_ENABLE(1)), nir_imm_int(b, 0));
|
||||
nir_def *sqtt_enable = nir_imm_int(b, device->sqtt.bo ? S_4D1_THREAD_TRACE_MARKER_ENABLE(1) : 0);
|
||||
|
|
@ -2323,7 +2323,7 @@ dgc_emit_draw_mesh_tasks_gfx(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_de
|
|||
dgc_emit_userdata_mesh(cs, x, y, z, sequence_id);
|
||||
dgc_emit_instance_count(cs, nir_imm_int(b, 1));
|
||||
|
||||
if (pdev->info.mesh_fast_launch_2) {
|
||||
if (pdev->info.gfx_level >= GFX11) {
|
||||
dgc_emit_dispatch_mesh_direct(cs, x, y, z);
|
||||
} else {
|
||||
nir_def *vertex_count = nir_imul(b, x, nir_imul(b, y, z));
|
||||
|
|
@ -2391,7 +2391,7 @@ dgc_emit_draw_mesh_tasks_with_count_gfx(struct dgc_cmdbuf *cs, nir_def *stream_a
|
|||
if (pdev->info.gfx_level >= GFX11) {
|
||||
dgc_cs_emit(nir_ior_imm(b,
|
||||
nir_ior_imm(b, nir_ior(b, draw_index_enable, xyz_dim_enable),
|
||||
S_4C2_MODE1_ENABLE(!pdev->info.mesh_fast_launch_2)),
|
||||
S_4C2_MODE1_ENABLE((pdev->info.gfx_level < GFX11))),
|
||||
S_4C2_THREAD_TRACE_MARKER_ENABLE(sqtt_en)));
|
||||
} else {
|
||||
dgc_cs_emit(nir_ior_imm(b, draw_index_enable, S_4C2_THREAD_TRACE_MARKER_ENABLE(sqtt_en)));
|
||||
|
|
|
|||
|
|
@ -2438,7 +2438,7 @@ radv_graphics_shaders_compile(const struct radv_compiler_info *compiler_info, st
|
|||
active_nir_stages |= mesa_to_vk_shader_stage(i);
|
||||
}
|
||||
|
||||
if (!compiler_info->hw.mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir &&
|
||||
if (compiler_info->ac->gfx_level < GFX11 && stages[MESA_SHADER_MESH].nir &&
|
||||
BITSET_TEST(stages[MESA_SHADER_MESH].nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_ID)) {
|
||||
nir_shader *mesh = stages[MESA_SHADER_MESH].nir;
|
||||
nir_shader *task = stages[MESA_SHADER_TASK].nir;
|
||||
|
|
|
|||
|
|
@ -657,7 +657,7 @@ radv_shader_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct
|
|||
bool lower_local_invocation_index = false;
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_COMPUTE || nir->info.stage == MESA_SHADER_TASK ||
|
||||
(nir->info.stage == MESA_SHADER_MESH && compiler_info->hw.mesh_fast_launch_2)) {
|
||||
(nir->info.stage == MESA_SHADER_MESH && compiler_info->ac->gfx_level >= GFX11)) {
|
||||
lower_local_invocation_index = nir->info.derivative_group == DERIVATIVE_GROUP_QUADS ||
|
||||
(((nir->info.workgroup_size[0] == 1) + (nir->info.workgroup_size[1] == 1) +
|
||||
(nir->info.workgroup_size[2] == 1)) == 2);
|
||||
|
|
@ -667,7 +667,7 @@ radv_shader_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct
|
|||
/* Mesh shaders run as NGG which can implement local_invocation_index from
|
||||
* the wave ID in merged_wave_info, but they don't have local_invocation_ids on GFX10.3.
|
||||
*/
|
||||
.lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH && !compiler_info->hw.mesh_fast_launch_2,
|
||||
.lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH && compiler_info->ac->gfx_level < GFX11,
|
||||
.lower_local_invocation_index = lower_local_invocation_index,
|
||||
};
|
||||
NIR_PASS(_, nir, nir_lower_compute_system_values, &csv_options);
|
||||
|
|
@ -1881,7 +1881,7 @@ radv_precompute_registers_hw_ms(struct radv_device *device, struct radv_shader *
|
|||
|
||||
radv_precompute_registers_hw_ngg(device, shader);
|
||||
|
||||
regs->vgt_gs_max_vert_out = pdev->info.mesh_fast_launch_2 ? info->ngg_info.max_out_verts : info->workgroup_size;
|
||||
regs->vgt_gs_max_vert_out = pdev->info.gfx_level >= GFX11 ? info->ngg_info.max_out_verts : info->workgroup_size;
|
||||
|
||||
regs->ngg.ms.spi_shader_gs_meshlet_dim = S_00B2B0_MESHLET_NUM_THREAD_X(info->cs.block_size[0] - 1) |
|
||||
S_00B2B0_MESHLET_NUM_THREAD_Y(info->cs.block_size[1] - 1) |
|
||||
|
|
@ -2398,7 +2398,7 @@ radv_postprocess_binary_config(const struct radv_compiler_info *compiler_info, s
|
|||
UNREACHABLE("Unexpected ES shader stage");
|
||||
}
|
||||
|
||||
if (stage == MESA_SHADER_MESH && compiler_info->hw.mesh_fast_launch_2) {
|
||||
if (stage == MESA_SHADER_MESH && compiler_info->ac->gfx_level >= GFX11) {
|
||||
/* Only VGPR0 is used for X/Y/Z local invocation ID */
|
||||
gs_vgpr_comp_cnt = 0;
|
||||
} else if (gfx_level >= GFX12) {
|
||||
|
|
|
|||
|
|
@ -518,7 +518,6 @@ struct radv_compiler_info {
|
|||
uint32_t family;
|
||||
uint32_t address32_hi;
|
||||
bool rbplus_allowed;
|
||||
bool mesh_fast_launch_2;
|
||||
bool has_cs_regalloc_hang_bug;
|
||||
uint32_t lds_size_per_workgroup;
|
||||
} hw;
|
||||
|
|
|
|||
|
|
@ -322,7 +322,7 @@ declare_ms_input_sgprs(struct radv_shader_args_state *state, const struct radv_s
|
|||
static void
|
||||
declare_ms_input_vgprs(const struct radv_compiler_info *compiler_info, struct radv_shader_args_state *state)
|
||||
{
|
||||
if (compiler_info->hw.mesh_fast_launch_2) {
|
||||
if (compiler_info->ac->gfx_level >= GFX11) {
|
||||
RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.local_invocation_ids_packed);
|
||||
} else {
|
||||
RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.vertex_id);
|
||||
|
|
@ -829,7 +829,7 @@ declare_shader_args(const struct radv_compiler_info *compiler_info, struct radv_
|
|||
RADV_ADD_UD_ARG(state, 1, AC_ARG_VALUE, ngg_query_buf_va, AC_UD_NGG_QUERY_BUF_VA);
|
||||
}
|
||||
|
||||
if (previous_stage != MESA_SHADER_MESH || !compiler_info->hw.mesh_fast_launch_2) {
|
||||
if (previous_stage != MESA_SHADER_MESH || compiler_info->ac->gfx_level < GFX11) {
|
||||
if (gfx_level >= GFX12) {
|
||||
RADV_ADD_ARRAY_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.gs_vtx_offset, 0);
|
||||
RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.gs_prim_id);
|
||||
|
|
|
|||
|
|
@ -764,7 +764,7 @@ calc_mesh_workgroup_size(const struct radv_compiler_info *compiler_info, const n
|
|||
{
|
||||
unsigned api_workgroup_size = ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX);
|
||||
|
||||
if (compiler_info->hw.mesh_fast_launch_2) {
|
||||
if (compiler_info->ac->gfx_level >= GFX11) {
|
||||
/* Use multi-row export. It is also necessary to use the API workgroup size for non-emulated queries. */
|
||||
info->workgroup_size = api_workgroup_size;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -332,7 +332,7 @@ static void si_emit_draw_mesh_tasks_gfx_packets(struct si_context *sctx,
|
|||
radeon_emit(S_4D0_RING_ENTRY_REG(ring_entry_loc) | S_4D0_XYZ_DIM_REG(grid_size_loc));
|
||||
if (sctx->gfx_level >= GFX11)
|
||||
radeon_emit(S_4D1_XYZ_DIM_ENABLE(uses_grid_size) |
|
||||
S_4D1_MODE1_ENABLE(!sctx->screen->info.mesh_fast_launch_2) |
|
||||
S_4D1_MODE1_ENABLE((sctx->screen->info.gfx_level < GFX11)) |
|
||||
S_4D1_LINEAR_DISPATCH_ENABLE(linear_taskmesh_dispatch));
|
||||
else
|
||||
radeon_emit(0);
|
||||
|
|
@ -420,7 +420,7 @@ static void si_emit_draw_mesh_shader_only_packets(struct si_context *sctx,
|
|||
radeon_emit(S_4C2_DRAW_INDEX_ENABLE(uses_draw_id) |
|
||||
S_4C2_COUNT_INDIRECT_ENABLE(!!count_va) |
|
||||
S_4C2_XYZ_DIM_ENABLE(uses_grid_size) |
|
||||
S_4C2_MODE1_ENABLE(!sctx->screen->info.mesh_fast_launch_2));
|
||||
S_4C2_MODE1_ENABLE((sctx->screen->info.gfx_level < GFX11)));
|
||||
else
|
||||
radeon_emit(S_4C2_DRAW_INDEX_ENABLE(uses_draw_id) |
|
||||
S_4C2_COUNT_INDIRECT_ENABLE(!!count_va));
|
||||
|
|
@ -442,7 +442,7 @@ static void si_emit_draw_mesh_shader_only_packets(struct si_context *sctx,
|
|||
si_emit_buffered_gfx_sh_regs_for_mesh(sctx);
|
||||
radeon_begin_again(cs);
|
||||
|
||||
if (sctx->screen->info.mesh_fast_launch_2) {
|
||||
if (sctx->screen->info.gfx_level >= GFX11) {
|
||||
radeon_emit(PKT3(PKT3_DISPATCH_MESH_DIRECT, 3, sctx->render_cond_enabled));
|
||||
radeon_emit(info->grid[0]);
|
||||
radeon_emit(info->grid[1]);
|
||||
|
|
|
|||
|
|
@ -165,7 +165,7 @@ unsigned si_get_max_workgroup_size(const struct si_shader *shader)
|
|||
/* Without multi-row export, we need at least number of output vertex/primitive
|
||||
* threads in workgroup for export (one vertex/primitive per thread).
|
||||
*/
|
||||
if (stage == MESA_SHADER_MESH && !sscreen->info.mesh_fast_launch_2) {
|
||||
if (stage == MESA_SHADER_MESH && sscreen->info.gfx_level < GFX11) {
|
||||
max_work_group_size = MAX3(max_work_group_size,
|
||||
shader->selector->info.base.mesh.max_vertices_out,
|
||||
shader->selector->info.base.mesh.max_primitives_out);
|
||||
|
|
@ -739,7 +739,7 @@ static void si_preprocess_nir(struct si_nir_shader_ctx *ctx)
|
|||
}
|
||||
}
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_MESH && !sel->screen->info.mesh_fast_launch_2) {
|
||||
if (nir->info.stage == MESA_SHADER_MESH && sel->screen->info.gfx_level < GFX11) {
|
||||
NIR_PASS(progress, nir, nir_lower_compute_system_values,
|
||||
&(nir_lower_compute_system_values_options){
|
||||
/* Mesh shaders run as NGG which can implement local_invocation_index from
|
||||
|
|
|
|||
|
|
@ -635,7 +635,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args,
|
|||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->mesh_scratch_ring_addr);
|
||||
|
||||
/* VGPRs */
|
||||
if (sel->screen->info.mesh_fast_launch_2) {
|
||||
if (sel->screen->info.gfx_level >= GFX11) {
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.local_invocation_ids_packed);
|
||||
} else {
|
||||
unsigned unused_args = sel->screen->info.gfx_level >= GFX12 ? 3 : 5;
|
||||
|
|
|
|||
|
|
@ -1569,7 +1569,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
shader->ngg.vgt_gs_max_vert_out = gs_sel->info.base.gs.vertices_out;
|
||||
shader->ngg.ge_ngg_subgrp_cntl = S_028B4C_PRIM_AMP_FACTOR(gs_sel->info.base.gs.vertices_out);
|
||||
} else if (gs_stage == MESA_SHADER_MESH) {
|
||||
shader->ngg.vgt_gs_max_vert_out = sscreen->info.mesh_fast_launch_2 ?
|
||||
shader->ngg.vgt_gs_max_vert_out = sscreen->info.gfx_level >= GFX11 ?
|
||||
gs_info->base.mesh.max_vertices_out : si_get_max_workgroup_size(shader);
|
||||
shader->ngg.ge_ngg_subgrp_cntl = gs_info->base.mesh.max_primitives_out;
|
||||
} else {
|
||||
|
|
@ -1730,7 +1730,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
if (gs_stage == MESA_SHADER_MESH) {
|
||||
shader->ngg.vgt_shader_stages_en =
|
||||
S_028B54_GS_EN(1) |
|
||||
S_028B54_GS_FAST_LAUNCH(sscreen->info.mesh_fast_launch_2 ? 2 : 1);
|
||||
S_028B54_GS_FAST_LAUNCH(sscreen->info.gfx_level >= GFX11 ? 2 : 1);
|
||||
} else {
|
||||
shader->ngg.vgt_shader_stages_en =
|
||||
S_028B54_ES_EN(es_stage == MESA_SHADER_TESS_EVAL ?
|
||||
|
|
@ -1747,7 +1747,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
S_028B54_MAX_PRIMGRP_IN_WAVE(2);
|
||||
}
|
||||
|
||||
if (gs_stage == MESA_SHADER_MESH && sscreen->info.mesh_fast_launch_2) {
|
||||
if (gs_stage == MESA_SHADER_MESH && sscreen->info.gfx_level >= GFX11) {
|
||||
unsigned workgroup_threads =
|
||||
gs_info->base.workgroup_size[0] *
|
||||
gs_info->base.workgroup_size[1] *
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue